Compare commits

..

7 Commits

Author SHA1 Message Date
Hans-Kristian Arntzen b817f972f4 tests: Add test coverage for two stages of AddToStateObject().
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2022-05-04 15:15:18 +02:00
Hans-Kristian Arntzen b9c575d678 tests: Add test for AddToStateObject.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2022-05-04 15:15:18 +02:00
Hans-Kristian Arntzen ab85bdf94f tests: Add default NODE_MASK state object to RTPSO tests.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2022-05-04 15:15:18 +02:00
Hans-Kristian Arntzen 3d1b2d2042 vkd3d: Implement AddToStateObject().
This is barely implementable, and relies on implementations to do kinda
what we want.

To make this work in practice, we need to allow two pipelines per state
object. One that is created with LIBRARY and one that can be bound. When
incrementing the PSO, we use the LIBRARY one.

It seems to be allowed to create a new library from an old library.
It is more convenient for us if we're allowed to do this, so do this
until we're forced to do otherwise.

DXR 1.1 requires that shader identifiers remain invariant for child
pipelines if the parent pipeline also have them.
Vulkan has no such guarantee, but we can speculate that it works and
validate that identifiers remain invariant. This seems to work fine on
NVIDIA at least ... It probably makes sense that it works for
implementations where pipeline libraries are compiled at that time.

The basic implementation of AddToStateObject() is to consider
the parent pipeline as a COLLECTION pipeline. This composes well and
avoids a lot of extra implementation cruft.

Also adds validation to ensure that COLLECTION global state matches with
other COLLECTION objects and the parent. We will also inherit global
state like root signatures, pipeline config, shader configs etc when
using AddToStateObject().

The tests pass on NVIDIA at least.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2022-05-04 14:59:07 +02:00
Hans-Kristian Arntzen ccf927da7f vkd3d: Hold private ownership over global root signature.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2022-05-04 14:59:02 +02:00
Hans-Kristian Arntzen c940d548f4 vkd3d: Allow different but compatible global root signature objects.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2022-05-04 14:59:02 +02:00
Hans-Kristian Arntzen 53309d05a1 vkd3d: Ignore NODE_MASK subobjects.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2022-05-04 14:57:24 +02:00
60 changed files with 1227 additions and 8520 deletions

View File

@ -15,10 +15,11 @@ jobs:
- name: Build release
id: build-release
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v7
with:
command: |
export VERSION_NAME="${GITHUB_REF##*/}-${GITHUB_SHA##*/}"
git config --global --add safe.directory "$GITHUB_WORKSPACE"
./package-release.sh ${VERSION_NAME} build --no-package
echo "VERSION_NAME=${VERSION_NAME}" >> $GITHUB_ENV

View File

@ -18,58 +18,64 @@ jobs:
- name: Build MinGW x86
id: build-mingw-x86
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v7
with:
command: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
meson -Denable_tests=True -Denable_extras=True --cross-file=build-win32.txt --buildtype release build-mingw-x86
ninja -C build-mingw-x86
- name: Build MinGW x64
id: build-mingw-x64
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v7
with:
command: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
meson -Denable_tests=True -Denable_extras=True --cross-file=build-win64.txt --buildtype release build-mingw-x64
ninja -C build-mingw-x64
- name: Build Native GCC x86
id: build-native-gcc-x86
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v7
with:
command: |
export CC="gcc -m32"
export CXX="g++ -m32"
export PKG_CONFIG_PATH="/usr/lib32/pkgconfig:/usr/lib/i386-linux-gnu/pkgconfig:/usr/lib/pkgconfig"
git config --global --add safe.directory "$GITHUB_WORKSPACE"
meson -Denable_tests=True -Denable_extras=True --buildtype release build-native-gcc-x86
ninja -C build-native-gcc-x86
- name: Build Native GCC x64
id: build-native-gcc-x64
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v7
with:
command: |
export CC="gcc"
export CXX="g++"
git config --global --add safe.directory "$GITHUB_WORKSPACE"
meson -Denable_tests=True -Denable_extras=True --buildtype release build-native-gcc-x64
ninja -C build-native-gcc-x64
- name: Build Native Clang x86
id: build-native-clang-x86
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v7
with:
command: |
export CC="clang -m32"
export CXX="clang++ -m32"
export PKG_CONFIG_PATH="/usr/lib32/pkgconfig:/usr/lib/i386-linux-gnu/pkgconfig:/usr/lib/pkgconfig"
git config --global --add safe.directory "$GITHUB_WORKSPACE"
meson -Denable_tests=True -Denable_extras=True --buildtype release build-native-clang-x86
ninja -C build-native-clang-x86
- name: Build Native Clang x64
id: build-native-clang-x64
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v7
with:
command: |
export CC="clang"
export CXX="clang++"
git config --global --add safe.directory "$GITHUB_WORKSPACE"
meson -Denable_tests=True -Denable_extras=True --buildtype release build-native-clang-x64
ninja -C build-native-clang-x64

View File

@ -456,8 +456,13 @@ static void cxg_mesh_create(ID3D12Device *device, float inner_radius, float oute
float r0, r1, r2;
float angle, da;
vertices = calloc(tooth_count, 12 * sizeof(*vertices));
faces = calloc(tooth_count, 20 * sizeof(*faces));
if (!(vertices = calloc(tooth_count, 12 * sizeof(*vertices))))
return;
if (!(faces = calloc(tooth_count, 20 * sizeof(*faces))))
{
free(vertices);
return;
}
r0 = inner_radius;
r1 = outer_radius - tooth_depth / 2.0f;

View File

@ -27,7 +27,6 @@
#include <stdint.h>
#include <limits.h>
#include <stdbool.h>
#include <assert.h>
#ifdef _MSC_VER
#include <intrin.h>
@ -47,13 +46,11 @@
static inline uint64_t align64(uint64_t addr, uint64_t alignment)
{
assert(alignment > 0 && (alignment & (alignment - 1)) == 0);
return (addr + (alignment - 1)) & ~(alignment - 1);
}
static inline size_t align(size_t addr, size_t alignment)
{
assert(alignment > 0 && (alignment & (alignment - 1)) == 0);
return (addr + (alignment - 1)) & ~(alignment - 1);
}
@ -311,19 +308,4 @@ static inline uint64_t vkd3d_get_current_time_ns(void)
#endif
}
#ifdef _MSC_VER
#pragma intrinsic(__rdtsc)
#endif
static inline uint64_t vkd3d_get_current_time_ticks(void)
{
#ifdef _MSC_VER
return __rdtsc();
#elif defined(__i386__) || defined(__x86_64__)
return __builtin_ia32_rdtsc();
#else
return vkd3d_get_current_time_ns();
#endif
}
#endif /* __VKD3D_COMMON_H */

View File

@ -23,7 +23,6 @@
#include <stdbool.h>
#include <stdlib.h>
#include "vkd3d_common.h"
#include "vkd3d_debug.h"
static inline void *vkd3d_malloc(size_t size)
@ -58,12 +57,12 @@ static inline void vkd3d_free(void *ptr)
bool vkd3d_array_reserve(void **elements, size_t *capacity,
size_t element_count, size_t element_size);
static inline void *vkd3d_malloc_aligned(size_t size, size_t alignment)
static inline void *vkd3d_malloc_aligned(size_t size, size_t align)
{
#ifdef _WIN32
return _aligned_malloc(size, alignment);
return _aligned_malloc(size, align);
#else
return aligned_alloc(alignment, align(size, alignment));
return aligned_alloc(align, size);
#endif
}

View File

@ -25,6 +25,13 @@
#ifdef VKD3D_ENABLE_PROFILING
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <time.h>
#endif
void vkd3d_init_profiling(void);
bool vkd3d_uses_profiling(void);
unsigned int vkd3d_profiling_register_region(const char *name, spinlock_t *lock, uint32_t *latch);
@ -41,12 +48,12 @@ void vkd3d_profiling_notify_work(unsigned int index, uint64_t start_ticks, uint6
do { \
if (!(_vkd3d_region_index_##name = vkd3d_atomic_uint32_load_explicit(&_vkd3d_region_latch_##name, vkd3d_memory_order_acquire))) \
_vkd3d_region_index_##name = vkd3d_profiling_register_region(#name, &_vkd3d_region_lock_##name, &_vkd3d_region_latch_##name); \
_vkd3d_region_begin_tick_##name = vkd3d_get_current_time_ticks(); \
_vkd3d_region_begin_tick_##name = vkd3d_get_current_time_ns(); \
} while(0)
#define VKD3D_REGION_END_ITERATIONS(name, iter) \
do { \
_vkd3d_region_end_tick_##name = vkd3d_get_current_time_ticks(); \
_vkd3d_region_end_tick_##name = vkd3d_get_current_time_ns(); \
vkd3d_profiling_notify_work(_vkd3d_region_index_##name, _vkd3d_region_begin_tick_##name, _vkd3d_region_end_tick_##name, iter); \
} while(0)

View File

@ -30,7 +30,6 @@ WCHAR *vkd3d_dup_demangled_entry_point(const char *str);
char *vkd3d_dup_demangled_entry_point_ascii(const char *str);
bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b);
bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b);
bool vkd3d_export_strequal_substr(const WCHAR *a, size_t n, const WCHAR *b);
char *vkd3d_strdup(const char *str);

View File

@ -6,12 +6,6 @@ COMP_SOURCES := $(wildcard $(M)/*.comp)
TESC_SOURCES := $(wildcard $(M)/*.tesc)
TESE_SOURCES := $(wildcard $(M)/*.tese)
GEOM_SOURCES := $(wildcard $(M)/*.geom)
RGEN_SOURCES := $(wildcard $(M)/*.rgen)
RINT_SOURCES := $(wildcard $(M)/*.rint)
RAHIT_SOURCES := $(wildcard $(M)/*.rahit)
RCHIT_SOURCES := $(wildcard $(M)/*.rchit)
RMISS_SOURCES := $(wildcard $(M)/*.rmiss)
RCALL_SOURCES := $(wildcard $(M)/*.rcall)
SPV_OBJECTS := \
$(VERT_SOURCES:.vert=.spv) \
@ -19,49 +13,25 @@ SPV_OBJECTS := \
$(COMP_SOURCES:.comp=.spv) \
$(TESC_SOURCES:.tesc=.spv) \
$(TESE_SOURCES:.tese=.spv) \
$(GEOM_SOURCES:.geom=.spv) \
$(RGEN_SOURCES:.rgen=.spv) \
$(RINT_SOURCES:.rint=.spv) \
$(RAHIT_SOURCES:.rahit=.spv) \
$(RCHIT_SOURCES:.rchit=.spv) \
$(RMISS_SOURCES:.rmiss=.spv) \
$(RCALL_SOURCES:.rcall=.spv)
$(GEOM_SOURCES:.geom=.spv)
%.spv: %.vert
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1
%.spv: %.frag
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 -DDEBUG_CHANNEL_HELPER_LANES $(GLSLC_FLAGS)
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 -DDEBUG_CHANNEL_HELPER_LANES
%.spv: %.comp
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1
%.spv: %.geom
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1
%.spv: %.tesc
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1
%.spv: %.tese
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
%.spv: %.rgen
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
%.spv: %.rint
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
%.spv: %.rahit
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
%.spv: %.rchit
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
%.spv: %.rmiss
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
%.spv: %.rcall
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1
all: $(SPV_OBJECTS)

View File

@ -97,14 +97,6 @@ void DEBUG_CHANNEL_INIT(uvec3 id)
#endif
}
void DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3 id, uint inst)
{
if (!DEBUG_SHADER_RING_ACTIVE)
return;
DEBUG_CHANNEL_ID = id;
DEBUG_CHANNEL_INSTANCE_COUNTER = inst;
}
void DEBUG_CHANNEL_UNLOCK_MESSAGE(RingBuffer buf, uint offset, uint num_words)
{
memoryBarrierBuffer();

View File

@ -59,39 +59,38 @@
extern "C" {
#endif /* __cplusplus */
#define VKD3D_CONFIG_FLAG_VULKAN_DEBUG (1ull << 0)
#define VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS (1ull << 1)
#define VKD3D_CONFIG_FLAG_DEBUG_UTILS (1ull << 2)
#define VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV (1ull << 3)
#define VKD3D_CONFIG_FLAG_DXR (1ull << 4)
#define VKD3D_CONFIG_FLAG_SINGLE_QUEUE (1ull << 5)
#define VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS (1ull << 6)
#define VKD3D_CONFIG_FLAG_FORCE_RTV_EXCLUSIVE_QUEUE (1ull << 7)
#define VKD3D_CONFIG_FLAG_FORCE_DSV_EXCLUSIVE_QUEUE (1ull << 8)
#define VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE (1ull << 9)
#define VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV (1ull << 10)
#define VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET (1ull << 11)
#define VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE (1ull << 12)
#define VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED (1ull << 13)
#define VKD3D_CONFIG_FLAG_DXR11 (1ull << 14)
#define VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION (1ull << 15)
#define VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE (1ull << 16)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV (1ull << 17)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV (1ull << 18)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG (1ull << 19)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV (1ull << 20)
#define VKD3D_CONFIG_FLAG_MUTABLE_SINGLE_SET (1ull << 21)
#define VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR (1ull << 22)
#define VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS (1ull << 23)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_MISMATCH_DRIVER (1ull << 24)
#define VKD3D_CONFIG_FLAG_BREADCRUMBS (1ull << 25)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY (1ull << 26)
#define VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC (1ull << 27)
#define VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV (1ull << 28)
#define VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV (1ull << 29)
#define VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION (1ull << 30)
#define VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16 (1ull << 31)
#define VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK (1ull << 32)
enum vkd3d_config_flags
{
VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001,
VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS = 0x00000002,
VKD3D_CONFIG_FLAG_DEBUG_UTILS = 0x00000004,
VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV = 0x00000008,
VKD3D_CONFIG_FLAG_DXR = 0x00000010,
VKD3D_CONFIG_FLAG_SINGLE_QUEUE = 0x00000020,
VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS = 0x00000040,
VKD3D_CONFIG_FLAG_FORCE_RTV_EXCLUSIVE_QUEUE = 0x00000080,
VKD3D_CONFIG_FLAG_FORCE_DSV_EXCLUSIVE_QUEUE = 0x00000100,
VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE = 0x00000200,
VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV = 0x00000400,
VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET = 0x00000800,
VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE = 0x00001000,
VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED = 0x00002000,
VKD3D_CONFIG_FLAG_DXR11 = 0x00004000,
VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION = 0x00008000,
VKD3D_CONFIG_FLAG_WORKAROUND_MISSING_COLOR_COMPUTE_BARRIERS = 0x00010000,
VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE = 0x00020000,
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV = 0x00040000,
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV = 0x00080000,
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG = 0x00100000,
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV = 0x00200000,
VKD3D_CONFIG_FLAG_MUTABLE_SINGLE_SET = 0x00400000,
VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR = 0x00800000,
VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS = 0x01000000,
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_MISMATCH_DRIVER = 0x02000000,
VKD3D_CONFIG_FLAG_BREADCRUMBS = 0x04000000,
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY = 0x08000000,
VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC = 0x10000000,
};
typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event);

View File

@ -3644,8 +3644,8 @@ interface ID3D12CommandQueue : ID3D12Pageable
ID3D12Heap *heap,
UINT range_count,
const D3D12_TILE_RANGE_FLAGS *range_flags,
const UINT *heap_range_offsets,
const UINT *range_tile_counts,
UINT *heap_range_offsets,
UINT *range_tile_counts,
D3D12_TILE_MAPPING_FLAGS flags);
void CopyTileMappings(ID3D12Resource *dst_resource,

View File

@ -241,7 +241,6 @@ struct vkd3d_shader_root_constant
struct vkd3d_shader_root_descriptor
{
struct vkd3d_shader_resource_binding *binding;
uint32_t raw_va_root_descriptor_index;
};
struct vkd3d_shader_root_parameter
@ -309,9 +308,6 @@ enum vkd3d_shader_target_extension
* all in range, or all out of range. We can implement structured buffer vectorization of vec3,
* but not byte address buffer. */
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS,
VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR,
VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT,
VKD3D_SHADER_TARGET_EXTENSION_COUNT,
};
enum vkd3d_shader_quirk
@ -660,7 +656,6 @@ struct vkd3d_shader_scan_info
bool has_side_effects;
bool needs_late_zs;
bool discards;
bool has_uav_counter;
unsigned int patch_vertex_count;
};
@ -754,11 +749,7 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *code);
int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *root_signature,
vkd3d_shader_hash_t *compatibility_hash);
int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
struct vkd3d_versioned_root_signature_desc *desc,
vkd3d_shader_hash_t *compatibility_hash);
struct vkd3d_versioned_root_signature_desc *root_signature);
void vkd3d_shader_free_root_signature(struct vkd3d_versioned_root_signature_desc *root_signature);
/* FIXME: Add support for returning error messages (ID3DBlob). */
@ -784,65 +775,19 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature
struct vkd3d_shader_library_entry_point
{
unsigned int identifier;
VkShaderStageFlagBits stage;
WCHAR *mangled_entry_point;
WCHAR *plain_entry_point;
char *real_entry_point;
VkShaderStageFlagBits stage;
};
enum vkd3d_shader_subobject_kind
{
/* Matches DXIL for simplicity. */
VKD3D_SHADER_SUBOBJECT_KIND_STATE_OBJECT_CONFIG = 0,
VKD3D_SHADER_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE = 1,
VKD3D_SHADER_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE = 2,
VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION = 8,
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG = 9,
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG = 10,
VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP = 11,
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1 = 12,
};
struct vkd3d_shader_library_subobject
{
enum vkd3d_shader_subobject_kind kind;
unsigned int dxil_identifier;
/* All const pointers here point directly to the DXBC blob,
* so they do not need to be freed.
* Fortunately for us, the C strings are zero-terminated in the blob itself. */
/* In the blob, ASCII is used as identifier, where API uses wide strings, sigh ... */
const char *name;
union
{
D3D12_RAYTRACING_PIPELINE_CONFIG1 pipeline_config;
D3D12_RAYTRACING_SHADER_CONFIG shader_config;
D3D12_STATE_OBJECT_CONFIG object_config;
/* Duped strings because API wants wide strings for no good reason. */
D3D12_HIT_GROUP_DESC hit_group;
D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION association;
struct
{
const void *data;
size_t size;
} payload;
} data;
};
int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
int vkd3d_shader_dxil_append_library_entry_points(
const D3D12_DXIL_LIBRARY_DESC *library_desc,
unsigned int identifier,
struct vkd3d_shader_library_entry_point **entry_points,
size_t *entry_point_size, size_t *entry_point_count,
struct vkd3d_shader_library_subobject **subobjects,
size_t *subobjects_size, size_t *subobjects_count);
size_t *entry_point_size, size_t *entry_point_count);
void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_entry_point *entry_points, size_t count);
void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count);
int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
const char *export,
@ -868,8 +813,7 @@ typedef int (*PFN_vkd3d_shader_compile_dxbc)(const struct vkd3d_shader_code *dxb
typedef void (*PFN_vkd3d_shader_free_shader_code)(struct vkd3d_shader_code *code);
typedef int (*PFN_vkd3d_shader_parse_root_signature)(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *root_signature,
vkd3d_shader_hash_t *compatibility_hash);
struct vkd3d_versioned_root_signature_desc *root_signature);
typedef void (*PFN_vkd3d_shader_free_root_signature)(struct vkd3d_versioned_root_signature_desc *root_signature);
typedef int (*PFN_vkd3d_shader_serialize_root_signature)(

View File

@ -82,21 +82,6 @@ bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b)
return *a == *b;
}
bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b)
{
if (!a || !b)
return false;
while (*a != '\0' && *b != '\0')
{
if (*a != *b)
return false;
a++;
b++;
}
return *a == *b;
}
bool vkd3d_export_strequal_substr(const WCHAR *a, size_t expected_n, const WCHAR *b)
{
size_t n = 0;

View File

@ -2755,9 +2755,8 @@ static int shader_parse_static_samplers(struct root_signature_parser_context *co
return VKD3D_OK;
}
int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
struct vkd3d_versioned_root_signature_desc *desc,
vkd3d_shader_hash_t *compatibility_hash)
static int shader_parse_root_signature(const char *data, unsigned int data_size,
struct vkd3d_versioned_root_signature_desc *desc)
{
struct vkd3d_root_signature_desc *v_1_0 = &desc->v_1_0;
struct root_signature_parser_context context;
@ -2765,8 +2764,6 @@ int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_si
const char *ptr = data;
int ret;
memset(desc, 0, sizeof(*desc));
context.data = data;
context.data_size = data_size;
@ -2838,46 +2835,28 @@ int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_si
read_uint32(&ptr, &v_1_0->flags);
TRACE("Flags %#x.\n", v_1_0->flags);
if (compatibility_hash)
{
struct vkd3d_shader_code code = { data, data_size };
*compatibility_hash = vkd3d_shader_hash(&code);
}
return VKD3D_OK;
}
static int rts0_handler(const char *data, DWORD data_size, DWORD tag, void *context)
{
struct vkd3d_shader_code *payload = context;
struct vkd3d_versioned_root_signature_desc *desc = context;
if (tag != TAG_RTS0)
return VKD3D_OK;
payload->code = data;
payload->size = data_size;
return VKD3D_OK;
return shader_parse_root_signature(data, data_size, desc);
}
int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *root_signature,
vkd3d_shader_hash_t *compatibility_hash)
struct vkd3d_versioned_root_signature_desc *root_signature)
{
struct vkd3d_shader_code raw_payload;
int ret;
TRACE("dxbc {%p, %zu}, root_signature %p.\n", dxbc->code, dxbc->size, root_signature);
memset(&raw_payload, 0, sizeof(raw_payload));
if ((ret = parse_dxbc(dxbc->code, dxbc->size, rts0_handler, &raw_payload)) < 0)
return ret;
if (!raw_payload.code)
return VKD3D_ERROR;
if ((ret = vkd3d_shader_parse_root_signature_raw(raw_payload.code, raw_payload.size,
root_signature, compatibility_hash)) < 0)
memset(root_signature, 0, sizeof(*root_signature));
if ((ret = parse_dxbc(dxbc->code, dxbc->size, rts0_handler, root_signature)) < 0)
{
vkd3d_shader_free_root_signature(root_signature);
return ret;

View File

@ -764,30 +764,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR)
{
static const dxil_spv_option_barycentric_khr helper =
{ { DXIL_SPV_OPTION_BARYCENTRIC_KHR }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support BARYCENTRIC_KHR.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT)
{
static const dxil_spv_option_min_precision_native_16bit helper =
{ { DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support MIN_PRECISION_NATIVE_16BIT.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
}
if (compiler_args->dual_source_blending)
@ -1274,18 +1250,6 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT)
{
static const dxil_spv_option_min_precision_native_16bit helper =
{ { DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support MIN_PRECISION_NATIVE_16BIT.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
}
}
@ -1352,31 +1316,6 @@ void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_ent
vkd3d_free(entry_points);
}
void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count)
{
size_t i, j;
for (i = 0; i < count; i++)
{
if (subobjects[i].kind == VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION)
{
for (j = 0; j < subobjects[i].data.association.NumExports; j++)
vkd3d_free((void*)subobjects[i].data.association.pExports[j]);
vkd3d_free((void*)subobjects[i].data.association.pExports);
vkd3d_free((void*)subobjects[i].data.association.SubobjectToAssociate);
}
else if (subobjects[i].kind == VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP)
{
vkd3d_free((void*)subobjects[i].data.hit_group.HitGroupExport);
vkd3d_free((void*)subobjects[i].data.hit_group.AnyHitShaderImport);
vkd3d_free((void*)subobjects[i].data.hit_group.ClosestHitShaderImport);
vkd3d_free((void*)subobjects[i].data.hit_group.IntersectionShaderImport);
}
}
vkd3d_free(subobjects);
}
static VkShaderStageFlagBits convert_stage(dxil_spv_shader_stage stage)
{
/* Only interested in RT entry_points. There is no way yet to use lib_6_3+ for non-RT. */
@ -1421,95 +1360,20 @@ static bool vkd3d_dxil_build_entry(struct vkd3d_shader_library_entry_point *entr
return true;
}
static void vkd3d_shader_dxil_copy_subobject(unsigned int identifier,
struct vkd3d_shader_library_subobject *subobject,
const dxil_spv_rdat_subobject *dxil_subobject)
{
unsigned int i;
/* Reuse same enums as DXIL. */
subobject->kind = (enum vkd3d_shader_subobject_kind)dxil_subobject->kind;
subobject->name = dxil_subobject->subobject_name;
subobject->dxil_identifier = identifier;
switch (dxil_subobject->kind)
{
case DXIL_SPV_RDAT_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE:
case DXIL_SPV_RDAT_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE:
subobject->data.payload.data = dxil_subobject->payload;
subobject->data.payload.size = dxil_subobject->payload_size;
break;
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG:
/* Normalize the kind. */
subobject->kind = VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1;
subobject->data.pipeline_config.MaxTraceRecursionDepth = dxil_subobject->args[0];
subobject->data.pipeline_config.Flags = 0;
break;
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1:
subobject->kind = VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1;
subobject->data.pipeline_config.MaxTraceRecursionDepth = dxil_subobject->args[0];
subobject->data.pipeline_config.Flags = dxil_subobject->args[1];
break;
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG:
subobject->data.shader_config.MaxPayloadSizeInBytes = dxil_subobject->args[0];
subobject->data.shader_config.MaxAttributeSizeInBytes = dxil_subobject->args[1];
break;
case DXIL_SPV_RDAT_SUBOBJECT_KIND_HIT_GROUP:
/* Enum aliases. */
subobject->data.hit_group.Type = (D3D12_HIT_GROUP_TYPE)dxil_subobject->hit_group_type;
assert(dxil_subobject->num_exports == 3);
/* Implementation simplifies a lot if we can reuse the D3D12 type here. */
subobject->data.hit_group.HitGroupExport = vkd3d_dup_entry_point(dxil_subobject->subobject_name);
subobject->data.hit_group.AnyHitShaderImport = dxil_subobject->exports[0] && *dxil_subobject->exports[0] != '\0' ?
vkd3d_dup_entry_point(dxil_subobject->exports[0]) : NULL;
subobject->data.hit_group.ClosestHitShaderImport = dxil_subobject->exports[1] && *dxil_subobject->exports[1] != '\0' ?
vkd3d_dup_entry_point(dxil_subobject->exports[1]) : NULL;
subobject->data.hit_group.IntersectionShaderImport = dxil_subobject->exports[2] && *dxil_subobject->exports[2] != '\0' ?
vkd3d_dup_entry_point(dxil_subobject->exports[2]) : NULL;
break;
case DXIL_SPV_RDAT_SUBOBJECT_KIND_STATE_OBJECT_CONFIG:
subobject->data.object_config.Flags = dxil_subobject->args[0];
break;
case DXIL_SPV_RDAT_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION:
assert(dxil_subobject->num_exports >= 1);
subobject->data.association.SubobjectToAssociate = vkd3d_dup_entry_point(dxil_subobject->exports[0]);
subobject->data.association.pExports = vkd3d_malloc((dxil_subobject->num_exports - 1) * sizeof(LPCWSTR));
subobject->data.association.NumExports = dxil_subobject->num_exports - 1;
for (i = 1; i < dxil_subobject->num_exports; i++)
subobject->data.association.pExports[i - 1] = vkd3d_dup_entry_point(dxil_subobject->exports[i]);
break;
default:
FIXME("Unrecognized RDAT subobject type: %u.\n", dxil_subobject->kind);
break;
}
}
int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
int vkd3d_shader_dxil_append_library_entry_points(
const D3D12_DXIL_LIBRARY_DESC *library_desc,
unsigned int identifier,
struct vkd3d_shader_library_entry_point **entry_points,
size_t *entry_point_size, size_t *entry_point_count,
struct vkd3d_shader_library_subobject **subobjects,
size_t *subobjects_size, size_t *subobjects_count)
size_t *entry_point_size, size_t *entry_point_count)
{
struct vkd3d_shader_library_entry_point new_entry;
struct vkd3d_shader_library_subobject *subobject;
dxil_spv_parsed_blob blob = NULL;
struct vkd3d_shader_code code;
dxil_spv_rdat_subobject sub;
dxil_spv_shader_stage stage;
const char *mangled_entry;
char *ascii_entry = NULL;
vkd3d_shader_hash_t hash;
unsigned int count, i, j;
unsigned int rdat_count;
unsigned int count, i;
int ret = VKD3D_OK;
memset(&new_entry, 0, sizeof(new_entry));
@ -1530,8 +1394,6 @@ int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
goto end;
}
rdat_count = dxil_spv_parsed_blob_get_num_rdat_subobjects(blob);
if (library_desc->NumExports)
{
for (i = 0; i < library_desc->NumExports; i++)
@ -1541,44 +1403,24 @@ int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
else
ascii_entry = vkd3d_strdup_w_utf8(library_desc->pExports[i].Name, 0);
/* An export can point to a subobject or an entry point. */
for (j = 0; j < rdat_count; j++)
stage = dxil_spv_parsed_blob_get_shader_stage_for_entry(blob, ascii_entry);
if (stage == DXIL_SPV_STAGE_UNKNOWN)
{
dxil_spv_parsed_blob_get_rdat_subobject(blob, j, &sub);
/* Subobject names are not mangled. */
if (strcmp(sub.subobject_name, ascii_entry) == 0)
break;
ret = VKD3D_ERROR_INVALID_ARGUMENT;
goto end;
}
if (j < rdat_count)
{
vkd3d_array_reserve((void**)subobjects, subobjects_size,
*subobjects_count + 1, sizeof(**subobjects));
subobject = &(*subobjects)[*subobjects_count];
vkd3d_shader_dxil_copy_subobject(identifier, subobject, &sub);
*subobjects_count += 1;
}
else
{
stage = dxil_spv_parsed_blob_get_shader_stage_for_entry(blob, ascii_entry);
if (stage == DXIL_SPV_STAGE_UNKNOWN)
{
ret = VKD3D_ERROR_INVALID_ARGUMENT;
goto end;
}
new_entry.real_entry_point = ascii_entry;
new_entry.plain_entry_point = vkd3d_wstrdup(library_desc->pExports[i].Name);
new_entry.mangled_entry_point = NULL;
new_entry.identifier = identifier;
new_entry.stage = convert_stage(stage);
ascii_entry = NULL;
new_entry.real_entry_point = ascii_entry;
new_entry.plain_entry_point = vkd3d_wstrdup(library_desc->pExports[i].Name);
new_entry.mangled_entry_point = NULL;
new_entry.identifier = identifier;
new_entry.stage = convert_stage(stage);
ascii_entry = NULL;
vkd3d_array_reserve((void**)entry_points, entry_point_size,
*entry_point_count + 1, sizeof(new_entry));
(*entry_points)[(*entry_point_count)++] = new_entry;
memset(&new_entry, 0, sizeof(new_entry));
}
vkd3d_array_reserve((void**)entry_points, entry_point_size,
*entry_point_count + 1, sizeof(new_entry));
(*entry_points)[(*entry_point_count)++] = new_entry;
memset(&new_entry, 0, sizeof(new_entry));
}
}
else
@ -1611,21 +1453,6 @@ int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
(*entry_points)[(*entry_point_count)++] = new_entry;
memset(&new_entry, 0, sizeof(new_entry));
}
if (rdat_count)
{
/* All subobjects are also exported. */
vkd3d_array_reserve((void**)subobjects, subobjects_size,
*subobjects_count + rdat_count, sizeof(**subobjects));
for (i = 0; i < rdat_count; i++)
{
dxil_spv_parsed_blob_get_rdat_subobject(blob, i, &sub);
subobject = &(*subobjects)[*subobjects_count];
vkd3d_shader_dxil_copy_subobject(identifier, subobject, &sub);
*subobjects_count += 1;
}
}
}
end:

View File

@ -1404,13 +1404,6 @@ static uint32_t vkd3d_spirv_build_op_logical_and(struct vkd3d_spirv_builder *bui
SpvOpLogicalAnd, result_type, operand0, operand1);
}
/* Emits an OpAny instruction into the function stream. Per the SPIR-V spec,
 * OpAny reduces a boolean vector operand to a single bool that is true if
 * any component is true. Returns the result ID. */
static uint32_t vkd3d_spirv_build_op_any(struct vkd3d_spirv_builder *builder,
        uint32_t result_type, uint32_t operand0)
{
    uint32_t any_result_id;

    any_result_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream,
            SpvOpAny, result_type, operand0);
    return any_result_id;
}
static uint32_t vkd3d_spirv_build_op_iequal(struct vkd3d_spirv_builder *builder,
uint32_t result_type, uint32_t operand0, uint32_t operand1)
{
@ -1938,12 +1931,12 @@ vkd3d_spirv_resource_type_table[] =
{VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2, 2},
{VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2, 2},
{VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3, 3},
{VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3, 3},
{VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3, 0},
{VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, 1,
SpvCapabilitySampled1D, SpvCapabilityImage1D},
{VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3, 2},
{VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3, 2},
{VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, 3,
{VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, 0,
SpvCapabilitySampledCubeArray, SpvCapabilityImageCubeArray},
};
@ -2330,8 +2323,6 @@ struct vkd3d_dxbc_compiler
vkd3d_shader_hash_t descriptor_qa_shader_hash;
#endif
uint32_t robust_physical_counter_func_id;
int compiler_error;
};
@ -3521,17 +3512,8 @@ static uint32_t vkd3d_dxbc_compiler_emit_load_constant_buffer(struct vkd3d_dxbc_
}
}
if (access_mask == SpvMemoryAccessAlignedMask)
{
/* For physical pointers, prefer InBounds for optimal codegen. */
ptr_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, ptr_type_id,
base_id, indexes, last_index + 1);
}
else
{
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id,
base_id, indexes, last_index + 1);
}
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id,
base_id, indexes, last_index + 1);
if (reg->modifier == VKD3DSPRM_NONUNIFORM)
vkd3d_dxbc_compiler_decorate_nonuniform(compiler, ptr_id);
@ -5531,22 +5513,31 @@ static const struct vkd3d_shader_global_binding *vkd3d_dxbc_compiler_get_global_
{
if (binding->flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA)
{
uint32_t struct_id, array_type_id;
uint32_t counter_struct_id, pointer_struct_id, array_type_id;
counter_struct_id = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 1);
counter_struct_id = vkd3d_spirv_build_op_type_struct(builder, &counter_struct_id, 1);
vkd3d_spirv_build_op_member_decorate1(builder, counter_struct_id, 0, SpvDecorationOffset, 0);
vkd3d_spirv_build_op_decorate(builder, counter_struct_id, SpvDecorationBlock, NULL, 0);
vkd3d_spirv_build_op_name(builder, counter_struct_id, "uav_ctr_t");
type_id = vkd3d_spirv_build_op_type_pointer(builder, SpvStorageClassPhysicalStorageBuffer, counter_struct_id);
type_id = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 2);
array_type_id = vkd3d_spirv_build_op_type_runtime_array(builder, type_id);
vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, sizeof(uint64_t));
struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1);
vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0);
vkd3d_spirv_build_op_member_decorate(builder, struct_id, 0, SpvDecorationNonWritable, NULL, 0);
vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBufferBlock, NULL, 0);
vkd3d_spirv_build_op_name(builder, struct_id, "uav_ctrs_t");
pointer_struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1);
vkd3d_spirv_build_op_member_decorate1(builder, pointer_struct_id, 0, SpvDecorationOffset, 0);
vkd3d_spirv_build_op_decorate(builder, pointer_struct_id, SpvDecorationBufferBlock, NULL, 0);
vkd3d_spirv_build_op_name(builder, pointer_struct_id, "uav_ctrs_t");
var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream,
vkd3d_spirv_get_op_type_pointer(builder, storage_class, struct_id),
vkd3d_spirv_get_op_type_pointer(builder, storage_class, pointer_struct_id),
storage_class, 0);
vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationAliasedPointer, NULL, 0);
vkd3d_spirv_enable_capability(builder, SpvCapabilityPhysicalStorageBufferAddresses);
}
else
@ -5719,116 +5710,10 @@ static const struct vkd3d_shader_buffer_reference_type *vkd3d_dxbc_compiler_get_
static void vkd3d_dxbc_compiler_emit_descriptor_qa_checks(struct vkd3d_dxbc_compiler *compiler);
#endif
/* Emits a SPIR-V helper function implementing a robust UAV counter operation
 * on a raw buffer device address (BDA).
 *
 * Generated signature: uint robust_physical_counter_op(uvec2 bda, uint direction, uint fixup)
 * If bda is non-null, atomically adds "direction" to the u32 it points to and
 * returns original_value + fixup; if bda is null (no counter bound), returns 0.
 * The allocated function ID is stored in compiler->robust_physical_counter_func_id
 * for later calls. */
static void vkd3d_dxbc_compiler_emit_robust_physical_counter_func(struct vkd3d_dxbc_compiler *compiler)
{
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
uint32_t not_equal_vec_id, not_equal_id;
uint32_t merge_label_id, body_label_id;
uint32_t ptr_type_id, ptr_id;
uint32_t parameter_types[3];
uint32_t parameter_ids[3];
uint32_t phi_arguments[4];
uint32_t atomic_args[4];
uint32_t func_type_id;
uint32_t phi_result_id;
uint32_t uvec2_type;
uint32_t bvec2_type;
uint32_t result_id;
uint32_t bool_type;
uint32_t u32_type;
uint32_t label_id;
uint32_t zero_id;
unsigned int i;
/* Build the function type: uint fn(uvec2, uint, uint). The BDA is passed as
 * a uvec2 rather than a pointer so the null-address test below is possible. */
bool_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_BOOL, 1);
bvec2_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_BOOL, 2);
u32_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 1);
uvec2_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 2);
for (i = 0; i < ARRAY_SIZE(parameter_types); i++)
parameter_types[i] = i == 0 ? uvec2_type : u32_type;
func_type_id = vkd3d_spirv_get_op_type_function(builder, u32_type,
parameter_types, ARRAY_SIZE(parameter_types));
compiler->robust_physical_counter_func_id = vkd3d_spirv_alloc_id(builder);
vkd3d_spirv_build_op_name(builder, compiler->robust_physical_counter_func_id, "robust_physical_counter_op");
vkd3d_spirv_build_op_function(builder, u32_type, compiler->robust_physical_counter_func_id,
SpvFunctionControlMaskNone, func_type_id);
for (i = 0; i < ARRAY_SIZE(parameter_ids); i++)
parameter_ids[i] = vkd3d_spirv_build_op_function_parameter(builder, i == 0 ? uvec2_type : u32_type);
/* Debug names for the three parameters, matching the contract above. */
vkd3d_spirv_build_op_name(builder, parameter_ids[0], "bda");
vkd3d_spirv_build_op_name(builder, parameter_ids[1], "direction");
vkd3d_spirv_build_op_name(builder, parameter_ids[2], "fixup");
label_id = vkd3d_spirv_alloc_id(builder);
merge_label_id = vkd3d_spirv_alloc_id(builder);
body_label_id = vkd3d_spirv_alloc_id(builder);
zero_id = vkd3d_dxbc_compiler_get_constant_uint_vector(compiler, 0, 2);
vkd3d_spirv_build_op_label(builder, label_id);
/* if (any(bda != uvec2(0))) { ... } — only touch memory for a non-null address. */
not_equal_vec_id = vkd3d_spirv_build_op_inotequal(builder, bvec2_type,
parameter_ids[0], zero_id);
not_equal_id = vkd3d_spirv_build_op_any(builder, bool_type, not_equal_vec_id);
vkd3d_spirv_build_op_selection_merge(builder, merge_label_id, SpvSelectionControlMaskNone);
vkd3d_spirv_build_op_branch_conditional(builder, not_equal_id, body_label_id, merge_label_id);
/* Pre-seed the OpPhi (value, parent-block) pairs: constant 0 flows in from
 * the entry block when the BDA is null; phi_arguments[0] (the value from the
 * atomic body) is filled in below. */
phi_arguments[1] = body_label_id;
phi_arguments[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0);
phi_arguments[3] = label_id;
{
vkd3d_spirv_build_op_label(builder, body_label_id);
/* Reinterpret the uvec2 address as a PhysicalStorageBuffer pointer to u32. */
ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPhysicalStorageBuffer, u32_type);
ptr_id = vkd3d_spirv_build_op_bitcast(builder, ptr_type_id, parameter_ids[0]);
/* result = OpAtomicIAdd(*ptr, direction) at Device scope, relaxed semantics. */
atomic_args[0] = ptr_id;
atomic_args[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvScopeDevice);
atomic_args[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvMemoryAccessMaskNone);
atomic_args[3] = parameter_ids[1];
result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream,
SpvOpAtomicIAdd, u32_type,
atomic_args, ARRAY_SIZE(atomic_args));
/* The taken branch contributes (atomic result + fixup) to the phi. */
phi_arguments[0] = vkd3d_spirv_build_op_iadd(builder, u32_type,
result_id, parameter_ids[2]);
vkd3d_spirv_build_op_branch(builder, merge_label_id);
}
vkd3d_spirv_build_op_label(builder, merge_label_id);
phi_result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream,
SpvOpPhi, u32_type,
phi_arguments, ARRAY_SIZE(phi_arguments));
vkd3d_spirv_build_op_return_value(builder, phi_result_id);
vkd3d_spirv_build_op_function_end(builder);
/* Dereferencing a BDA requires the PhysicalStorageBufferAddresses capability. */
vkd3d_spirv_enable_capability(builder, SpvCapabilityPhysicalStorageBufferAddresses);
}
/* Emits a call to the robust_physical_counter_op helper for a UAV counter at
 * the given buffer device address. For an increment, the helper atomically
 * adds 1 and returns the pre-op value; for a decrement it adds -1 and applies
 * a -1 fixup, yielding the post-decrement value. Returns the call result ID. */
static uint32_t vkd3d_dxbc_compiler_emit_robust_physical_counter(struct vkd3d_dxbc_compiler *compiler,
        uint32_t bda_id, bool increment)
{
    struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
    uint32_t uint_type_id, direction_id, fixup_id;
    uint32_t call_args[3];

    uint_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 1);
    direction_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, increment ? 1u : -1u);
    fixup_id = vkd3d_dxbc_compiler_get_constant_uint(compiler, increment ? 0u : -1u);

    call_args[0] = bda_id;
    call_args[1] = direction_id;
    call_args[2] = fixup_id;

    return vkd3d_spirv_build_op_function_call(builder, uint_type_id,
            compiler->robust_physical_counter_func_id,
            call_args, ARRAY_SIZE(call_args));
}
static void vkd3d_dxbc_compiler_emit_initial_declarations(struct vkd3d_dxbc_compiler *compiler)
{
const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->shader_interface.xfb_info;
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
unsigned int i;
switch (compiler->shader_type)
{
@ -5878,19 +5763,6 @@ static void vkd3d_dxbc_compiler_emit_initial_declarations(struct vkd3d_dxbc_comp
vkd3d_dxbc_compiler_emit_descriptor_qa_checks(compiler);
#endif
if (compiler->scan_info->has_uav_counter)
{
/* Check if we're expected to deal with RAW VAs. In this case we will enable BDA. */
for (i = 0; i < compiler->shader_interface.binding_count; i++)
{
if (compiler->shader_interface.bindings[i].flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA)
{
vkd3d_dxbc_compiler_emit_robust_physical_counter_func(compiler);
break;
}
}
}
if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL)
{
vkd3d_spirv_builder_begin_main_function(builder);
@ -6562,13 +6434,8 @@ static void vkd3d_dxbc_compiler_emit_dcl_constant_buffer(struct vkd3d_dxbc_compi
else if (binding && (binding->flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA))
{
storage_class = SpvStorageClassPhysicalStorageBuffer;
/* Could use cb->size here, but we will use InBounds access chains
* which could confuse a compiler if we tried
* to access an array out of bounds. Robustness on descriptors depends on the descriptor, not the
* declaration, and it's possible to declare a CBV with fewer array elements than you access.
* In this case, we pretend to have a 64 KiB descriptor. */
type_id = vkd3d_dxbc_compiler_get_buffer_reference_type(compiler,
VKD3D_DATA_FLOAT, 4, 4 * 1024, 0)->type_id;
VKD3D_DATA_FLOAT, 4, cb->size, 0)->type_id;
var_id = compiler->root_parameter_var_id;
}
else
@ -9510,9 +9377,9 @@ static void vkd3d_dxbc_compiler_emit_gather4(struct vkd3d_dxbc_compiler *compile
unsigned int image_flags = VKD3D_IMAGE_FLAG_SAMPLED;
SpvImageOperandsMask operands_mask = 0;
unsigned int image_operand_count = 0;
uint32_t image_operands[1] = { 0 };
struct vkd3d_shader_image image;
unsigned int component_idx;
uint32_t image_operands[1];
DWORD coordinate_mask;
bool extended_offset;
bool is_sparse_op;
@ -9811,19 +9678,7 @@ static void vkd3d_dxbc_compiler_emit_ld_raw_structured_srv_uav(struct vkd3d_dxbc
uint32_t indices[2];
indices[0] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0);
indices[1] = coordinate_id;
if (access_mask == SpvMemoryAccessAlignedMask)
{
/* For physical pointers, prefer InBounds for optimal codegen. */
ptr_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, ptr_type_id,
image.id, indices, ARRAY_SIZE(indices));
}
else
{
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id,
image.id, indices, ARRAY_SIZE(indices));
}
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, image.id, indices, ARRAY_SIZE(indices));
constituents[j++] = vkd3d_spirv_build_op_loadv(builder, type_id, ptr_id, access_mask, &alignment, 1);
if (resource->reg.modifier == VKD3DSPRM_NONUNIFORM)
@ -9963,17 +9818,7 @@ static void vkd3d_dxbc_compiler_emit_store_uav_raw_structured(struct vkd3d_dxbc_
if (component_count > 1)
texel_id = vkd3d_spirv_build_op_composite_extract1(builder, type_id, texel_id, component_idx);
if (access_mask == SpvMemoryAccessAlignedMask)
{
ptr_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, ptr_type_id,
image.id, indices, ARRAY_SIZE(indices));
}
else
{
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id,
image.id, indices, ARRAY_SIZE(indices));
}
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, image.id, indices, ARRAY_SIZE(indices));
vkd3d_spirv_build_op_storev(builder, ptr_id, texel_id, access_mask, &alignment, 1);
if (dst->reg.modifier == VKD3DSPRM_NONUNIFORM)
@ -10138,7 +9983,6 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
const struct vkd3d_shader_resource_binding *binding;
uint32_t type_id, result_id, pointer_id, zero_id;
const struct vkd3d_symbol *resource_symbol;
bool check_post_decrement;
uint32_t operands[3];
SpvOp op;
@ -10154,6 +9998,7 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
if (binding && (binding->flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA))
{
uint32_t ctr_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPhysicalStorageBuffer, type_id);
uint32_t buf_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, resource_symbol->info.resource.uav_counter_type_id);
uint32_t indices[2];
@ -10168,10 +10013,8 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
resource_symbol->info.resource.uav_counter_type_id,
pointer_id, SpvMemoryAccessMaskNone);
result_id = vkd3d_dxbc_compiler_emit_robust_physical_counter(compiler, pointer_id,
instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC);
check_post_decrement = false;
pointer_id = vkd3d_spirv_build_op_access_chain1(builder,
ctr_ptr_type_id, pointer_id, zero_id);
}
else if (binding && (binding->flags & VKD3D_SHADER_BINDING_FLAG_BINDLESS))
{
@ -10191,8 +10034,6 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
/* Need to mark the pointer argument itself as non-uniform. */
if (src->reg.modifier == VKD3DSPRM_NONUNIFORM)
vkd3d_dxbc_compiler_decorate_nonuniform(compiler, pointer_id);
check_post_decrement = true;
}
else
{
@ -10200,25 +10041,19 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
pointer_id = vkd3d_spirv_build_op_image_texel_pointer(builder, ptr_type_id,
resource_symbol->info.resource.uav_counter_id, zero_id, zero_id);
check_post_decrement = true;
}
if (check_post_decrement)
operands[0] = pointer_id;
operands[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvScopeDevice);
operands[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, memory_semantics);
result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream,
op, type_id, operands, ARRAY_SIZE(operands));
if (op == SpvOpAtomicIDecrement)
{
operands[0] = pointer_id;
operands[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvScopeDevice);
operands[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, memory_semantics);
result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream,
op, type_id, operands, ARRAY_SIZE(operands));
if (op == SpvOpAtomicIDecrement)
{
/* SpvOpAtomicIDecrement returns the original value. */
result_id = vkd3d_spirv_build_op_isub(builder, type_id, result_id,
vkd3d_dxbc_compiler_get_constant_uint(compiler, 1));
}
/* SpvOpAtomicIDecrement returns the original value. */
result_id = vkd3d_spirv_build_op_isub(builder, type_id, result_id,
vkd3d_dxbc_compiler_get_constant_uint(compiler, 1));
}
vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, result_id);
}

View File

@ -477,7 +477,6 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_info *
const struct vkd3d_shader_register *reg)
{
scan_info->has_side_effects = true;
scan_info->has_uav_counter = true;
vkd3d_shader_scan_set_register_flags(scan_info, VKD3DSPR_UAV,
reg->idx[0].offset, VKD3D_SHADER_UAV_FLAG_ATOMIC_COUNTER);
}

View File

@ -19,8 +19,6 @@
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_private.h"
#define RT_TRACE TRACE
void vkd3d_acceleration_structure_build_info_cleanup(
struct vkd3d_acceleration_structure_build_info *info)
{
@ -76,31 +74,19 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
bool have_triangles, have_aabbs;
unsigned int i;
RT_TRACE("Converting inputs.\n");
RT_TRACE("=====================\n");
build_info = &info->build_info;
memset(build_info, 0, sizeof(*build_info));
build_info->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
if (desc->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
{
build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
RT_TRACE("Top level build.\n");
}
else
{
build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
RT_TRACE("Bottom level build.\n");
}
build_info->flags = d3d12_build_flags_to_vk(desc->Flags);
if (desc->Flags & D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE)
{
RT_TRACE("BUILD_FLAG_PERFORM_UPDATE.\n");
build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
}
else
build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
@ -123,9 +109,6 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
info->primitive_counts = info->primitive_counts_stack;
info->primitive_counts[0] = desc->NumDescs;
build_info->geometryCount = 1;
RT_TRACE(" ArrayOfPointers: %u.\n",
desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS ? 1 : 0);
RT_TRACE(" NumDescs: %u.\n", info->primitive_counts[0]);
}
else
{
@ -149,21 +132,13 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
for (i = 0; i < desc->NumDescs; i++)
{
info->geometries[i].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
RT_TRACE(" Geom %u:\n", i);
if (desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
{
geom_desc = desc->ppGeometryDescs[i];
RT_TRACE(" ArrayOfPointers\n");
}
else
{
geom_desc = &desc->pGeometryDescs[i];
RT_TRACE(" PointerToArray\n");
}
info->geometries[i].flags = d3d12_geometry_flags_to_vk(geom_desc->Flags);
RT_TRACE(" Flags = #%x\n", geom_desc->Flags);
switch (geom_desc->Type)
{
@ -180,26 +155,17 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
triangles = &info->geometries[i].geometry.triangles;
triangles->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
triangles->indexData.deviceAddress = geom_desc->Triangles.IndexBuffer;
if (geom_desc->Triangles.IndexFormat != DXGI_FORMAT_UNKNOWN)
if (geom_desc->Triangles.IndexBuffer)
{
if (!geom_desc->Triangles.IndexBuffer)
WARN("Application is using IndexBuffer = 0 and IndexFormat != UNKNOWN. Likely application bug.\n");
triangles->indexType =
geom_desc->Triangles.IndexFormat == DXGI_FORMAT_R16_UINT ?
VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
info->primitive_counts[i] = geom_desc->Triangles.IndexCount / 3;
RT_TRACE(" Indexed : Index count = %u (%u bits)\n",
geom_desc->Triangles.IndexCount,
triangles->indexType == VK_INDEX_TYPE_UINT16 ? 16 : 32);
RT_TRACE(" Vertex count: %u\n", geom_desc->Triangles.VertexCount);
RT_TRACE(" IBO VA: %"PRIx64".\n", geom_desc->Triangles.IndexBuffer);
}
else
{
info->primitive_counts[i] = geom_desc->Triangles.VertexCount / 3;
triangles->indexType = VK_INDEX_TYPE_NONE_KHR;
RT_TRACE(" Triangle list : Vertex count: %u\n", geom_desc->Triangles.VertexCount);
}
triangles->maxVertex = max(1, geom_desc->Triangles.VertexCount) - 1;
@ -207,11 +173,6 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
triangles->vertexFormat = vkd3d_internal_get_vk_format(device, geom_desc->Triangles.VertexFormat);
triangles->vertexData.deviceAddress = geom_desc->Triangles.VertexBuffer.StartAddress;
triangles->transformData.deviceAddress = geom_desc->Triangles.Transform3x4;
RT_TRACE(" Transform3x4: %s\n", geom_desc->Triangles.Transform3x4 ? "on" : "off");
RT_TRACE(" Vertex format: %s\n", debug_dxgi_format(geom_desc->Triangles.VertexFormat));
RT_TRACE(" VBO VA: %"PRIx64"\n", geom_desc->Triangles.VertexBuffer.StartAddress);
RT_TRACE(" Vertex stride: %"PRIu64" bytes\n", geom_desc->Triangles.VertexBuffer.StrideInBytes);
break;
case D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS:
@ -229,15 +190,12 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
aabbs->stride = geom_desc->AABBs.AABBs.StrideInBytes;
aabbs->data.deviceAddress = geom_desc->AABBs.AABBs.StartAddress;
info->primitive_counts[i] = geom_desc->AABBs.AABBCount;
RT_TRACE(" AABB stride: %"PRIu64" bytes\n", geom_desc->AABBs.AABBs.StrideInBytes);
break;
default:
FIXME("Unsupported geometry type %u.\n", geom_desc->Type);
return false;
}
RT_TRACE(" Primitive count %u.\n", info->primitive_counts[i]);
}
}
@ -251,8 +209,6 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
}
build_info->pGeometries = info->geometries;
RT_TRACE("=====================\n");
return true;
}
@ -306,18 +262,12 @@ static void vkd3d_acceleration_structure_write_postbuild_info(
type_index = VKD3D_QUERY_TYPE_INDEX_RT_COMPACTED_SIZE;
stride = sizeof(uint64_t);
}
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE &&
list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
{
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR;
type_index = VKD3D_QUERY_TYPE_INDEX_RT_CURRENT_SIZE;
stride = sizeof(uint64_t);
}
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
{
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE;
stride = sizeof(uint64_t);
FIXME("NumBottomLevelPointers will always return 0.\n");
}
else
{
@ -348,31 +298,9 @@ static void vkd3d_acceleration_structure_write_postbuild_info(
if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
{
if (list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
{
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE_BOTTOM_LEVEL_POINTERS;
if (!d3d12_command_allocator_allocate_query_from_type_index(list->allocator,
type_index, &vk_query_pool, &vk_query_index))
{
ERR("Failed to allocate query.\n");
return;
}
d3d12_command_list_reset_query(list, vk_query_pool, vk_query_index);
VK_CALL(vkCmdWriteAccelerationStructuresPropertiesKHR(list->vk_command_buffer,
1, &vk_acceleration_structure, vk_query_type, vk_query_pool, vk_query_index));
VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
vk_query_pool, vk_query_index, 1,
vk_buffer, offset + sizeof(uint64_t), stride,
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
}
else
{
FIXME("NumBottomLevelPointers will always return 0.\n");
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
sizeof(uint64_t), 0));
}
/* TODO: We'll need some way to store these values for later use and copy them here instead. */
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
sizeof(uint64_t), 0));
}
}

View File

@ -50,8 +50,6 @@ static const char *vkd3d_breadcrumb_command_type_to_str(enum vkd3d_breadcrumb_co
return "dispatch";
case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT:
return "execute_indirect";
case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT_TEMPLATE:
return "execute_indirect_template";
case VKD3D_BREADCRUMB_COMMAND_COPY:
return "copy";
case VKD3D_BREADCRUMB_COMMAND_RESOLVE:
@ -84,8 +82,6 @@ static const char *vkd3d_breadcrumb_command_type_to_str(enum vkd3d_breadcrumb_co
return "root_desc";
case VKD3D_BREADCRUMB_COMMAND_ROOT_CONST:
return "root_const";
case VKD3D_BREADCRUMB_COMMAND_TAG:
return "tag";
default:
return "?";
@ -310,10 +306,6 @@ static void vkd3d_breadcrumb_tracer_report_command_list(
{
ERR(" Set arg: %"PRIu64" (#%"PRIx64")\n", cmd->word_64bit, cmd->word_64bit);
}
else if (cmd->type == VKD3D_BREADCRUMB_COMMAND_TAG)
{
ERR(" Tag: %s\n", cmd->tag);
}
else
{
ERR(" Command: %s\n", vkd3d_breadcrumb_command_type_to_str(cmd->type));

View File

@ -2873,7 +2873,6 @@ static void vkd3d_pipeline_library_disk_cache_merge(struct vkd3d_pipeline_librar
out:
/* There shouldn't be any write cache left after merging. */
vkd3d_file_unmap(&mapped_write_cache);
vkd3d_file_delete(write_path);
/* If we have a stale merge file lying around, we might have been killed at some point
@ -2885,9 +2884,9 @@ out:
vkd3d_file_delete(merge_path);
out_cancellation:
vkd3d_file_unmap(&mapped_write_cache);
if (merge_file)
fclose(merge_file);
vkd3d_file_unmap(&mapped_write_cache);
hash_map_clear(&map);
vkd3d_free(tmp_buffer);
}

File diff suppressed because it is too large Load Diff

View File

@ -61,56 +61,12 @@ void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
#define DEBUG_CHANNEL_WORD_COOKIE 0xdeadca70u
#define DEBUG_CHANNEL_WORD_MASK 0xfffffff0u
/* Maps an indirect-command patch token to a short human-readable label for
 * debug-ring logging. Unknown tokens map to "???". */
static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token token)
{
    switch (token)
    {
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32:
            return "RootConst";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO:
            return "IBO VA LO";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI:
            return "IBO VA HI";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE:
            return "IBO Size";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT:
            return "IBO Type";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO:
            return "VBO VA LO";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI:
            return "VBO VA HI";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE:
            return "VBO Size";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE:
            return "VBO Stride";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO:
            return "ROOT VA LO";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI:
            return "ROOT VA HI";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT:
            return "Vertex Count";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT:
            return "Index Count";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT:
            return "Instance Count";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX:
            return "First Index";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX:
            return "First Vertex";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE:
            return "First Instance";
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET:
            return "Vertex Offset";
        default:
            return "???";
    }
}
/* Returns true for patch tokens whose payload should be logged in hex:
 * the low/high halves of 64-bit GPU virtual addresses. All other token
 * payloads are logged as decimal values. */
static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token token)
{
    return token == VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO ||
            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI ||
            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO ||
            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI ||
            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO ||
            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI;
}
static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring,
uint32_t word_offset, uint32_t message_word_count)
{
uint32_t i, debug_instance, debug_thread_id[3], fmt;
char message_buffer[4096];
uint64_t shader_hash;
size_t len, avail;
if (message_word_count < 8)
{
@ -124,107 +80,52 @@ static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring
debug_thread_id[i] = READ_RING_WORD(word_offset + 4 + i);
fmt = READ_RING_WORD(word_offset + 7);
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %u, ID (%u, %u, %u):",
shader_hash, debug_instance,
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
word_offset += 8;
message_word_count -= 8;
if (shader_hash == 0)
for (i = 0; i < message_word_count; i++)
{
/* We got this from our internal debug shaders. Pretty-print.
* Make sure the log is sortable for easier debug.
* TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages,
* but that is overengineering at this time ... */
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ",
debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
if (message_word_count == 2)
union
{
len = strlen(message_buffer);
avail = sizeof(message_buffer) - len;
snprintf(message_buffer + len, avail, "DrawCount %u, MaxDrawCount %u",
READ_RING_WORD(word_offset + 0),
READ_RING_WORD(word_offset + 1));
}
else if (message_word_count == 4)
{
union { uint32_t u32; float f32; int32_t s32; } value;
enum vkd3d_patch_command_token token;
uint32_t dst_offset;
uint32_t src_offset;
float f32;
uint32_t u32;
int32_t i32;
} u;
const char *delim;
size_t len, avail;
u.u32 = READ_RING_WORD(word_offset + i);
len = strlen(message_buffer);
avail = sizeof(message_buffer) - len;
len = strlen(message_buffer);
if (len + 1 >= sizeof(message_buffer))
break;
avail = sizeof(message_buffer) - len;
token = READ_RING_WORD(word_offset + 0);
dst_offset = READ_RING_WORD(word_offset + 1);
src_offset = READ_RING_WORD(word_offset + 2);
value.u32 = READ_RING_WORD(word_offset + 3);
if (vkd3d_patch_command_token_is_hex(token))
{
snprintf(message_buffer + len, avail, "%s <- #%08x",
vkd3d_patch_command_token_str(token), value.u32);
}
else if (token == VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32)
{
snprintf(message_buffer + len, avail, "%s <- {hex #%08x, s32 %d, f32 %f}",
vkd3d_patch_command_token_str(token), value.u32, value.s32, value.f32);
}
else
{
snprintf(message_buffer + len, avail, "%s <- %d",
vkd3d_patch_command_token_str(token), value.s32);
}
len = strlen(message_buffer);
avail = sizeof(message_buffer) - len;
snprintf(message_buffer + len, avail, " (dst offset %u, src offset %u)", dst_offset, src_offset);
}
}
else
{
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %010u, ID (%u, %u, %u):",
shader_hash, debug_instance,
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
for (i = 0; i < message_word_count; i++)
{
union
{
float f32;
uint32_t u32;
int32_t i32;
} u;
const char *delim;
u.u32 = READ_RING_WORD(word_offset + i);
len = strlen(message_buffer);
if (len + 1 >= sizeof(message_buffer))
break;
avail = sizeof(message_buffer) - len;
delim = i == 0 ? " " : ", ";
delim = i == 0 ? " " : ", ";
#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
switch ((fmt >> (2u * i)) & 3u)
{
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
break;
switch ((fmt >> (2u * i)) & 3u)
{
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
break;
case VKD3D_DEBUG_CHANNEL_FMT_I32:
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
break;
case VKD3D_DEBUG_CHANNEL_FMT_I32:
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
break;
case VKD3D_DEBUG_CHANNEL_FMT_F32:
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
break;
case VKD3D_DEBUG_CHANNEL_FMT_F32:
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
break;
default:
snprintf(message_buffer + len, avail, "%s????", delim);
break;
}
default:
snprintf(message_buffer + len, avail, "%s????", delim);
break;
}
}

View File

@ -70,7 +70,6 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION_COND(KHR_DEFERRED_HOST_OPERATIONS, KHR_deferred_host_operations, VKD3D_CONFIG_FLAG_DXR),
VK_EXTENSION_COND(KHR_PIPELINE_LIBRARY, KHR_pipeline_library, VKD3D_CONFIG_FLAG_DXR),
VK_EXTENSION_COND(KHR_RAY_QUERY, KHR_ray_query, VKD3D_CONFIG_FLAG_DXR11),
VK_EXTENSION_COND(KHR_RAY_TRACING_MAINTENANCE_1, KHR_ray_tracing_maintenance1, VKD3D_CONFIG_FLAG_DXR11),
VK_EXTENSION(KHR_SPIRV_1_4, KHR_spirv_1_4),
VK_EXTENSION(KHR_SHADER_FLOAT_CONTROLS, KHR_shader_float_controls),
VK_EXTENSION(KHR_FRAGMENT_SHADING_RATE, KHR_fragment_shading_rate),
@ -84,16 +83,9 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION(KHR_BIND_MEMORY_2, KHR_bind_memory2),
VK_EXTENSION(KHR_COPY_COMMANDS_2, KHR_copy_commands2),
VK_EXTENSION(KHR_DYNAMIC_RENDERING, KHR_dynamic_rendering),
/* Only required to silence validation errors. */
VK_EXTENSION(KHR_DEPTH_STENCIL_RESOLVE, KHR_depth_stencil_resolve),
VK_EXTENSION(KHR_DRIVER_PROPERTIES, KHR_driver_properties),
VK_EXTENSION(KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, KHR_uniform_buffer_standard_layout),
VK_EXTENSION(KHR_MAINTENANCE_4, KHR_maintenance4),
VK_EXTENSION(KHR_FRAGMENT_SHADER_BARYCENTRIC, KHR_fragment_shader_barycentric),
#ifdef _WIN32
VK_EXTENSION(KHR_EXTERNAL_MEMORY_WIN32, KHR_external_memory_win32),
VK_EXTENSION(KHR_EXTERNAL_SEMAPHORE_WIN32, KHR_external_semaphore_win32),
#endif
/* EXT extensions */
VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps),
VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering),
@ -131,7 +123,6 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION(NV_FRAGMENT_SHADER_BARYCENTRIC, NV_fragment_shader_barycentric),
VK_EXTENSION(NV_COMPUTE_SHADER_DERIVATIVES, NV_compute_shader_derivatives),
VK_EXTENSION_COND(NV_DEVICE_DIAGNOSTIC_CHECKPOINTS, NV_device_diagnostic_checkpoints, VKD3D_CONFIG_FLAG_BREADCRUMBS),
VK_EXTENSION(NV_DEVICE_GENERATED_COMMANDS, NV_device_generated_commands),
/* VALVE extensions */
VK_EXTENSION(VALVE_MUTABLE_DESCRIPTOR_TYPE, VALVE_mutable_descriptor_type),
VK_EXTENSION(VALVE_DESCRIPTOR_SET_HOST_MAPPING, VALVE_descriptor_set_host_mapping),
@ -487,14 +478,6 @@ static void vkd3d_init_debug_messenger_callback(struct vkd3d_instance *instance)
instance->vk_debug_callback = callback;
}
/* Could be a flag style enum if needed. */
enum vkd3d_application_feature_override
{
VKD3D_APPLICATION_FEATURE_OVERRIDE_NONE = 0,
VKD3D_APPLICATION_FEATURE_OVERRIDE_PROMOTE_DXR_TO_ULTIMATE,
};
static enum vkd3d_application_feature_override vkd3d_application_feature_override;
uint64_t vkd3d_config_flags;
struct vkd3d_shader_quirk_info vkd3d_shader_quirk_info;
@ -504,19 +487,13 @@ struct vkd3d_instance_application_meta
const char *name;
uint64_t global_flags_add;
uint64_t global_flags_remove;
enum vkd3d_application_feature_override override;
};
static const struct vkd3d_instance_application_meta application_override[] = {
/* MSVC fails to compile empty array. */
{ VKD3D_STRING_COMPARE_EXACT, "GravityMark.exe", VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE, 0 },
{ VKD3D_STRING_COMPARE_EXACT, "Deathloop.exe", VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE, 0 },
/* Halo Infinite (1240440).
* Game relies on NON_ZEROED committed UAVs to be cleared to zero on allocation.
* This works okay with zerovram on first game boot, but not later, since this memory is guaranteed to be recycled.
* Game also relies on indirectly modifying CBV root descriptors, which means we are forced to rely on RAW_VA_CBV. */
{ VKD3D_STRING_COMPARE_EXACT, "HaloInfinite.exe",
VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV | VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV |
VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK, 0 },
/* The game forgets to do a barrier when going from render pass to compute. */
{ VKD3D_STRING_COMPARE_EXACT, "Deathloop.exe",
VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE | VKD3D_CONFIG_FLAG_WORKAROUND_MISSING_COLOR_COMPUTE_BARRIERS, 0 },
/* Shadow of the Tomb Raider (750920).
* Invariant workarounds actually cause more issues than they resolve on NV.
* RADV already has workarounds by default.
@ -532,20 +509,6 @@ static const struct vkd3d_instance_application_meta application_override[] = {
/* Serious Sam 4 (257420).
* Invariant workarounds cause graphical glitches when rendering foliage on NV. */
{ VKD3D_STRING_COMPARE_EXACT, "Sam4.exe", VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION, 0 },
/* Cyberpunk 2077 (1091500). */
{ VKD3D_STRING_COMPARE_EXACT, "Cyberpunk2077.exe", VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION, 0 },
/* Resident Evil: Village (1196590).
* Game relies on mesh + sampler feedback to be exposed to use DXR.
* Likely used as a proxy for Turing+ to avoid potential software fallbacks on Pascal. */
{ VKD3D_STRING_COMPARE_EXACT, "re8.exe",
VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16, 0, VKD3D_APPLICATION_FEATURE_OVERRIDE_PROMOTE_DXR_TO_ULTIMATE },
/* Resident Evil 2 remake (883710). Same as RE: Village. */
{ VKD3D_STRING_COMPARE_EXACT, "re2.exe",
VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16, 0, VKD3D_APPLICATION_FEATURE_OVERRIDE_PROMOTE_DXR_TO_ULTIMATE },
{ VKD3D_STRING_COMPARE_EXACT, "re3.exe",
VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16, 0, VKD3D_APPLICATION_FEATURE_OVERRIDE_PROMOTE_DXR_TO_ULTIMATE },
{ VKD3D_STRING_COMPARE_EXACT, "re7.exe",
VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16, 0, VKD3D_APPLICATION_FEATURE_OVERRIDE_PROMOTE_DXR_TO_ULTIMATE },
{ VKD3D_STRING_COMPARE_NEVER, NULL, 0, 0 }
};
@ -566,7 +529,7 @@ static const struct vkd3d_shader_quirk_info ue4_quirks = {
ue4_hashes, ARRAY_SIZE(ue4_hashes), 0,
};
static const struct vkd3d_shader_quirk_info f1_2019_2020_quirks = {
static const struct vkd3d_shader_quirk_info f1_2020_quirks = {
NULL, 0, VKD3D_SHADER_QUIRK_FORCE_TGSM_BARRIERS,
};
@ -574,9 +537,7 @@ static const struct vkd3d_shader_quirk_meta application_shader_quirks[] = {
/* Unreal Engine 4 */
{ VKD3D_STRING_COMPARE_ENDS_WITH, "-Shipping.exe", &ue4_quirks },
/* F1 2020 (1080110) */
{ VKD3D_STRING_COMPARE_EXACT, "F1_2020_dx12.exe", &f1_2019_2020_quirks },
/* F1 2019 (928600) */
{ VKD3D_STRING_COMPARE_EXACT, "F1_2019_dx12.exe", &f1_2019_2020_quirks },
{ VKD3D_STRING_COMPARE_EXACT, "F1_2020_dx12.exe", &f1_2020_quirks },
/* MSVC fails to compile empty array. */
{ VKD3D_STRING_COMPARE_NEVER, NULL, NULL },
};
@ -596,7 +557,6 @@ static void vkd3d_instance_apply_application_workarounds(void)
vkd3d_config_flags &= ~application_override[i].global_flags_remove;
INFO("Detected game %s, adding config 0x%"PRIx64", removing masks 0x%"PRIx64".\n",
app, application_override[i].global_flags_add, application_override[i].global_flags_remove);
vkd3d_application_feature_override = application_override[i].override;
break;
}
}
@ -692,8 +652,6 @@ static const struct vkd3d_debug_option vkd3d_config_options[] =
{"breadcrumbs", VKD3D_CONFIG_FLAG_BREADCRUMBS},
{"pipeline_library_app_cache", VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY},
{"shader_cache_sync", VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC},
{"force_raw_va_cbv", VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV},
{"allow_sbt_collection", VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION},
};
static void vkd3d_config_flags_init_once(void)
@ -1396,12 +1354,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->features2, &info->ray_query_features);
}
if (vulkan_info->KHR_ray_tracing_maintenance1)
{
info->ray_tracing_maintenance1_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_MAINTENANCE_1_FEATURES_KHR;
vk_prepend_struct(&info->features2, &info->ray_tracing_maintenance1_features);
}
if (vulkan_info->KHR_shader_float_controls)
{
info->float_control_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
@ -1433,20 +1385,13 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->properties2, &info->shader_integer_dot_product_properties);
}
if (vulkan_info->NV_fragment_shader_barycentric && !vulkan_info->KHR_fragment_shader_barycentric)
if (vulkan_info->NV_fragment_shader_barycentric)
{
info->barycentric_features_nv.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_NV;
vk_prepend_struct(&info->features2, &info->barycentric_features_nv);
}
if (vulkan_info->KHR_fragment_shader_barycentric)
{
info->barycentric_features_khr.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR;
vk_prepend_struct(&info->features2, &info->barycentric_features_khr);
}
if (vulkan_info->NV_compute_shader_derivatives)
{
info->compute_shader_derivatives_features_nv.sType =
@ -1454,16 +1399,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->features2, &info->compute_shader_derivatives_features_nv);
}
if (vulkan_info->NV_device_generated_commands)
{
info->device_generated_commands_features_nv.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV;
info->device_generated_commands_properties_nv.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_NV;
vk_prepend_struct(&info->features2, &info->device_generated_commands_features_nv);
vk_prepend_struct(&info->properties2, &info->device_generated_commands_properties_nv);
}
if (vulkan_info->KHR_shader_atomic_int64)
{
info->shader_atomic_int64_features.sType =
@ -2586,52 +2521,22 @@ static void d3d12_remove_device_singleton(LUID luid)
}
}
static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, VkDeviceSize size, struct vkd3d_scratch_buffer *scratch)
{
struct vkd3d_allocate_heap_memory_info alloc_info;
HRESULT hr;
TRACE("device %p, size %llu, scratch %p.\n", device, size, scratch);
if (kind == VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE)
{
struct vkd3d_allocate_heap_memory_info alloc_info;
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.heap_desc.SizeInBytes = size;
alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
/* We only care about memory types for INDIRECT_PREPROCESS. */
assert(memory_types == ~0u);
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.heap_desc.SizeInBytes = size;
alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH;
if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
}
else if (kind == VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS)
{
struct vkd3d_allocate_memory_info alloc_info;
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
alloc_info.memory_requirements.size = size;
alloc_info.memory_requirements.memoryTypeBits = memory_types;
alloc_info.memory_requirements.alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
alloc_info.heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
alloc_info.optional_memory_properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER | VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH;
if (FAILED(hr = vkd3d_allocate_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
}
else
{
return E_INVALIDARG;
}
if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator,
&alloc_info, &scratch->allocation)))
return hr;
scratch->offset = 0;
return S_OK;
@ -2644,47 +2549,35 @@ static void d3d12_device_destroy_scratch_buffer(struct d3d12_device *device, con
vkd3d_free_memory(device, &device->memory_allocator, &scratch->allocation);
}
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch)
{
struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind];
struct vkd3d_scratch_buffer *candidate;
size_t i;
if (min_size > VKD3D_SCRATCH_BUFFER_SIZE)
return d3d12_device_create_scratch_buffer(device, kind, min_size, memory_types, scratch);
return d3d12_device_create_scratch_buffer(device, min_size, scratch);
pthread_mutex_lock(&device->mutex);
for (i = pool->scratch_buffer_count; i; i--)
if (device->scratch_buffer_count)
{
candidate = &pool->scratch_buffers[i - 1];
/* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */
if (memory_types & (1u << candidate->allocation.device_allocation.vk_memory_type))
{
*scratch = *candidate;
scratch->offset = 0;
pool->scratch_buffers[i - 1] = pool->scratch_buffers[--pool->scratch_buffer_count];
pthread_mutex_unlock(&device->mutex);
return S_OK;
}
*scratch = device->scratch_buffers[--device->scratch_buffer_count];
scratch->offset = 0;
pthread_mutex_unlock(&device->mutex);
return S_OK;
}
else
{
pthread_mutex_unlock(&device->mutex);
return d3d12_device_create_scratch_buffer(device, VKD3D_SCRATCH_BUFFER_SIZE, scratch);
}
pthread_mutex_unlock(&device->mutex);
return d3d12_device_create_scratch_buffer(device, kind, VKD3D_SCRATCH_BUFFER_SIZE, memory_types, scratch);
}
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
const struct vkd3d_scratch_buffer *scratch)
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch)
{
struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind];
pthread_mutex_lock(&device->mutex);
if (scratch->allocation.resource.size == VKD3D_SCRATCH_BUFFER_SIZE &&
pool->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT)
device->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT)
{
pool->scratch_buffers[pool->scratch_buffer_count++] = *scratch;
device->scratch_buffers[device->scratch_buffer_count++] = *scratch;
pthread_mutex_unlock(&device->mutex);
}
else
@ -2770,21 +2663,11 @@ static HRESULT d3d12_device_create_query_pool(struct d3d12_device *device, uint3
pool_info.queryCount = 128;
break;
case VKD3D_QUERY_TYPE_INDEX_RT_CURRENT_SIZE:
pool_info.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR;
pool_info.queryCount = 128;
break;
case VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE:
pool_info.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
pool_info.queryCount = 128;
break;
case VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE_BOTTOM_LEVEL_POINTERS:
pool_info.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR;
pool_info.queryCount = 128;
break;
default:
ERR("Unhandled query type %u.\n", type_index);
return E_INVALIDARG;
@ -2900,14 +2783,55 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(d3d12_device_iface *iface)
return refcount;
}
static HRESULT d3d12_device_global_pipeline_cache_init(struct d3d12_device *device)
{
/* On certain drivers, VkPipelineCache has a fixed (large) memory overhead.
* This means that using a VkPipelineCache per PSO will explode system memory usage, leading to OOM.
* To counteract this, we use one global pipeline cache instead, but this means we lose the ability to
* serialize and unserialize PSO state. Instead, we can just serialize garbage and ignore unserialization.
* From a correctness PoV, this is perfectly fine, and cached PSOs should be present in disk cache either way.
* The bug was introduced in 470 series, but was fixed as of 470.62.02 driver.
* 470.63.01 mainline one was released before 62.02, so it is also included in workaround list. */
bool use_global = false;
VkResult vr;
if (device->device_info.properties2.properties.vendorID == VKD3D_VENDOR_ID_NVIDIA)
{
uint32_t driver_version = device->device_info.properties2.properties.driverVersion;
use_global = (driver_version >= VKD3D_DRIVER_VERSION_MAKE_NV(470, 0, 0) &&
driver_version < VKD3D_DRIVER_VERSION_MAKE_NV(470, 62, 2)) ||
driver_version == VKD3D_DRIVER_VERSION_MAKE_NV(470, 63, 1);
if (use_global)
WARN("Workaround applied. Creating global pipeline cache.\n");
}
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE)
{
INFO("Using global pipeline cache, PSO caches will not be saved to individual blobs.\n");
use_global = true;
}
if (!use_global)
return S_OK;
vr = vkd3d_create_pipeline_cache(device, 0, NULL, &device->global_pipeline_cache);
return hresult_from_vk_result(vr);
}
static void d3d12_device_global_pipeline_cache_cleanup(struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VK_CALL(vkDestroyPipelineCache(device->vk_device, device->global_pipeline_cache, NULL));
}
static void d3d12_device_destroy(struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
size_t i, j;
size_t i;
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
for (j = 0; j < device->scratch_pools[i].scratch_buffer_count; j++)
d3d12_device_destroy_scratch_buffer(device, &device->scratch_pools[i].scratch_buffers[j]);
for (i = 0; i < device->scratch_buffer_count; i++)
d3d12_device_destroy_scratch_buffer(device, &device->scratch_buffers[i]);
for (i = 0; i < device->query_pool_count; i++)
d3d12_device_destroy_query_pool(device, &device->query_pools[i]);
@ -2927,6 +2851,7 @@ static void d3d12_device_destroy(struct d3d12_device *device)
vkd3d_breadcrumb_tracer_cleanup(&device->breadcrumb_tracer, device);
#endif
vkd3d_pipeline_library_flush_disk_cache(&device->disk_cache);
d3d12_device_global_pipeline_cache_cleanup(device);
vkd3d_sampler_state_cleanup(&device->sampler_state, device);
vkd3d_view_map_destroy(&device->sampler_map, device);
vkd3d_meta_ops_cleanup(&device->meta_ops, device);
@ -4405,263 +4330,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(d3d12_device_if
ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access,
const WCHAR *name, HANDLE *handle)
{
#ifdef _WIN32
struct d3d12_device *device = impl_from_ID3D12Device(iface);
const struct vkd3d_vk_device_procs *vk_procs;
struct DxvkSharedTextureMetadata metadata;
ID3D12Resource *resource_iface;
ID3D12Fence *fence_iface;
vk_procs = &device->vk_procs;
TRACE("iface %p, object %p, attributes %p, access %#x, name %s, handle %p\n",
FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n",
iface, object, attributes, access, debugstr_w(name), handle);
if (SUCCEEDED(ID3D12DeviceChild_QueryInterface(object, &IID_ID3D12Resource, (void**)&resource_iface)))
{
struct d3d12_resource *resource = impl_from_ID3D12Resource(resource_iface);
VkMemoryGetWin32HandleInfoKHR win32_handle_info;
VkResult vr;
if (!(resource->heap_flags & D3D12_HEAP_FLAG_SHARED))
{
ID3D12Resource_Release(resource_iface);
return DXGI_ERROR_INVALID_CALL;
}
if (attributes)
FIXME("attributes %p not handled.\n", attributes);
if (access)
FIXME("access %#x not handled.\n", access);
if (name)
FIXME("name %s not handled.\n", debugstr_w(name));
win32_handle_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
win32_handle_info.pNext = NULL;
win32_handle_info.memory = resource->mem.device_allocation.vk_memory;
win32_handle_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
vr = VK_CALL(vkGetMemoryWin32HandleKHR(device->vk_device, &win32_handle_info, handle));
if (vr == VK_SUCCESS)
{
if (resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D)
{
FIXME("Shared texture metadata structure only supports 2D textures.");
}
else
{
metadata.Width = resource->desc.Width;
metadata.Height = resource->desc.Height;
metadata.MipLevels = resource->desc.MipLevels;
metadata.ArraySize = resource->desc.DepthOrArraySize;
metadata.Format = resource->desc.Format;
metadata.SampleDesc = resource->desc.SampleDesc;
metadata.Usage = D3D11_USAGE_DEFAULT;
metadata.BindFlags = D3D11_BIND_SHADER_RESOURCE;
metadata.CPUAccessFlags = 0;
metadata.MiscFlags = D3D11_RESOURCE_MISC_SHARED_NTHANDLE;
if (resource->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)
metadata.BindFlags |= D3D11_BIND_RENDER_TARGET;
if (resource->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)
metadata.BindFlags |= D3D11_BIND_DEPTH_STENCIL;
if (resource->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)
metadata.BindFlags |= D3D11_BIND_UNORDERED_ACCESS;
if (resource->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)
metadata.BindFlags &= ~D3D11_BIND_SHADER_RESOURCE;
if (!vkd3d_set_shared_metadata(*handle, &metadata, sizeof(metadata)))
ERR("Failed to set metadata for shared resource, importing created handle will fail.\n");
}
}
ID3D12Resource_Release(resource_iface);
return vr ? E_FAIL : S_OK;
}
if (SUCCEEDED(ID3D12DeviceChild_QueryInterface(object, &IID_ID3D12Fence, (void**)&fence_iface)))
{
VkSemaphoreGetWin32HandleInfoKHR win32_handle_info;
struct d3d12_shared_fence *fence;
VkResult vr;
if (!is_shared_ID3D12Fence(fence_iface))
{
ID3D12Fence_Release(fence_iface);
return DXGI_ERROR_INVALID_CALL;
}
fence = shared_impl_from_ID3D12Fence(fence_iface);
if (attributes)
FIXME("attributes %p not handled\n", attributes);
if (access)
FIXME("access %#x not handled\n", access);
if (name)
FIXME("name %s not handled\n", debugstr_w(name));
win32_handle_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR;
win32_handle_info.pNext = NULL;
win32_handle_info.semaphore = fence->timeline_semaphore;
win32_handle_info.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT;
vr = VK_CALL(vkGetSemaphoreWin32HandleKHR(device->vk_device, &win32_handle_info, handle));
ID3D12Fence_Release(fence_iface);
return vr ? E_FAIL : S_OK;
}
FIXME("Creating shared handle for type of object %p unsupported.\n", object);
return E_NOTIMPL;
#else
FIXME("CreateSharedHandle can only be implemented in native Win32.\n");
return E_NOTIMPL;
#endif
}
#ifdef _WIN32
static inline bool handle_is_kmt_style(HANDLE handle)
{
return ((ULONG_PTR)handle & 0x40000000) && ((ULONG_PTR)handle - 2) % 4 == 0;
}
#endif
static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(d3d12_device_iface *iface,
HANDLE handle, REFIID riid, void **object)
{
#ifdef _WIN32
struct d3d12_device *device = impl_from_ID3D12Device(iface);
const struct vkd3d_vk_device_procs *vk_procs;
HRESULT hr;
vk_procs = &device->vk_procs;
TRACE("iface %p, handle %p, riid %s, object %p\n",
FIXME("iface %p, handle %p, riid %s, object %p stub!\n",
iface, handle, debugstr_guid(riid), object);
if (IsEqualGUID(riid, &IID_ID3D12Resource))
{
struct DxvkSharedTextureMetadata metadata;
D3D12_HEAP_PROPERTIES heap_props;
struct d3d12_resource *resource;
D3D12_RESOURCE_DESC1 desc;
bool kmt_handle = false;
if (handle_is_kmt_style(handle))
{
handle = vkd3d_open_kmt_handle(handle);
kmt_handle = true;
if (handle == INVALID_HANDLE_VALUE)
{
WARN("Failed to open KMT-style ID3D12Resource shared handle.\n");
*object = NULL;
return E_INVALIDARG;
}
}
if (!vkd3d_get_shared_metadata(handle, &metadata, sizeof(metadata), NULL))
{
WARN("Failed to get ID3D12Resource shared handle metadata.\n");
if (kmt_handle)
CloseHandle(handle);
*object = NULL;
return E_INVALIDARG;
}
desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
desc.Alignment = 0;
desc.Width = metadata.Width;
desc.Height = metadata.Height;
desc.DepthOrArraySize = metadata.ArraySize;
desc.MipLevels = metadata.MipLevels;
desc.Format = metadata.Format;
desc.SampleDesc = metadata.SampleDesc;
switch (metadata.TextureLayout)
{
case D3D11_TEXTURE_LAYOUT_UNDEFINED: desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; break;
case D3D11_TEXTURE_LAYOUT_ROW_MAJOR: desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; break;
case D3D11_TEXTURE_LAYOUT_64K_STANDARD_SWIZZLE: desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_STANDARD_SWIZZLE; break;
default: desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
}
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;
if (metadata.BindFlags & D3D11_BIND_RENDER_TARGET)
desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
if (metadata.BindFlags & D3D11_BIND_DEPTH_STENCIL)
desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
if (metadata.BindFlags & D3D11_BIND_UNORDERED_ACCESS)
desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
if ((metadata.BindFlags & D3D11_BIND_DEPTH_STENCIL) && !(metadata.BindFlags & D3D11_BIND_SHADER_RESOURCE))
desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
desc.SamplerFeedbackMipRegion.Width = 0;
desc.SamplerFeedbackMipRegion.Height = 0;
desc.SamplerFeedbackMipRegion.Depth = 0;
heap_props.Type = D3D12_HEAP_TYPE_DEFAULT;
heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
heap_props.CreationNodeMask = 0;
heap_props.VisibleNodeMask = 0;
hr = d3d12_resource_create_committed(device, &desc, &heap_props,
D3D12_HEAP_FLAG_SHARED, D3D12_RESOURCE_STATE_COMMON, NULL, handle, &resource);
if (kmt_handle)
CloseHandle(handle);
if (FAILED(hr))
{
WARN("Failed to open shared ID3D12Resource, hr %#x.\n", hr);
*object = NULL;
return hr;
}
return return_interface(&resource->ID3D12Resource_iface, &IID_ID3D12Resource, riid, object);
}
if (IsEqualGUID(riid, &IID_ID3D12Fence))
{
VkImportSemaphoreWin32HandleInfoKHR import_info;
struct d3d12_shared_fence *fence;
VkResult vr;
hr = d3d12_shared_fence_create(device, 0, D3D12_FENCE_FLAG_SHARED, &fence);
if (FAILED(hr))
{
WARN("Failed to create object for imported ID3D12Fence, hr %#x.\n", hr);
*object = NULL;
return hr;
}
import_info.sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR;
import_info.pNext = NULL;
import_info.semaphore = fence->timeline_semaphore;
import_info.flags = 0;
import_info.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT;
import_info.handle = handle;
import_info.name = NULL;
vr = VK_CALL(vkImportSemaphoreWin32HandleKHR(device->vk_device, &import_info));
if (vr != VK_SUCCESS)
{
WARN("Failed to open shared ID3D12Fence, vr %d.\n", vr);
ID3D12Fence1_Release(&fence->ID3D12Fence_iface);
*object = NULL;
return E_FAIL;
}
return return_interface(&fence->ID3D12Fence_iface, &IID_ID3D12Fence, riid, object);
}
FIXME("Opening shared handle type %s unsupported\n", debugstr_guid(riid));
return E_NOTIMPL;
#else
FIXME("OpenSharedhandle can only be implemented in native Win32.\n");
return E_NOTIMPL;
#endif
}
static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(d3d12_device_iface *iface,
@ -4695,24 +4376,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(d3d12_device_iface *if
UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence)
{
struct d3d12_device *device = impl_from_ID3D12Device(iface);
struct d3d12_shared_fence *shared_object;
struct d3d12_fence *object;
HRESULT hr;
TRACE("iface %p, intial_value %#"PRIx64", flags %#x, riid %s, fence %p.\n",
iface, initial_value, flags, debugstr_guid(riid), fence);
if (flags & D3D12_FENCE_FLAG_SHARED)
{
if (SUCCEEDED(hr = d3d12_shared_fence_create(device, initial_value, flags, &shared_object)))
return return_interface(&shared_object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence);
if (hr != E_NOTIMPL)
return hr;
FIXME("Shared fences not supported by Vulkan host, returning regular fence.\n");
}
if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object)))
return hr;
@ -4775,10 +4444,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(d3d12_device_i
}
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(d3d12_device_iface *iface,
const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature_iface,
const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature,
REFIID iid, void **command_signature)
{
struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(root_signature_iface);
struct d3d12_device *device = impl_from_ID3D12Device(iface);
struct d3d12_command_signature *object;
HRESULT hr;
@ -4786,7 +4454,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(d3d12_devic
TRACE("iface %p, desc %p, root_signature %p, iid %s, command_signature %p.\n",
iface, desc, root_signature, debugstr_guid(iid), command_signature);
if (FAILED(hr = d3d12_command_signature_create(device, root_signature, desc, &object)))
if (FAILED(hr = d3d12_command_signature_create(device, desc, &object)))
return hr;
return return_interface(&object->ID3D12CommandSignature_iface,
@ -5265,10 +4933,6 @@ static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePre
info->ResultDataMaxSizeInBytes = size_info.accelerationStructureSize;
info->ScratchDataSizeInBytes = size_info.buildScratchSize;
info->UpdateScratchDataSizeInBytes = size_info.updateScratchSize;
TRACE("ResultDataMaxSizeInBytes: %"PRIu64".\n", info->ResultDataMaxSizeInBytes);
TRACE("ScratchDatSizeInBytes: %"PRIu64".\n", info->ScratchDataSizeInBytes);
TRACE("UpdateScratchDataSizeInBytes: %"PRIu64".\n", info->UpdateScratchDataSizeInBytes);
}
static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(d3d12_device_iface *iface,
@ -5414,7 +5078,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(d3d12_dev
FIXME("Ignoring protected session %p.\n", protected_session);
if (FAILED(hr = d3d12_resource_create_committed(device, desc, heap_properties,
heap_flags, initial_state, optimized_clear_value, NULL, &object)))
heap_flags, initial_state, optimized_clear_value, &object)))
{
*resource = NULL;
return hr;
@ -5816,11 +5480,12 @@ static D3D12_RAYTRACING_TIER d3d12_device_determine_ray_tracing_tier(struct d3d1
}
if (tier == D3D12_RAYTRACING_TIER_1_0 && info->ray_query_features.rayQuery &&
info->ray_tracing_pipeline_features.rayTraversalPrimitiveCulling)
info->ray_tracing_pipeline_features.rayTraversalPrimitiveCulling &&
info->ray_tracing_pipeline_features.rayTracingPipelineTraceRaysIndirect)
{
/* Try to enable DXR 1.1.
* Hide this support behind a CONFIG flag for time being.
* TODO: require VK_KHR_ray_tracing_maintenance1. */
/* Try to enable DXR 1.1. We can support everything from 1.1 with existing spec,
* except ExecuteIndirect DispatchRays().
* Hide this support behind a CONFIG flag for time being. */
supports_vbo_formats = d3d12_device_supports_rtas_formats(device,
required_vbo_formats_tier_11, ARRAY_SIZE(required_vbo_formats_tier_11));
@ -5839,14 +5504,16 @@ static D3D12_RESOURCE_HEAP_TIER d3d12_device_determine_heap_tier(struct d3d12_de
const VkPhysicalDeviceLimits *limits = &device->device_info.properties2.properties.limits;
const struct vkd3d_memory_info *mem_info = &device->memory_info;
const struct vkd3d_memory_info_domain *non_cpu_domain;
const struct vkd3d_memory_info_domain *cpu_domain;
non_cpu_domain = &mem_info->non_cpu_accessible_domain;
cpu_domain = &mem_info->cpu_accessible_domain;
/* Heap Tier 2 requires us to be able to create a heap that supports all resource
* categories at the same time, except RT/DS textures on UPLOAD/READBACK heaps.
* Ignore CPU visible heaps since we only place buffers there. Textures are promoted to committed always. */
// Heap Tier 2 requires us to be able to create a heap that supports all resource
// categories at the same time, except RT/DS textures on UPLOAD/READBACK heaps.
if (limits->bufferImageGranularity > D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT ||
!(non_cpu_domain->buffer_type_mask & non_cpu_domain->sampled_type_mask & non_cpu_domain->rt_ds_type_mask))
!(non_cpu_domain->buffer_type_mask & non_cpu_domain->sampled_type_mask & non_cpu_domain->rt_ds_type_mask) ||
!(cpu_domain->buffer_type_mask & cpu_domain->sampled_type_mask))
return D3D12_RESOURCE_HEAP_TIER_1;
return D3D12_RESOURCE_HEAP_TIER_2;
@ -5991,9 +5658,7 @@ static void d3d12_device_caps_init_feature_options3(struct d3d12_device *device)
D3D12_COMMAND_LIST_SUPPORT_FLAG_BUNDLE;
/* Currently not supported */
options3->ViewInstancingTier = D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED;
options3->BarycentricsSupported =
device->device_info.barycentric_features_nv.fragmentShaderBarycentric ||
device->device_info.barycentric_features_khr.fragmentShaderBarycentric;
options3->BarycentricsSupported = device->device_info.barycentric_features_nv.fragmentShaderBarycentric;
}
static void d3d12_device_caps_init_feature_options4(struct d3d12_device *device)
@ -6260,27 +5925,6 @@ static void d3d12_device_caps_init_shader_model(struct d3d12_device *device)
}
}
/* Applies per-application feature overrides.
 * Some games rely on certain features to be exposed before they let the
 * primary feature be exposed. */
static void d3d12_device_caps_override_application(struct d3d12_device *device)
{
    if (vkd3d_application_feature_override != VKD3D_APPLICATION_FEATURE_OVERRIDE_PROMOTE_DXR_TO_ULTIMATE)
        return;

    /* The override only kicks in once DXR itself is exposed. */
    if (device->d3d12_caps.options5.RaytracingTier < D3D12_RAYTRACING_TIER_1_0)
        return;

    /* The game checks for mesh shaders / sampler feedback before enabling
     * DXR, even though it never actually uses them. */
    device->d3d12_caps.options7.MeshShaderTier = D3D12_MESH_SHADER_TIER_1;
    device->d3d12_caps.options7.SamplerFeedbackTier = D3D12_SAMPLER_FEEDBACK_TIER_1_0;
    INFO("DXR enabled. Application also requires Mesh/Sampler feedback to be exposed (but unused). "
            "Enabling these features automatically.\n");
}
static void d3d12_device_caps_override(struct d3d12_device *device)
{
D3D_FEATURE_LEVEL fl_override = (D3D_FEATURE_LEVEL)0;
@ -6371,7 +6015,6 @@ static void d3d12_device_caps_init(struct d3d12_device *device)
d3d12_device_caps_init_feature_level(device);
d3d12_device_caps_override(device);
d3d12_device_caps_override_application(device);
}
static void vkd3d_init_shader_extensions(struct d3d12_device *device)
@ -6383,7 +6026,9 @@ static void vkd3d_init_shader_extensions(struct d3d12_device *device)
VKD3D_SHADER_TARGET_EXTENSION_SPV_EXT_DEMOTE_TO_HELPER_INVOCATION;
}
if (device->device_info.shader_integer_dot_product_features.shaderIntegerDotProduct)
if (device->device_info.shader_integer_dot_product_features.shaderIntegerDotProduct &&
device->device_info.shader_integer_dot_product_properties.integerDotProduct4x8BitPackedSignedAccelerated &&
device->device_info.shader_integer_dot_product_properties.integerDotProduct4x8BitPackedUnsignedAccelerated)
{
device->vk_info.shader_extensions[device->vk_info.shader_extension_count++] =
VKD3D_SHADER_TARGET_EXTENSION_SPV_KHR_INTEGER_DOT_PRODUCT;
@ -6417,21 +6062,6 @@ static void vkd3d_init_shader_extensions(struct d3d12_device *device)
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS;
}
}
if (device->device_info.barycentric_features_khr.fragmentShaderBarycentric)
{
device->vk_info.shader_extensions[device->vk_info.shader_extension_count++] =
VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR;
}
if (device->d3d12_caps.options4.Native16BitShaderOpsSupported &&
(device->device_info.driver_properties.driverID == VK_DRIVER_ID_MESA_RADV ||
(vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16)))
{
/* Native FP16 is buggy on NV for now. */
device->vk_info.shader_extensions[device->vk_info.shader_extension_count++] =
VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT;
}
}
static void vkd3d_compute_shader_interface_key(struct d3d12_device *device)
@ -6556,11 +6186,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
goto out_cleanup_debug_ring;
#endif
if (FAILED(hr = d3d12_device_global_pipeline_cache_init(device)))
goto out_cleanup_breadcrumb_tracer;
if (vkd3d_descriptor_debug_active_qa_checks())
{
if (FAILED(hr = vkd3d_descriptor_debug_alloc_global_info(&device->descriptor_qa_global_info,
VKD3D_DESCRIPTOR_DEBUG_DEFAULT_NUM_COOKIES, device)))
goto out_cleanup_breadcrumb_tracer;
goto out_cleanup_global_pipeline_cache;
}
if ((device->parent = create_info->parent))
@ -6584,6 +6217,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
out_cleanup_descriptor_qa_global_info:
vkd3d_descriptor_debug_free_global_info(device->descriptor_qa_global_info, device);
out_cleanup_global_pipeline_cache:
d3d12_device_global_pipeline_cache_cleanup(device);
out_cleanup_breadcrumb_tracer:
#ifdef VKD3D_ENABLE_BREADCRUMBS
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)

View File

@ -240,7 +240,6 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *dev
alloc_info.heap_desc = heap->desc;
alloc_info.host_ptr = host_address;
alloc_info.extra_allocation_flags = 0;
if (FAILED(hr = vkd3d_private_store_init(&heap->private_store)))
return hr;

View File

@ -327,39 +327,34 @@ static HRESULT vkd3d_import_host_memory(struct d3d12_device *device, void *host_
void *pNext, struct vkd3d_device_memory_allocation *allocation)
{
VkImportMemoryHostPointerInfoEXT import_info;
HRESULT hr = S_OK;
HRESULT hr;
import_info.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT;
import_info.pNext = pNext;
import_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
import_info.pHostPointer = host_address;
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK) ||
FAILED(hr = vkd3d_try_allocate_device_memory(device, size,
if (FAILED(hr = vkd3d_try_allocate_device_memory(device, size,
type_flags, type_mask, &import_info, allocation)))
{
if (FAILED(hr))
WARN("Failed to import host memory, hr %#x.\n", hr);
WARN("Failed to import host memory, hr %#x.\n", hr);
/* If we failed, fall back to a host-visible allocation. Generally
* the app will access the memory through the main host pointer,
* so it's fine. */
hr = vkd3d_try_allocate_device_memory(device, size,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
type_mask, pNext, allocation);
type_mask, &import_info, allocation);
}
return hr;
}
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation,
struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation, struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
{
if (device->device_info.buffer_device_address_features.bufferDeviceAddress)
allocation->resource.va = vkd3d_get_buffer_device_address(device, allocation->resource.vk_buffer);
else if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
allocation->resource.va = vkd3d_va_map_alloc_fake_va(&allocator->va_map, allocation->resource.size);
else
allocation->resource.va = 0xdeadbeef;
allocation->resource.va = vkd3d_va_map_alloc_fake_va(&allocator->va_map, allocation->resource.size);
if (!allocation->resource.va)
{
@ -367,9 +362,7 @@ static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocatio
return E_OUTOFMEMORY;
}
/* Internal scratch buffers are not visible to application so we never have to map it back to VkBuffer. */
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
return S_OK;
}
@ -453,12 +446,10 @@ static void vkd3d_memory_allocation_free(const struct vkd3d_memory_allocation *a
if ((allocation->flags & VKD3D_ALLOCATION_FLAG_GPU_ADDRESS) && allocation->resource.va)
{
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
{
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
}
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
}
if (allocation->resource.view_map)
@ -1157,7 +1148,6 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
for (i = 0; i < queue_family->queue_count; i++)
{
vkd3d_queue_add_wait(queue_family->queues[i],
NULL,
clear_queue->vk_semaphore,
clear_queue->next_signal_value);
}
@ -1402,35 +1392,13 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
return hr;
}
/* Returns true if the Vulkan driver is known to hand back zeroed device
 * memory for fresh allocations, letting us skip our own clear.
 * Known to pass test_stress_suballocation which hits this path. */
static inline bool vkd3d_driver_implicitly_clears(VkDriverId driver_id)
{
    return driver_id == VK_DRIVER_ID_MESA_RADV ||
            driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
            driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
}
HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_allocation *allocation)
{
bool implementation_implicitly_clears;
bool needs_clear;
bool suballocate;
HRESULT hr;
suballocate = !info->pNext && !info->host_ptr &&
info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)) &&
!(info->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH);
if (suballocate)
if (!info->pNext && !info->host_ptr && info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)))
hr = vkd3d_suballocate_memory(device, allocator, info, allocation);
else
hr = vkd3d_memory_allocation_init(allocation, device, allocator, info);
@ -1438,20 +1406,8 @@ HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_a
if (FAILED(hr))
return hr;
/* If we're allocating Vulkan memory directly,
* we can rely on the driver doing this for us.
* This is relying on implementation details.
* RADV definitely does this, and it seems like NV also does it.
* TODO: an extension for this would be nice. */
implementation_implicitly_clears =
vkd3d_driver_implicitly_clears(device->device_info.driver_properties.driverID) &&
!suballocate;
needs_clear = !implementation_implicitly_clears &&
!(info->heap_flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) &&
!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR);
if (needs_clear)
if (!(info->heap_flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) &&
!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR))
vkd3d_memory_allocator_clear_allocation(allocator, device, allocation);
return hr;
@ -1480,7 +1436,6 @@ static bool vkd3d_heap_allocation_accept_deferred_resource_placements(struct d3d
HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
const struct vkd3d_allocate_heap_memory_info *info, struct vkd3d_memory_allocation *allocation)
{
struct vkd3d_allocate_heap_memory_info heap_info;
struct vkd3d_allocate_memory_info alloc_info;
HRESULT hr;
@ -1492,31 +1447,9 @@ HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_mem
alloc_info.heap_flags = info->heap_desc.Flags;
alloc_info.host_ptr = info->host_ptr;
alloc_info.flags |= info->extra_allocation_flags;
if (!(info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
if (is_cpu_accessible_heap(&info->heap_desc.Properties))
{
if (info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)
{
/* If the heap was only designed to handle images, the heap is useless,
* and we can force everything to go through committed path. */
memset(allocation, 0, sizeof(*allocation));
return S_OK;
}
else
{
/* CPU visible textures are never placed on a heap directly,
* since LINEAR images have alignment / size requirements
* that are vastly different from OPTIMAL ones.
* We can place buffers however. */
heap_info = *info;
info = &heap_info;
heap_info.heap_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
}
}
hr = vkd3d_allocate_memory(device, allocator, &alloc_info, allocation);
if (hr == E_OUTOFMEMORY && vkd3d_heap_allocation_accept_deferred_resource_placements(device,
&info->heap_desc.Properties, info->heap_desc.Flags))

View File

@ -27,8 +27,6 @@ vkd3d_shaders =[
'shaders/vs_swapchain_fullscreen.vert',
'shaders/fs_swapchain_fullscreen.frag',
'shaders/cs_execute_indirect_patch.comp',
'shaders/cs_execute_indirect_patch_debug_ring.comp',
]
vkd3d_src = [
@ -67,10 +65,6 @@ if enable_breadcrumbs
vkd3d_src += ['breadcrumbs.c']
endif
if vkd3d_platform == 'windows'
vkd3d_src += ['shared_metadata.c']
endif
if not enable_d3d12
vkd3d_lib = shared_library('vkd3d-proton', vkd3d_src, glsl_generator.process(vkd3d_shaders), vkd3d_build, vkd3d_version,
dependencies : [ vkd3d_common_dep, vkd3d_shader_dep ] + vkd3d_extra_libs,

View File

@ -1217,144 +1217,6 @@ void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
info->data_size = predicate_ops->data_sizes[command_type];
}
/* Initializes the execute-indirect meta ops state: the mutex guarding the
 * lazily populated pipeline cache, and the shared pipeline layout.
 * The layout has no descriptor sets; everything is passed through a single
 * compute-stage push constant block (struct vkd3d_execute_indirect_args).
 * Patch pipelines themselves are created on demand by
 * vkd3d_meta_get_execute_indirect_pipeline(). */
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
        struct d3d12_device *device)
{
    VkPushConstantRange push_constant_range;
    VkResult vr;
    int rc;

    if ((rc = pthread_mutex_init(&meta_indirect_ops->mutex, NULL)))
        return hresult_from_errno(rc);

    push_constant_range.offset = 0;
    push_constant_range.size = sizeof(struct vkd3d_execute_indirect_args);
    push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;

    if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
            &push_constant_range, &meta_indirect_ops->vk_pipeline_layout)) < 0)
    {
        /* Unwind the mutex here; on failure the caller does not invoke
         * vkd3d_execute_indirect_ops_cleanup(). */
        pthread_mutex_destroy(&meta_indirect_ops->mutex);
        return hresult_from_vk_result(vr);
    }

    /* Cache starts empty; it is grown lazily as pipelines are requested. */
    meta_indirect_ops->pipelines_count = 0;
    meta_indirect_ops->pipelines_size = 0;
    meta_indirect_ops->pipelines = NULL;
    return S_OK;
}
/* Specialization constant payload for the debug-ring variant of the
 * execute-indirect patch shader: the debug channel's own constants come
 * first, followed by the workgroup size (the patch command count).
 * The field layout must match the VkSpecializationMapEntry offsets built
 * in vkd3d_meta_get_execute_indirect_pipeline(). */
struct vkd3d_meta_execute_indirect_spec_constant_data
{
    struct vkd3d_shader_debug_ring_spec_constants constants;
    uint32_t workgroup_size_x;
};
/* Returns (creating on demand) the compute pipeline used to patch indirect
 * command buffers for ExecuteIndirect(). The pipeline's workgroup size is
 * specialized to patch_command_count and cached per count; the shared
 * pipeline layout and the pipeline are returned in *info. */
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
        uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info)
{
    struct vkd3d_meta_execute_indirect_spec_constant_data execute_indirect_spec_constants;
    VkSpecializationMapEntry map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES + 1];
    struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect;
    struct vkd3d_shader_debug_ring_spec_info debug_ring_info;
    VkSpecializationInfo spec;
    HRESULT hr = S_OK;
    VkResult vr;
    bool debug;
    size_t i;
    int rc;

    if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    /* Fast path: reuse a pipeline already specialized for this count. */
    for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
    {
        if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count)
        {
            info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
            info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline;
            goto out;
        }
    }

    debug = meta_ops->device->debug_ring.active;

    /* If we have debug ring, we can dump indirect command buffer data to the ring as well.
     * Vital for debugging broken execute indirect data with templates. */
    if (debug)
    {
        vkd3d_shader_debug_ring_init_spec_constant(meta_ops->device, &debug_ring_info,
                0 /* Reserve this hash for internal debug streams. */);
        memset(&execute_indirect_spec_constants, 0, sizeof(execute_indirect_spec_constants));
        execute_indirect_spec_constants.constants = debug_ring_info.constants;
        execute_indirect_spec_constants.workgroup_size_x = patch_command_count;

        /* Append the workgroup size as constant ID 4, after the debug
         * channel's own map entries. */
        memcpy(map_entry, debug_ring_info.map_entries, sizeof(debug_ring_info.map_entries));
        map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].constantID = 4;
        map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].offset =
                offsetof(struct vkd3d_meta_execute_indirect_spec_constant_data, workgroup_size_x);
        map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].size = sizeof(patch_command_count);

        spec.pMapEntries = map_entry;
        spec.pData = &execute_indirect_spec_constants;
        spec.mapEntryCount = ARRAY_SIZE(map_entry);
        spec.dataSize = sizeof(execute_indirect_spec_constants);
    }
    else
    {
        /* The non-debug shader only takes the workgroup size (constant 0). */
        map_entry[0].constantID = 0;
        map_entry[0].offset = 0;
        map_entry[0].size = sizeof(patch_command_count);

        spec.pMapEntries = map_entry;
        spec.pData = &patch_command_count;
        spec.mapEntryCount = 1;
        spec.dataSize = sizeof(patch_command_count);
    }

    /* The reservation can fail; writing the new cache entry unchecked would
     * then be an out-of-bounds store. */
    if (!vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size,
            meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines)))
    {
        hr = E_OUTOFMEMORY;
        goto out;
    }

    meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count;
    vr = vkd3d_meta_create_compute_pipeline(meta_ops->device,
            debug ? sizeof(cs_execute_indirect_patch_debug_ring) : sizeof(cs_execute_indirect_patch),
            debug ? cs_execute_indirect_patch_debug_ring : cs_execute_indirect_patch,
            meta_indirect_ops->vk_pipeline_layout, &spec,
            &meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline);
    if (vr)
    {
        hr = hresult_from_vk_result(vr);
        goto out;
    }

    info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
    info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline;
    /* Publish the entry only after successful creation. */
    meta_indirect_ops->pipelines_count++;

out:
    pthread_mutex_unlock(&meta_indirect_ops->mutex);
    return hr;
}
/* Destroys all cached execute-indirect patch pipelines, the shared
 * pipeline layout, the cache array itself, and the guarding mutex. */
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
        struct d3d12_device *device)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    size_t i;

    for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
        VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL));
    VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL));

    /* The cache array is grown with vkd3d_array_reserve(); free it here,
     * otherwise it leaks on device destruction. */
    vkd3d_free(meta_indirect_ops->pipelines);
    meta_indirect_ops->pipelines = NULL;
    meta_indirect_ops->pipelines_count = 0;
    meta_indirect_ops->pipelines_size = 0;

    pthread_mutex_destroy(&meta_indirect_ops->mutex);
}
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
{
HRESULT hr;
@ -1380,13 +1242,8 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
goto fail_predicate_ops;
if (FAILED(hr = vkd3d_execute_indirect_ops_init(&meta_ops->execute_indirect, device)))
goto fail_execute_indirect_ops;
return S_OK;
fail_execute_indirect_ops:
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
fail_predicate_ops:
vkd3d_query_ops_cleanup(&meta_ops->query, device);
fail_query_ops:
@ -1403,7 +1260,6 @@ fail_common:
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
{
vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device);
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
vkd3d_query_ops_cleanup(&meta_ops->query, device);
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);

File diff suppressed because it is too large Load Diff

View File

@ -224,8 +224,7 @@ static bool vkd3d_get_format_compatibility_list(const struct d3d12_device *devic
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)
{
/* Legacy D3D11 compatibility rule that allows typed UAV loads on FL11.0 hardware.
* 5.3.9.5 from D3D11 functional spec. 32-bit typeless formats can be viewed as R32{U,I,F}.*/
/* Legacy D3D11 compatibility rule that allows typed UAV loads on FL11.0 hardware */
if (format->byte_count == 4 && format->type == VKD3D_FORMAT_TYPE_TYPELESS)
{
for (i = 0; i < ARRAY_SIZE(r32_uav_formats); i++)
@ -488,7 +487,6 @@ static bool vkd3d_format_check_usage_support(struct d3d12_device *device, VkForm
struct vkd3d_image_create_info
{
struct vkd3d_format_compatibility_list format_compat_list;
VkExternalMemoryImageCreateInfo external_info;
VkImageFormatListCreateInfoKHR format_list;
VkImageCreateInfo image_info;
};
@ -499,7 +497,6 @@ static HRESULT vkd3d_get_image_create_info(struct d3d12_device *device,
struct vkd3d_image_create_info *create_info)
{
struct vkd3d_format_compatibility_list *compat_list = &create_info->format_compat_list;
VkExternalMemoryImageCreateInfo *external_info = &create_info->external_info;
VkImageFormatListCreateInfoKHR *format_list = &create_info->format_list;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkImageCreateInfo *image_info = &create_info->image_info;
@ -524,22 +521,12 @@ static HRESULT vkd3d_get_image_create_info(struct d3d12_device *device,
image_info->sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
image_info->pNext = NULL;
image_info->flags = 0;
if (resource && (resource->heap_flags & D3D12_HEAP_FLAG_SHARED))
{
external_info->sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
external_info->pNext = NULL;
external_info->handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
image_info->pNext = external_info;
}
if (!(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
{
if (vkd3d_get_format_compatibility_list(device, desc, compat_list))
{
format_list->sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
format_list->pNext = image_info->pNext;
format_list->pNext = NULL;
format_list->viewFormatCount = compat_list->format_count;
format_list->pViewFormats = compat_list->vk_formats;
@ -551,6 +538,8 @@ static HRESULT vkd3d_get_image_create_info(struct d3d12_device *device,
&& desc->Width == desc->Height && desc->DepthOrArraySize >= 6
&& desc->SampleDesc.Count == 1)
image_info->flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D)
image_info->flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR;
if (sparse_resource)
{
@ -642,11 +631,6 @@ static HRESULT vkd3d_get_image_create_info(struct d3d12_device *device,
if (vkd3d_resource_can_be_vrs(device, heap_properties, desc))
image_info->usage |= VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
/* Additional image flags as necessary */
if (image_info->imageType == VK_IMAGE_TYPE_3D &&
(image_info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
image_info->flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
use_concurrent = !!(device->unique_queue_mask & (device->unique_queue_mask - 1));
if (!(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS))
@ -1402,12 +1386,18 @@ static void d3d12_resource_get_tiling(struct d3d12_device *device, struct d3d12_
tile_count += packed_tiles;
/* Docs say that we should clear tile_shape to zero if there are no standard mips,
* but this conflicts with all native drivers, so the docs are likely lying here.
* See test_get_resource_tiling() for info. */
tile_shape->WidthInTexels = block_extent.width;
tile_shape->HeightInTexels = block_extent.height;
tile_shape->DepthInTexels = block_extent.depth;
if (standard_mips)
{
tile_shape->WidthInTexels = block_extent.width;
tile_shape->HeightInTexels = block_extent.height;
tile_shape->DepthInTexels = block_extent.depth;
}
else
{
tile_shape->WidthInTexels = 0;
tile_shape->HeightInTexels = 0;
tile_shape->DepthInTexels = 0;
}
*total_tile_count = tile_count;
}
@ -2713,7 +2703,7 @@ static HRESULT d3d12_resource_create(struct d3d12_device *device, uint32_t flags
HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12_RESOURCE_DESC1 *desc,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_STATES initial_state,
const D3D12_CLEAR_VALUE *optimized_clear_value, HANDLE shared_handle, struct d3d12_resource **resource)
const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct d3d12_resource *object;
@ -2734,11 +2724,6 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
bool use_dedicated_allocation;
VkResult vr;
#ifdef _WIN32
VkImportMemoryWin32HandleInfoKHR import_info;
VkExportMemoryAllocateInfo export_info;
#endif
if (FAILED(hr = d3d12_resource_create_vk_resource(object, device)))
goto fail;
@ -2771,36 +2756,10 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
else
allocate_info.heap_flags |= D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
if (heap_flags & D3D12_HEAP_FLAG_SHARED)
{
#ifdef _WIN32
use_dedicated_allocation = true;
if (shared_handle && shared_handle != INVALID_HANDLE_VALUE)
{
import_info.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR;
import_info.pNext = allocate_info.pNext;
import_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
import_info.handle = shared_handle;
import_info.name = NULL;
allocate_info.pNext = &import_info;
}
else
{
export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
export_info.pNext = allocate_info.pNext;
export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
allocate_info.pNext = &export_info;
}
#else
FIXME("D3D12_HEAP_FLAG_SHARED can only be implemented in native Win32.\n");
#endif
}
if (use_dedicated_allocation)
{
dedicated_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
dedicated_info.pNext = allocate_info.pNext;
dedicated_info.pNext = NULL;
dedicated_info.image = object->res.vk_image;
dedicated_info.buffer = VK_NULL_HANDLE;
allocate_info.pNext = &dedicated_info;
@ -2847,14 +2806,6 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
allocate_info.heap_desc.SizeInBytes = align(desc->Width, allocate_info.heap_desc.Alignment);
allocate_info.heap_desc.Flags = heap_flags | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
/* Be very careful with suballocated buffers. */
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV) &&
(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) &&
desc->Width < VKD3D_VA_BLOCK_SIZE)
{
allocate_info.heap_desc.Flags &= ~D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
}
if (FAILED(hr = vkd3d_allocate_heap_memory(device,
&device->memory_allocator, &allocate_info, &object->mem)))
goto fail;
@ -2906,30 +2857,20 @@ HRESULT d3d12_resource_create_placed(struct d3d12_device *device, const D3D12_RE
VkMemoryRequirements memory_requirements;
VkBindImageMemoryInfo bind_info;
struct d3d12_resource *object;
bool force_committed;
VkResult vr;
HRESULT hr;
if (FAILED(hr = d3d12_resource_validate_heap(desc, heap)))
return hr;
/* Placed linear textures are ... problematic
* since we have no way of signalling that they have different alignment and size requirements
* than optimal textures. GetResourceAllocationInfo() does not take heap property information
* and assumes that we are not modifying the tiling mode. */
force_committed = desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER &&
is_cpu_accessible_heap(&heap->desc.Properties);
if (force_committed || heap->allocation.device_allocation.vk_memory == VK_NULL_HANDLE)
if (heap->allocation.device_allocation.vk_memory == VK_NULL_HANDLE)
{
if (!force_committed)
WARN("Placing resource on heap with no memory backing it. Falling back to committed resource.\n");
WARN("Placing resource on heap with no memory backing it. Falling back to committed resource.\n");
if (FAILED(hr = d3d12_resource_create_committed(device, desc, &heap->desc.Properties,
heap->desc.Flags & ~(D3D12_HEAP_FLAG_DENY_BUFFERS |
D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES |
D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES),
initial_state, optimized_clear_value, NULL, resource)))
initial_state, optimized_clear_value, resource)))
{
ERR("Failed to create fallback committed resource.\n");
}
@ -3000,15 +2941,6 @@ HRESULT d3d12_resource_create_placed(struct d3d12_device *device, const D3D12_RE
goto fail;
}
/* Placed RTV and DSV *must* be explicitly initialized after alias barriers and first use,
* so there is no need to do initial layout transition ourselves.
* It is extremely dangerous to do so since the initialization will clobber other
* aliased buffers when clearing DCC/HTILE state.
* For details, see:
* https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-createplacedresource#notes-on-the-required-resource-initialization. */
if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
object->initial_layout_transition = 0;
*resource = object;
return S_OK;

View File

@ -1,67 +0,0 @@
#version 450
/* Patches application indirect command buffers for ExecuteIndirect() with
 * custom command signatures. One workgroup is dispatched per potential
 * draw; each local invocation copies one dword according to its patch
 * command. The workgroup size is specialized to the patch command count. */
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require

/* Spec constant 0: number of patch commands per draw. */
layout(local_size_x_id = 0) in;

/* One dword copy: relative offsets into a draw's source and destination
 * payloads. 'type' is presumably a command-kind tag; it is not consumed
 * by this shader — TODO confirm against the host-side patch generator. */
struct Command
{
    uint type;
    uint src_offset;
    uint dst_offset;
};

/* Patch command list, shared by all draws. */
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
{
    Command commands[];
};

/* Application-provided indirect argument buffer, viewed as dwords. */
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
    uint values[];
};

/* Patched command stream, viewed as dwords. */
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
    uint values[];
};

/* Optional application-written indirect draw count. */
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
    uint count;
};

/* Clamped draw count forwarded to the patched stream. */
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
    uint count;
};

layout(push_constant) uniform Registers
{
    Commands commands_va;
    SrcBuffer src_buffer_va;
    DstBuffer dst_buffer_va;
    /* Raw VA kept as uvec2 so a null (zero) address can be tested before
     * casting to a buffer reference. */
    uvec2 indirect_count_va;
    IndirectCountWrite dst_indirect_count_va;
    uint src_stride;  /* Source command stride per draw, in dwords. */
    uint dst_stride;  /* Destination command stride per draw, in dwords. */
};

void main()
{
    Command cmd = commands_va.commands[gl_LocalInvocationIndex];
    uint draw_id = gl_WorkGroupID.x;
    uint max_draws = gl_NumWorkGroups.x;

    /* If an indirect count was provided, clamp the draw count to it and
     * mirror the clamped value into the patched stream (workgroup 0 only). */
    if (any(notEqual(indirect_count_va, uvec2(0))))
    {
        max_draws = min(max_draws, IndirectCount(indirect_count_va).count);
        if (gl_WorkGroupID.x == 0u)
            dst_indirect_count_va.count = max_draws;
    }

    /* Copy this invocation's dword for live draws only. */
    if (draw_id < max_draws)
    {
        uint src_offset = src_stride * draw_id + cmd.src_offset;
        uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
        uint src_value = src_buffer_va.values[src_offset];
        dst_buffer_va.values[dst_offset] = src_value;
    }
}

View File

@ -1,83 +0,0 @@
#version 450
/* Debug-ring variant of the execute-indirect patch shader: identical copy
 * logic to cs_execute_indirect_patch.comp, but additionally dumps patched
 * data to the shader debug channel when debug_tag is non-zero. */
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require
#extension GL_GOOGLE_include_directive : require
#include "../../../include/shader-debug/debug_channel.h"

/* Spec constant 4: number of patch commands per draw. ID 4 is chosen by
 * the host to avoid colliding with the debug channel's own constants. */
layout(local_size_x_id = 4) in;

/* One dword copy: relative offsets into a draw's source and destination
 * payloads. 'type' is only used here as a debug message payload. */
struct Command
{
    uint type;
    uint src_offset;
    uint dst_offset;
};

/* Patch command list, shared by all draws. */
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
{
    Command commands[];
};

/* Application-provided indirect argument buffer, viewed as dwords. */
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
    uint values[];
};

/* Patched command stream, viewed as dwords. */
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
    uint values[];
};

/* Optional application-written indirect draw count. */
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
    uint count;
};

/* Clamped draw count forwarded to the patched stream. */
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
    uint count;
};

layout(push_constant) uniform Registers
{
    Commands commands_va;
    SrcBuffer src_buffer_va;
    DstBuffer dst_buffer_va;
    /* Raw VA kept as uvec2 so a null (zero) address can be tested before
     * casting to a buffer reference. */
    uvec2 indirect_count_va;
    IndirectCountWrite dst_indirect_count_va;
    uint src_stride;
    uint dst_stride;
    // Debug metadata here
    uint debug_tag;          /* Non-zero enables debug channel output. */
    uint implicit_instance;
};

void main()
{
    /* Register this invocation with the debug channel before any work. */
    if (debug_tag != 0u)
        DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3(debug_tag, gl_WorkGroupID.x, gl_LocalInvocationIndex), implicit_instance);

    Command cmd = commands_va.commands[gl_LocalInvocationIndex];
    uint draw_id = gl_WorkGroupID.x;
    uint max_draws = gl_NumWorkGroups.x;

    /* If an indirect count was provided, clamp the draw count to it and
     * mirror the clamped value into the patched stream (workgroup 0 only). */
    if (any(notEqual(indirect_count_va, uvec2(0))))
    {
        max_draws = min(max_draws, IndirectCount(indirect_count_va).count);
        if (gl_WorkGroupID.x == 0u)
            dst_indirect_count_va.count = max_draws;
    }

    /* Log the clamped vs. dispatched draw count once per dispatch. */
    if (debug_tag != 0u && gl_WorkGroupID.x == 0)
        DEBUG_CHANNEL_MSG_UNIFORM(int(max_draws), int(gl_NumWorkGroups.x));

    /* Copy this invocation's dword for live draws, logging each copy. */
    if (draw_id < max_draws)
    {
        uint src_offset = src_stride * draw_id + cmd.src_offset;
        uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
        uint src_value = src_buffer_va.values[src_offset];
        if (debug_tag != 0u)
            DEBUG_CHANNEL_MSG(cmd.type, dst_offset, src_offset, src_value);
        dst_buffer_va.values[dst_offset] = src_value;
    }
}

View File

@ -1,68 +0,0 @@
/*
* Copyright 2021 Derek Lesho for Codeweavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_private.h"
#include "winioctl.h"
#define IOCTL_SHARED_GPU_RESOURCE_SET_METADATA CTL_CODE(FILE_DEVICE_VIDEO, 4, METHOD_BUFFERED, FILE_WRITE_ACCESS)
#define IOCTL_SHARED_GPU_RESOURCE_GET_METADATA CTL_CODE(FILE_DEVICE_VIDEO, 5, METHOD_BUFFERED, FILE_READ_ACCESS)
#define IOCTL_SHARED_GPU_RESOURCE_OPEN CTL_CODE(FILE_DEVICE_VIDEO, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS)
/* Attaches caller-supplied metadata to a shared GPU resource by issuing
 * IOCTL_SHARED_GPU_RESOURCE_SET_METADATA against the resource handle.
 *
 * handle   - NT handle to the shared resource device object.
 * buf      - metadata blob to store (opaque to this helper).
 * buf_size - size of the blob in bytes.
 *
 * Returns true if the IOCTL succeeded. */
bool vkd3d_set_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size)
{
    /* The SET ioctl produces no output payload, but DeviceIoControl still
     * requires a valid lpBytesReturned pointer when no OVERLAPPED is used. */
    DWORD bytes_returned;
    BOOL success;

    success = DeviceIoControl(handle, IOCTL_SHARED_GPU_RESOURCE_SET_METADATA,
            buf, buf_size, NULL, 0, &bytes_returned, NULL);
    return !!success;
}
/* Retrieves metadata previously attached to a shared GPU resource via
 * IOCTL_SHARED_GPU_RESOURCE_GET_METADATA.
 *
 * handle        - NT handle to the shared resource device object.
 * buf           - destination buffer receiving the metadata blob.
 * buf_size      - capacity of buf in bytes.
 * metadata_size - optional; receives the number of bytes actually written.
 *
 * Returns true if the IOCTL succeeded.
 *
 * Fix: ret_size was previously left uninitialized and copied into
 * *metadata_size even when DeviceIoControl failed; per the Win32 contract
 * lpBytesReturned is only meaningful on success, so the old code could
 * publish an indeterminate value. Initialize it so callers always see a
 * defined size (0 on failure unless the driver wrote one). */
bool vkd3d_get_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size, uint32_t *metadata_size)
{
    DWORD ret_size = 0;
    bool ret = DeviceIoControl(handle, IOCTL_SHARED_GPU_RESOURCE_GET_METADATA,
            NULL, 0, buf, buf_size, &ret_size, NULL);
    if (metadata_size)
        *metadata_size = ret_size;
    return ret;
}
/* Translates a KMT-style shared resource handle into a regular NT handle.
 * Opens the SharedGpuResource device and asks the driver (via
 * IOCTL_SHARED_GPU_RESOURCE_OPEN) to bind the device handle to the object
 * identified by the KMT handle.
 *
 * Returns the NT handle on success, INVALID_HANDLE_VALUE otherwise. */
HANDLE vkd3d_open_kmt_handle(HANDLE kmt_handle)
{
    struct
    {
        unsigned int kmt_handle;
        /* the following parameter represents a larger sized string for a dynamically allocated struct for use when opening an object by name */
        WCHAR name[1];
    } shared_resource_open;
    HANDLE nt_handle;

    nt_handle = CreateFileA("\\\\.\\SharedGpuResource", GENERIC_READ | GENERIC_WRITE,
            0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (nt_handle == INVALID_HANDLE_VALUE)
        return nt_handle;

    /* KMT handles are 32-bit values; the truncating cast through ULONG_PTR
     * matches the driver-side ABI. Open by handle, not by name. */
    shared_resource_open.kmt_handle = (ULONG_PTR)kmt_handle;
    shared_resource_open.name[0] = 0;

    if (!DeviceIoControl(nt_handle, IOCTL_SHARED_GPU_RESOURCE_OPEN,
            &shared_resource_open, sizeof(shared_resource_open), NULL, 0, NULL, NULL))
    {
        /* The driver rejected the open; don't leak the device handle. */
        CloseHandle(nt_handle);
        nt_handle = INVALID_HANDLE_VALUE;
    }

    return nt_handle;
}

View File

@ -945,7 +945,6 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
struct vkd3d_shader_resource_binding *binding;
VkDescriptorSetLayoutCreateFlags vk_flags;
struct vkd3d_shader_root_parameter *param;
uint32_t raw_va_root_descriptor_count = 0;
unsigned int hoisted_parameter_index;
const D3D12_DESCRIPTOR_RANGE1 *range;
unsigned int i, j, k;
@ -1062,13 +1061,10 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
param = &root_signature->parameters[i];
param->parameter_type = p->ParameterType;
param->descriptor.binding = binding;
param->descriptor.raw_va_root_descriptor_index = raw_va_root_descriptor_count;
context->binding_index += 1;
if (raw_va)
raw_va_root_descriptor_count += 1;
else
if (!raw_va)
context->vk_binding += 1;
}
@ -1473,36 +1469,8 @@ fail:
return hr;
}
HRESULT d3d12_root_signature_create_empty(struct d3d12_device *device,
struct d3d12_root_signature **root_signature)
{
struct d3d12_root_signature *object;
D3D12_ROOT_SIGNATURE_DESC1 desc;
HRESULT hr;
if (!(object = vkd3d_malloc(sizeof(*object))))
return E_OUTOFMEMORY;
memset(&desc, 0, sizeof(desc));
hr = d3d12_root_signature_init(object, device, &desc);
/* For pipeline libraries, (and later DXR to some degree), we need a way to
* compare root signature objects. */
object->compatibility_hash = 0;
if (FAILED(hr))
{
vkd3d_free(object);
return hr;
}
*root_signature = object;
return S_OK;
}
static HRESULT d3d12_root_signature_create_from_blob(struct d3d12_device *device,
const void *bytecode, size_t bytecode_length, bool raw_payload,
struct d3d12_root_signature **root_signature)
HRESULT d3d12_root_signature_create(struct d3d12_device *device,
const void *bytecode, size_t bytecode_length, struct d3d12_root_signature **root_signature)
{
const struct vkd3d_shader_code dxbc = {bytecode, bytecode_length};
union
@ -1510,26 +1478,14 @@ static HRESULT d3d12_root_signature_create_from_blob(struct d3d12_device *device
D3D12_VERSIONED_ROOT_SIGNATURE_DESC d3d12;
struct vkd3d_versioned_root_signature_desc vkd3d;
} root_signature_desc;
vkd3d_shader_hash_t compatibility_hash;
struct d3d12_root_signature *object;
HRESULT hr;
int ret;
if (raw_payload)
if ((ret = vkd3d_parse_root_signature_v_1_1(&dxbc, &root_signature_desc.vkd3d)) < 0)
{
if ((ret = vkd3d_parse_root_signature_v_1_1_from_raw_payload(&dxbc, &root_signature_desc.vkd3d, &compatibility_hash)))
{
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return hresult_from_vkd3d_result(ret);
}
}
else
{
if ((ret = vkd3d_parse_root_signature_v_1_1(&dxbc, &root_signature_desc.vkd3d, &compatibility_hash)) < 0)
{
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return hresult_from_vkd3d_result(ret);
}
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return hresult_from_vkd3d_result(ret);
}
if (!(object = vkd3d_malloc(sizeof(*object))))
@ -1542,7 +1498,7 @@ static HRESULT d3d12_root_signature_create_from_blob(struct d3d12_device *device
/* For pipeline libraries, (and later DXR to some degree), we need a way to
* compare root signature objects. */
object->compatibility_hash = compatibility_hash;
object->compatibility_hash = vkd3d_shader_hash(&dxbc);
vkd3d_shader_free_root_signature(&root_signature_desc.vkd3d);
if (FAILED(hr))
@ -1558,20 +1514,6 @@ static HRESULT d3d12_root_signature_create_from_blob(struct d3d12_device *device
return S_OK;
}
HRESULT d3d12_root_signature_create(struct d3d12_device *device,
const void *bytecode, size_t bytecode_length,
struct d3d12_root_signature **root_signature)
{
return d3d12_root_signature_create_from_blob(device, bytecode, bytecode_length, false, root_signature);
}
HRESULT d3d12_root_signature_create_raw(struct d3d12_device *device,
const void *payload, size_t payload_length,
struct d3d12_root_signature **root_signature)
{
return d3d12_root_signature_create_from_blob(device, payload, payload_length, true, root_signature);
}
unsigned int d3d12_root_signature_get_shader_interface_flags(const struct d3d12_root_signature *root_signature)
{
unsigned int flags = 0;
@ -2360,7 +2302,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
shader_interface.descriptor_qa_heap_binding = &root_signature->descriptor_qa_heap_binding;
#endif
if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE))
if (!device->global_pipeline_cache)
{
if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, cached_pso, &state->vk_pso_cache)) < 0)
{
@ -2375,7 +2317,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
hr = vkd3d_create_compute_pipeline(device,
&desc->cs, &shader_interface,
root_signature->compute.vk_pipeline_layout,
state->vk_pso_cache,
state->vk_pso_cache ? state->vk_pso_cache : device->global_pipeline_cache,
&state->compute.vk_pipeline,
&state->compute.code);
@ -3117,14 +3059,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
rt_count = ARRAY_SIZE(graphics->blend_attachments);
}
if (!desc->ps.pShaderBytecode || !desc->ps.BytecodeLength)
{
/* Avoids validation errors where app might bind bogus RTV format which does not match the PSO.
* D3D12 validation does not complain about this when PS is NULL since RTVs are not accessed to begin with.
* We can just pretend we have no render targets in this case, which is fine. */
rt_count = 0;
}
graphics->null_attachment_mask = 0;
graphics->rtv_active_mask = 0;
for (i = 0; i < rt_count; ++i)
@ -3642,7 +3576,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
if (can_compile_pipeline_early)
{
if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE))
if (!device->global_pipeline_cache)
{
if ((hr = vkd3d_create_pipeline_cache_from_d3d12_desc(device, cached_pso, &state->vk_pso_cache)) < 0)
{
@ -3652,7 +3586,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
}
if (!(graphics->pipeline = d3d12_pipeline_state_create_pipeline_variant(state, NULL, graphics->dsv_format,
state->vk_pso_cache, &graphics->dynamic_state_flags)))
state->vk_pso_cache ? state->vk_pso_cache : device->global_pipeline_cache, &graphics->dynamic_state_flags)))
goto fail;
}
else
@ -4742,8 +4676,7 @@ static uint32_t vkd3d_bindless_state_get_bindless_flags(struct d3d12_device *dev
* The difference in performance is profound (~15% in some cases).
* On ACO, BDA with NonWritable can be promoted directly to scalar loads,
* which is great. */
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV) ||
device_info->properties2.properties.vendorID != VKD3D_VENDOR_ID_NVIDIA)
if (device_info->properties2.properties.vendorID != VKD3D_VENDOR_ID_NVIDIA)
flags |= VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV;
}

View File

@ -804,16 +804,11 @@ static BOOL d3d12_swapchain_is_present_mode_supported(struct d3d12_swapchain *sw
return supported;
}
static bool d3d12_swapchain_has_user_images(struct d3d12_swapchain *swapchain)
static BOOL d3d12_swapchain_has_user_images(struct d3d12_swapchain *swapchain)
{
return !!swapchain->vk_images[0];
}
static bool d3d12_swapchain_has_user_descriptors(struct d3d12_swapchain *swapchain)
{
return swapchain->descriptors.pool != VK_NULL_HANDLE;
}
static HRESULT d3d12_swapchain_get_user_graphics_pipeline(struct d3d12_swapchain *swapchain, VkFormat format)
{
struct d3d12_device *device = d3d12_swapchain_device(swapchain);
@ -934,6 +929,9 @@ static HRESULT d3d12_swapchain_create_user_buffers(struct d3d12_swapchain *swapc
HRESULT hr;
UINT i;
if (d3d12_swapchain_has_user_images(swapchain))
return S_OK;
heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
heap_properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
@ -953,38 +951,31 @@ static HRESULT d3d12_swapchain_create_user_buffers(struct d3d12_swapchain *swapc
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
if (!d3d12_swapchain_has_user_images(swapchain))
for (i = 0; i < swapchain->desc.BufferCount; i++)
{
for (i = 0; i < swapchain->desc.BufferCount; i++)
if (FAILED(hr = d3d12_resource_create_committed(d3d12_swapchain_device(swapchain),
&resource_desc, &heap_properties, D3D12_HEAP_FLAG_NONE,
D3D12_RESOURCE_STATE_PRESENT, NULL, &object)))
{
if (FAILED(hr = d3d12_resource_create_committed(d3d12_swapchain_device(swapchain),
&resource_desc, &heap_properties, D3D12_HEAP_FLAG_NONE,
D3D12_RESOURCE_STATE_PRESENT, NULL, NULL, &object)))
{
ERR("Failed to create image for swapchain buffer");
return hr;
}
swapchain->vk_images[i] = object->res.vk_image;
swapchain->buffers[i] = (ID3D12Resource *)&object->ID3D12Resource_iface;
vkd3d_resource_incref(swapchain->buffers[i]);
ID3D12Resource_Release(swapchain->buffers[i]);
/* It is technically possible to just start presenting images without rendering to them.
* The initial resource state for swapchain images is PRESENT.
* Since presentable images are dedicated allocations, we can safely queue a transition into common state
* right away. We will also drain the queue when we release the images, so there is no risk of early delete. */
vkd3d_enqueue_initial_transition(&swapchain->command_queue->ID3D12CommandQueue_iface, swapchain->buffers[i]);
ERR("Failed to create image for swapchain buffer");
return hr;
}
swapchain->vk_images[i] = object->res.vk_image;
swapchain->buffers[i] = (ID3D12Resource *)&object->ID3D12Resource_iface;
vkd3d_resource_incref(swapchain->buffers[i]);
ID3D12Resource_Release(swapchain->buffers[i]);
/* It is technically possible to just start presenting images without rendering to them.
* The initial resource state for swapchain images is PRESENT.
* Since presentable images are dedicated allocations, we can safely queue a transition into common state
* right away. We will also drain the queue when we release the images, so there is no risk of early delete. */
vkd3d_enqueue_initial_transition(&swapchain->command_queue->ID3D12CommandQueue_iface, swapchain->buffers[i]);
}
/* If we don't have a swapchain pipeline layout yet (0x0 surface on first frame),
* we cannot allocate any descriptors yet. We'll create the descriptors eventually
* when we get a proper swapchain working. */
if (!d3d12_swapchain_has_user_descriptors(swapchain) && swapchain->pipeline.vk_set_layout)
if (FAILED(hr = d3d12_swapchain_create_user_descriptors(swapchain, vk_format)))
return hr;
if (FAILED(hr = d3d12_swapchain_create_user_descriptors(swapchain, vk_format)))
return hr;
return S_OK;
}
@ -1235,6 +1226,8 @@ static HRESULT d3d12_swapchain_create_buffers(struct d3d12_swapchain *swapchain,
VkResult vr;
HRESULT hr;
d3d12_swapchain_destroy_views(swapchain);
if ((vr = VK_CALL(vkGetSwapchainImagesKHR(vk_device, vk_swapchain, &image_count, NULL))) < 0)
{
WARN("Failed to get Vulkan swapchain images, vr %d.\n", vr);
@ -1327,7 +1320,7 @@ end:
return vr;
}
static void d3d12_swapchain_destroy_resources(struct d3d12_swapchain *swapchain, bool destroy_user_buffers)
static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, BOOL destroy_user_buffers)
{
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
VkQueue vk_queue;
@ -1356,21 +1349,19 @@ static void d3d12_swapchain_destroy_resources(struct d3d12_swapchain *swapchain,
}
}
if (destroy_user_buffers)
for (i = 0; i < swapchain->desc.BufferCount; ++i)
{
for (i = 0; i < swapchain->desc.BufferCount; ++i)
if (swapchain->buffers[i] && destroy_user_buffers)
{
if (swapchain->buffers[i])
{
vkd3d_resource_decref(swapchain->buffers[i]);
swapchain->buffers[i] = NULL;
swapchain->vk_images[i] = VK_NULL_HANDLE;
}
vkd3d_resource_decref(swapchain->buffers[i]);
swapchain->buffers[i] = NULL;
swapchain->vk_images[i] = VK_NULL_HANDLE;
}
d3d12_swapchain_destroy_user_descriptors(swapchain);
}
if (destroy_user_buffers)
d3d12_swapchain_destroy_user_descriptors(swapchain);
if (swapchain->command_queue && swapchain->command_queue->device->vk_device)
{
for (i = 0; i < swapchain->buffer_count; ++i)
@ -1388,8 +1379,6 @@ static void d3d12_swapchain_destroy_resources(struct d3d12_swapchain *swapchain,
VK_CALL(vkDestroyCommandPool(swapchain->command_queue->device->vk_device, swapchain->vk_cmd_pool, NULL));
swapchain->vk_cmd_pool = VK_NULL_HANDLE;
}
d3d12_swapchain_destroy_views(swapchain);
}
static bool d3d12_swapchain_has_nonzero_surface_size(struct d3d12_swapchain *swapchain)
@ -1409,7 +1398,7 @@ static bool d3d12_swapchain_has_nonzero_surface_size(struct d3d12_swapchain *swa
return surface_caps.maxImageExtent.width != 0 && surface_caps.maxImageExtent.height != 0;
}
static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *swapchain, bool force_surface_lost)
static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *swapchain)
{
VkPhysicalDevice vk_physical_device = d3d12_swapchain_device(swapchain)->vk_physical_device;
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
@ -1436,19 +1425,8 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
swapchain->vk_surface, &swapchain->desc, &vk_swapchain_format)))
return hr;
if (force_surface_lost)
{
/* If we cannot successfully present after 2 attempts, we must assume the swapchain
* is in an unstable state with many resizes happening async. Until things stabilize,
* force a dummy swapchain for now so that we can make forward progress.
* When we don't have a proper swapchain, we will attempt again next present. */
vr = VK_ERROR_SURFACE_LOST_KHR;
}
else
{
vr = VK_CALL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device,
swapchain->vk_surface, &surface_caps));
}
vr = VK_CALL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device,
swapchain->vk_surface, &surface_caps));
if (vr == VK_ERROR_SURFACE_LOST_KHR)
{
@ -1603,12 +1581,23 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
if (FAILED(hr = d3d12_swapchain_create_user_buffers(swapchain, vk_format)))
return hr;
d3d12_swapchain_destroy_resources(swapchain, false);
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
d3d12_swapchain_destroy_views(swapchain);
swapchain->buffer_count = 0;
return S_OK;
}
}
static HRESULT d3d12_swapchain_recreate_vulkan_swapchain(struct d3d12_swapchain *swapchain)
{
HRESULT hr;
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain)))
ERR("Failed to recreate Vulkan swapchain, hr %#x.\n", hr);
return hr;
}
static inline struct d3d12_swapchain *d3d12_swapchain_from_IDXGISwapChain(dxgi_swapchain_iface *iface)
{
return CONTAINING_RECORD(iface, struct d3d12_swapchain, IDXGISwapChain_iface);
@ -1654,7 +1643,8 @@ static void d3d12_swapchain_destroy(struct d3d12_swapchain *swapchain)
{
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
d3d12_swapchain_destroy_resources(swapchain, true);
d3d12_swapchain_destroy_buffers(swapchain, TRUE);
d3d12_swapchain_destroy_views(swapchain);
if (swapchain->frame_latency_event)
CloseHandle(swapchain->frame_latency_event);
@ -1786,9 +1776,9 @@ static HRESULT d3d12_swapchain_set_sync_interval(struct d3d12_swapchain *swapcha
return S_OK;
}
d3d12_swapchain_destroy_resources(swapchain, false);
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
swapchain->present_mode = present_mode;
return d3d12_swapchain_create_vulkan_swapchain(swapchain, false);
return d3d12_swapchain_recreate_vulkan_swapchain(swapchain);
}
static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain, VkQueue vk_queue)
@ -1806,11 +1796,6 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
if (swapchain->vk_swapchain == VK_NULL_HANDLE)
return VK_SUCCESS;
/* If we know we're already suboptimal, e.g. observed in present or acquire after present,
* just recreate the swapchain right away. */
if (swapchain->is_suboptimal)
return VK_ERROR_OUT_OF_DATE_KHR;
if (swapchain->vk_image_index == INVALID_VK_IMAGE_INDEX)
{
/* If we hit SUBOPTIMAL path last AcquireNextImageKHR, we will have a pending acquire we did not
@ -1831,8 +1816,8 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id] = true;
/* If we have observed suboptimal once, guarantees that we keep observing it
* until we have recreated the swapchain. */
if (vr == VK_SUBOPTIMAL_KHR)
swapchain->is_suboptimal = true;
if (swapchain->is_suboptimal)
vr = VK_SUBOPTIMAL_KHR;
}
if (vr == VK_SUBOPTIMAL_KHR)
@ -1908,11 +1893,12 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
swapchain->frame_id = (swapchain->frame_id + 1) % swapchain->buffer_count;
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
/* If we have observed suboptimal once, guarantees that we keep observing it
* until we have recreated the swapchain. */
if (vr == VK_SUBOPTIMAL_KHR)
swapchain->is_suboptimal = true;
else if (swapchain->is_suboptimal)
/* If we have observed suboptimal once, guarantees that we keep observing it
* until we have recreated the swapchain. */
if (swapchain->is_suboptimal)
vr = VK_SUBOPTIMAL_KHR;
/* Could get SUBOPTIMAL here. Defer acquiring if we hit that path.
@ -1945,8 +1931,6 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
}
}
/* Not being able to successfully acquire here is okay, we'll defer the acquire to next frame. */
vr = VK_SUCCESS;
}
@ -1990,17 +1974,15 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
return E_FAIL;
}
/* We must have some kind of forward progress here. Keep trying until we exhaust all possible avenues. */
vr = d3d12_swapchain_queue_present(swapchain, vk_queue);
if (vr < 0)
if (vr == VK_ERROR_OUT_OF_DATE_KHR)
{
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
TRACE("Recreating Vulkan swapchain.\n");
d3d12_swapchain_destroy_resources(swapchain, false);
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, false)))
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
if (FAILED(hr = d3d12_swapchain_recreate_vulkan_swapchain(swapchain)))
return hr;
if (!(vk_queue = vkd3d_acquire_vk_queue(d3d12_swapchain_queue_iface(swapchain))))
@ -2010,22 +1992,7 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
}
if ((vr = d3d12_swapchain_queue_present(swapchain, vk_queue)) < 0)
{
ERR("Failed to present after recreating swapchain, vr %d. Attempting fallback swapchain.\n", vr);
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
d3d12_swapchain_destroy_resources(swapchain, false);
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, true)))
return hr;
if (!(vk_queue = vkd3d_acquire_vk_queue(d3d12_swapchain_queue_iface(swapchain))))
{
ERR("Failed to acquire Vulkan queue.\n");
return E_FAIL;
}
if ((vr = d3d12_swapchain_queue_present(swapchain, vk_queue)) < 0)
ERR("Failed to present even after creating dummy swapchain, vr %d. This should not be possible.\n", vr);
}
ERR("Failed to present after recreating swapchain, vr %d.\n", vr);
}
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
@ -2305,9 +2272,9 @@ static HRESULT d3d12_swapchain_resize_buffers(struct d3d12_swapchain *swapchain,
&& desc->Format == new_desc.Format && desc->BufferCount == new_desc.BufferCount)
return S_OK;
d3d12_swapchain_destroy_resources(swapchain, true);
d3d12_swapchain_destroy_buffers(swapchain, TRUE);
swapchain->desc = new_desc;
return d3d12_swapchain_create_vulkan_swapchain(swapchain, false);
return d3d12_swapchain_recreate_vulkan_swapchain(swapchain);
}
static HRESULT STDMETHODCALLTYPE d3d12_swapchain_ResizeBuffers(dxgi_swapchain_iface *iface,
@ -2888,7 +2855,7 @@ static HRESULT d3d12_swapchain_init(struct d3d12_swapchain *swapchain, IDXGIFact
ID3D12CommandQueue_AddRef(&queue->ID3D12CommandQueue_iface);
d3d12_device_add_ref(queue->device);
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, false)))
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain)))
{
d3d12_swapchain_destroy(swapchain);
return hr;

View File

@ -1018,16 +1018,6 @@ HRESULT hresult_from_errno(int rc)
HRESULT hresult_from_vk_result(VkResult vr)
{
/* Wine tends to dispatch Vulkan calls to their own syscall stack.
* Crashes are captured and return this magic VkResult.
* Report it explicitly here so it's easier to debug when it happens. */
if (vr == -1073741819)
{
ERR("Detected segfault in Wine syscall handler.\n");
/* HACK: For ad-hoc debugging can also trigger backtrace printing here. */
return E_POINTER;
}
switch (vr)
{
case VK_SUCCESS:

View File

@ -165,28 +165,15 @@ static CONST_VTBL struct ID3D12RootSignatureDeserializerVtbl d3d12_root_signatur
static int vkd3d_parse_root_signature_for_version(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *out_desc,
enum vkd3d_root_signature_version target_version,
bool raw_payload,
vkd3d_shader_hash_t *compatibility_hash)
enum vkd3d_root_signature_version target_version)
{
struct vkd3d_versioned_root_signature_desc desc, converted_desc;
int ret;
if (raw_payload)
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc)) < 0)
{
if ((ret = vkd3d_shader_parse_root_signature_raw(dxbc->code, dxbc->size, &desc, compatibility_hash)) < 0)
{
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return ret;
}
}
else
{
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc, compatibility_hash)) < 0)
{
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return ret;
}
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return ret;
}
if (desc.version == target_version)
@ -210,27 +197,15 @@ static int vkd3d_parse_root_signature_for_version(const struct vkd3d_shader_code
}
int vkd3d_parse_root_signature_v_1_0(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *out_desc,
vkd3d_shader_hash_t *compatibility_hash)
struct vkd3d_versioned_root_signature_desc *out_desc)
{
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_0, false,
compatibility_hash);
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_0);
}
int vkd3d_parse_root_signature_v_1_1(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *out_desc,
vkd3d_shader_hash_t *compatibility_hash)
struct vkd3d_versioned_root_signature_desc *out_desc)
{
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1, false,
compatibility_hash);
}
int vkd3d_parse_root_signature_v_1_1_from_raw_payload(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *out_desc,
vkd3d_shader_hash_t *compatibility_hash)
{
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1, true,
compatibility_hash);
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1);
}
static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signature_deserializer *deserializer,
@ -241,7 +216,7 @@ static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signatur
deserializer->ID3D12RootSignatureDeserializer_iface.lpVtbl = &d3d12_root_signature_deserializer_vtbl;
deserializer->refcount = 1;
if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d, NULL)) < 0)
if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d)) < 0)
return hresult_from_vkd3d_result(ret);
return S_OK;
@ -419,7 +394,7 @@ static HRESULT d3d12_versioned_root_signature_deserializer_init(struct d3d12_ver
deserializer->ID3D12VersionedRootSignatureDeserializer_iface.lpVtbl = &d3d12_versioned_root_signature_deserializer_vtbl;
deserializer->refcount = 1;
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d, NULL)) < 0)
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d)) < 0)
{
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return hresult_from_vkd3d_result(ret);

View File

@ -50,6 +50,7 @@
#define MAKE_MAGIC(a,b,c,d) (((uint32_t)a) | (((uint32_t)b) << 8) | (((uint32_t)c) << 16) | (((uint32_t)d) << 24))
#define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 10u
#define VKD3D_MAX_SHADER_EXTENSIONS 6u
#define VKD3D_MAX_SHADER_STAGES 5u
#define VKD3D_MAX_VK_SYNC_OBJECTS 4u
@ -61,8 +62,6 @@
#define VKD3D_TILE_SIZE 65536
typedef ID3D12Fence1 d3d12_fence_iface;
struct d3d12_command_list;
struct d3d12_device;
struct d3d12_resource;
@ -131,14 +130,9 @@ struct vkd3d_vulkan_info
bool KHR_bind_memory2;
bool KHR_copy_commands2;
bool KHR_dynamic_rendering;
bool KHR_depth_stencil_resolve;
bool KHR_driver_properties;
bool KHR_uniform_buffer_standard_layout;
bool KHR_maintenance4;
bool KHR_ray_tracing_maintenance1;
bool KHR_fragment_shader_barycentric;
bool KHR_external_memory_win32;
bool KHR_external_semaphore_win32;
/* EXT device extensions */
bool EXT_calibrated_timestamps;
bool EXT_conditional_rendering;
@ -176,7 +170,6 @@ struct vkd3d_vulkan_info
bool NV_fragment_shader_barycentric;
bool NV_compute_shader_derivatives;
bool NV_device_diagnostic_checkpoints;
bool NV_device_generated_commands;
/* VALVE extensions */
bool VALVE_mutable_descriptor_type;
bool VALVE_descriptor_set_host_mapping;
@ -193,7 +186,7 @@ struct vkd3d_vulkan_info
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
unsigned int shader_extension_count;
enum vkd3d_shader_target_extension shader_extensions[VKD3D_SHADER_TARGET_EXTENSION_COUNT];
enum vkd3d_shader_target_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS];
};
struct vkd3d_instance
@ -230,12 +223,8 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha
struct vkd3d_waiting_fence
{
d3d12_fence_iface *fence;
VkSemaphore submission_timeline;
struct d3d12_fence *fence;
uint64_t value;
LONG **submission_counters;
size_t num_submission_counts;
bool signal;
};
struct vkd3d_fence_worker
@ -516,6 +505,8 @@ static inline HRESULT vkd3d_set_private_data_interface(struct vkd3d_private_stor
HRESULT STDMETHODCALLTYPE d3d12_object_SetName(ID3D12Object *iface, const WCHAR *name);
/* ID3D12Fence */
typedef ID3D12Fence1 d3d12_fence_iface;
struct d3d12_fence_value
{
uint64_t virtual_value;
@ -585,52 +576,6 @@ HRESULT d3d12_fence_create(struct d3d12_device *device,
HRESULT d3d12_fence_set_event_on_completion(struct d3d12_fence *fence,
UINT64 value, HANDLE event, enum vkd3d_waiting_event_type type);
struct d3d12_shared_fence
{
d3d12_fence_iface ID3D12Fence_iface;
LONG refcount_internal;
LONG refcount;
D3D12_FENCE_FLAGS d3d12_flags;
VkSemaphore timeline_semaphore;
struct d3d12_device *device;
struct vkd3d_private_store private_store;
};
static inline struct d3d12_shared_fence *shared_impl_from_ID3D12Fence1(ID3D12Fence1 *iface)
{
extern CONST_VTBL struct ID3D12Fence1Vtbl d3d12_shared_fence_vtbl;
if (!iface)
return NULL;
assert(iface->lpVtbl == &d3d12_shared_fence_vtbl);
return CONTAINING_RECORD(iface, struct d3d12_shared_fence, ID3D12Fence_iface);
}
static inline struct d3d12_shared_fence *shared_impl_from_ID3D12Fence(ID3D12Fence *iface)
{
return shared_impl_from_ID3D12Fence1((ID3D12Fence1 *)iface);
}
HRESULT d3d12_shared_fence_create(struct d3d12_device *device,
uint64_t initial_value, D3D12_FENCE_FLAGS flags, struct d3d12_shared_fence **fence);
static inline bool is_shared_ID3D12Fence1(ID3D12Fence1 *iface)
{
extern CONST_VTBL struct ID3D12Fence1Vtbl d3d12_shared_fence_vtbl;
extern CONST_VTBL struct ID3D12Fence1Vtbl d3d12_fence_vtbl;
assert(iface->lpVtbl == &d3d12_shared_fence_vtbl || iface->lpVtbl == &d3d12_fence_vtbl);
return iface->lpVtbl == &d3d12_shared_fence_vtbl;
}
static inline bool is_shared_ID3D12Fence(ID3D12Fence *iface)
{
return is_shared_ID3D12Fence1((ID3D12Fence1 *)iface);
}
enum vkd3d_allocation_flag
{
VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER = (1u << 0),
@ -639,10 +584,6 @@ enum vkd3d_allocation_flag
VKD3D_ALLOCATION_FLAG_ALLOW_WRITE_WATCH = (1u << 3),
VKD3D_ALLOCATION_FLAG_NO_FALLBACK = (1u << 4),
VKD3D_ALLOCATION_FLAG_DEDICATED = (1u << 5),
/* Intended for internal allocation of scratch buffers.
* They are never suballocated since we do that ourselves,
* and we do not consume space in the VA map. */
VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH = (1u << 6),
};
#define VKD3D_MEMORY_CHUNK_SIZE (VKD3D_VA_BLOCK_SIZE * 8)
@ -664,7 +605,6 @@ struct vkd3d_allocate_heap_memory_info
{
D3D12_HEAP_DESC heap_desc;
void *host_ptr;
uint32_t extra_allocation_flags;
};
struct vkd3d_allocate_resource_memory_info
@ -956,7 +896,7 @@ VkImageSubresource vk_image_subresource_from_d3d12(
HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12_RESOURCE_DESC1 *desc,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_STATES initial_state,
const D3D12_CLEAR_VALUE *optimized_clear_value, HANDLE shared_handle, struct d3d12_resource **resource);
const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource);
HRESULT d3d12_resource_create_placed(struct d3d12_device *device, const D3D12_RESOURCE_DESC1 *desc,
struct d3d12_heap *heap, uint64_t heap_offset, D3D12_RESOURCE_STATES initial_state,
const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource);
@ -1455,10 +1395,6 @@ struct d3d12_root_signature
HRESULT d3d12_root_signature_create(struct d3d12_device *device, const void *bytecode,
size_t bytecode_length, struct d3d12_root_signature **root_signature);
HRESULT d3d12_root_signature_create_raw(struct d3d12_device *device, const void *payload,
size_t payload_size, struct d3d12_root_signature **root_signature);
HRESULT d3d12_root_signature_create_empty(struct d3d12_device *device,
struct d3d12_root_signature **root_signature);
/* Private ref counts, for pipeline library. */
void d3d12_root_signature_inc_ref(struct d3d12_root_signature *state);
void d3d12_root_signature_dec_ref(struct d3d12_root_signature *state);
@ -1481,14 +1417,9 @@ HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device,
VkPipelineLayout *pipeline_layout);
int vkd3d_parse_root_signature_v_1_0(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *desc,
vkd3d_shader_hash_t *compatibility_hash);
struct vkd3d_versioned_root_signature_desc *desc);
int vkd3d_parse_root_signature_v_1_1(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *desc,
vkd3d_shader_hash_t *compatibility_hash);
int vkd3d_parse_root_signature_v_1_1_from_raw_payload(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *desc,
vkd3d_shader_hash_t *compatibility_hash);
struct vkd3d_versioned_root_signature_desc *desc);
VkShaderStageFlags vkd3d_vk_stage_flags_from_visibility(D3D12_SHADER_VISIBILITY visibility);
enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility);
@ -1518,11 +1449,10 @@ struct vkd3d_shader_debug_ring_spec_constants
uint32_t ring_words;
};
#define VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES 4
struct vkd3d_shader_debug_ring_spec_info
{
struct vkd3d_shader_debug_ring_spec_constants constants;
VkSpecializationMapEntry map_entries[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES];
VkSpecializationMapEntry map_entries[4];
VkSpecializationInfo spec_info;
};
@ -1892,9 +1822,7 @@ struct vkd3d_scratch_buffer
#define VKD3D_QUERY_TYPE_INDEX_TRANSFORM_FEEDBACK (2u)
#define VKD3D_QUERY_TYPE_INDEX_RT_COMPACTED_SIZE (3u)
#define VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE (4u)
#define VKD3D_QUERY_TYPE_INDEX_RT_CURRENT_SIZE (5u)
#define VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE_BOTTOM_LEVEL_POINTERS (6u)
#define VKD3D_VIRTUAL_QUERY_TYPE_COUNT (7u)
#define VKD3D_VIRTUAL_QUERY_TYPE_COUNT (5u)
#define VKD3D_VIRTUAL_QUERY_POOL_COUNT (128u)
struct vkd3d_query_pool
@ -1905,20 +1833,6 @@ struct vkd3d_query_pool
uint32_t next_index;
};
struct d3d12_command_allocator_scratch_pool
{
struct vkd3d_scratch_buffer *scratch_buffers;
size_t scratch_buffers_size;
size_t scratch_buffer_count;
};
enum vkd3d_scratch_pool_kind
{
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE = 0,
VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS,
VKD3D_SCRATCH_POOL_KIND_COUNT
};
/* ID3D12CommandAllocator */
struct d3d12_command_allocator
{
@ -1945,7 +1859,9 @@ struct d3d12_command_allocator
size_t command_buffers_size;
size_t command_buffer_count;
struct d3d12_command_allocator_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT];
struct vkd3d_scratch_buffer *scratch_buffers;
size_t scratch_buffers_size;
size_t scratch_buffer_count;
struct vkd3d_query_pool *query_pools;
size_t query_pools_size;
@ -2133,39 +2049,6 @@ struct d3d12_buffer_copy_tracked_buffer
VkDeviceSize hazard_end;
};
enum vkd3d_batch_type
{
VKD3D_BATCH_TYPE_NONE,
VKD3D_BATCH_TYPE_COPY_BUFFER_TO_IMAGE,
VKD3D_BATCH_TYPE_COPY_IMAGE_TO_BUFFER,
VKD3D_BATCH_TYPE_COPY_IMAGE,
};
struct vkd3d_image_copy_info
{
D3D12_TEXTURE_COPY_LOCATION src, dst;
const struct vkd3d_format *src_format, *dst_format;
enum vkd3d_batch_type batch_type;
union
{
VkBufferImageCopy2KHR buffer_image;
VkImageCopy2KHR image;
} copy;
/* TODO: split d3d12_command_list_copy_image too, so this can be a local variable of before_copy_texture_region. */
bool writes_full_subresource;
VkImageLayout src_layout;
VkImageLayout dst_layout;
};
#define VKD3D_COPY_TEXTURE_REGION_MAX_BATCH_SIZE 16
struct d3d12_transfer_batch_state
{
enum vkd3d_batch_type batch_type;
struct vkd3d_image_copy_info batch[VKD3D_COPY_TEXTURE_REGION_MAX_BATCH_SIZE];
size_t batch_len;
};
struct d3d12_command_list
{
d3d12_command_list_iface ID3D12GraphicsCommandList_iface;
@ -2179,25 +2062,12 @@ struct d3d12_command_list
bool is_valid;
bool debug_capture;
bool has_replaced_shaders;
struct
{
VkBuffer buffer;
VkDeviceSize offset;
DXGI_FORMAT dxgi_format;
VkIndexType vk_type;
bool is_dirty;
} index_buffer;
struct
{
bool has_observed_transition_to_indirect;
bool has_emitted_indirect_to_compute_barrier;
} execute_indirect;
bool has_valid_index_buffer;
VkCommandBuffer vk_command_buffer;
VkCommandBuffer vk_init_commands;
DXGI_FORMAT index_buffer_format;
struct d3d12_rtv_desc rtvs[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT];
struct d3d12_rtv_desc dsv;
uint32_t dsv_plane_optimal_mask;
@ -2265,7 +2135,13 @@ struct d3d12_command_list
struct d3d12_buffer_copy_tracked_buffer tracked_copy_buffers[VKD3D_BUFFER_COPY_TRACKING_BUFFER_COUNT];
unsigned int tracked_copy_buffer_count;
struct d3d12_transfer_batch_state transfer_batch;
/* Hackery needed for game workarounds. */
struct
{
/* Used to keep track of COLOR write -> COMPUTE where game forget to insert barrier
* before the dispatch. */
bool has_pending_color_write;
} workaround_state;
struct vkd3d_private_store private_store;
@ -2339,8 +2215,6 @@ struct vkd3d_queue
VkCommandPool barrier_pool;
VkCommandBuffer barrier_command_buffer;
VkSemaphore serializing_binary_semaphore;
VkSemaphore submission_timeline;
uint64_t submission_timeline_count;
uint32_t vk_family_index;
VkQueueFlags vk_queue_flags;
@ -2353,8 +2227,6 @@ struct vkd3d_queue
size_t wait_values_size;
VkPipelineStageFlags *wait_stages;
size_t wait_stages_size;
d3d12_fence_iface **wait_fences;
size_t wait_fences_size;
uint32_t wait_count;
};
@ -2363,7 +2235,7 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, u
const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue);
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device);
void vkd3d_queue_release(struct vkd3d_queue *queue);
void vkd3d_queue_add_wait(struct vkd3d_queue *queue, d3d12_fence_iface *waiter, VkSemaphore semaphore, uint64_t value);
void vkd3d_queue_add_wait(struct vkd3d_queue *queue, VkSemaphore semaphore, uint64_t value);
enum vkd3d_submission_type
{
@ -2399,13 +2271,13 @@ struct vkd3d_sparse_memory_bind_range
struct d3d12_command_queue_submission_wait
{
d3d12_fence_iface *fence;
struct d3d12_fence *fence;
UINT64 value;
};
struct d3d12_command_queue_submission_signal
{
d3d12_fence_iface *fence;
struct d3d12_fence *fence;
UINT64 value;
};
@ -2492,35 +2364,6 @@ HRESULT d3d12_command_queue_create(struct d3d12_device *device,
const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue);
void d3d12_command_queue_submit_stop(struct d3d12_command_queue *queue);
struct vkd3d_execute_indirect_info
{
VkPipelineLayout vk_pipeline_layout;
VkPipeline vk_pipeline;
};
enum vkd3d_patch_command_token
{
VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32 = 0,
VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO = 1,
VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI = 2,
VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE = 3,
VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT = 4,
VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO = 5,
VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI = 6,
VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE = 7,
VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE = 8,
VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO = 9,
VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI = 10,
VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT = 11,
VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT = 12,
VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT = 13,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX = 14,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX = 15,
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE = 16,
VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET = 17,
VKD3D_PATCH_COMMAND_INT_MAX = 0x7fffffff
};
/* ID3D12CommandSignature */
struct d3d12_command_signature
{
@ -2528,27 +2371,13 @@ struct d3d12_command_signature
LONG refcount;
D3D12_COMMAND_SIGNATURE_DESC desc;
uint32_t argument_buffer_offset;
/* Complex command signatures require some work to stamp out device generated commands. */
struct
{
VkBuffer buffer;
VkDeviceAddress buffer_va;
struct vkd3d_device_memory_allocation memory;
VkIndirectCommandsLayoutNV layout;
uint32_t stride;
struct vkd3d_execute_indirect_info pipeline;
} state_template;
bool requires_state_template;
struct d3d12_device *device;
struct vkd3d_private_store private_store;
};
HRESULT d3d12_command_signature_create(struct d3d12_device *device, struct d3d12_root_signature *root_signature,
const D3D12_COMMAND_SIGNATURE_DESC *desc,
HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_COMMAND_SIGNATURE_DESC *desc,
struct d3d12_command_signature **signature);
static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface)
@ -2625,7 +2454,6 @@ enum vkd3d_breadcrumb_command_type
VKD3D_BREADCRUMB_COMMAND_DRAW_INDEXED,
VKD3D_BREADCRUMB_COMMAND_DISPATCH,
VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT,
VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT_TEMPLATE,
VKD3D_BREADCRUMB_COMMAND_COPY,
VKD3D_BREADCRUMB_COMMAND_RESOLVE,
VKD3D_BREADCRUMB_COMMAND_WBI,
@ -2642,7 +2470,6 @@ enum vkd3d_breadcrumb_command_type
VKD3D_BREADCRUMB_COMMAND_IBO,
VKD3D_BREADCRUMB_COMMAND_ROOT_DESC,
VKD3D_BREADCRUMB_COMMAND_ROOT_CONST,
VKD3D_BREADCRUMB_COMMAND_TAG,
};
#ifdef VKD3D_ENABLE_BREADCRUMBS
@ -2666,8 +2493,6 @@ struct vkd3d_breadcrumb_command
uint32_t word_32bit;
uint64_t word_64bit;
uint32_t count;
/* Pointer must remain alive. */
const char *tag;
};
};
@ -3125,41 +2950,6 @@ HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
struct d3d12_device *device);
struct vkd3d_execute_indirect_args
{
VkDeviceAddress template_va;
VkDeviceAddress api_buffer_va;
VkDeviceAddress device_generated_commands_va;
VkDeviceAddress indirect_count_va;
VkDeviceAddress dst_indirect_count_va;
uint32_t api_buffer_word_stride;
uint32_t device_generated_commands_word_stride;
/* Arbitrary tag used for debug version of state patcher. Debug messages from tag 0 are ignored. */
uint32_t debug_tag;
uint32_t implicit_instance;
};
struct vkd3d_execute_indirect_pipeline
{
VkPipeline vk_pipeline;
uint32_t workgroup_size_x;
};
struct vkd3d_execute_indirect_ops
{
VkPipelineLayout vk_pipeline_layout;
struct vkd3d_execute_indirect_pipeline *pipelines;
size_t pipelines_count;
size_t pipelines_size;
pthread_mutex_t mutex;
};
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
struct d3d12_device *device);
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
struct d3d12_device *device);
struct vkd3d_meta_ops_common
{
VkShaderModule vk_module_fullscreen_vs;
@ -3175,7 +2965,6 @@ struct vkd3d_meta_ops
struct vkd3d_swapchain_ops swapchain;
struct vkd3d_query_ops query;
struct vkd3d_predicate_ops predicate;
struct vkd3d_execute_indirect_ops execute_indirect;
};
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
@ -3208,9 +2997,6 @@ bool vkd3d_meta_get_query_gather_pipeline(struct vkd3d_meta_ops *meta_ops,
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info);
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info);
enum vkd3d_time_domain_flag
{
VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,
@ -3245,7 +3031,6 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR shader_integer_dot_product_properties;
VkPhysicalDeviceDriverPropertiesKHR driver_properties;
VkPhysicalDeviceMaintenance4PropertiesKHR maintenance4_properties;
VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV device_generated_commands_properties_nv;
VkPhysicalDeviceProperties2KHR properties2;
@ -3277,7 +3062,6 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR separate_depth_stencil_layout_features;
VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR shader_integer_dot_product_features;
VkPhysicalDeviceFragmentShaderBarycentricFeaturesNV barycentric_features_nv;
VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR barycentric_features_khr;
VkPhysicalDeviceRayQueryFeaturesKHR ray_query_features;
VkPhysicalDeviceComputeShaderDerivativesFeaturesNV compute_shader_derivatives_features_nv;
VkPhysicalDeviceShaderAtomicInt64FeaturesKHR shader_atomic_int64_features;
@ -3289,8 +3073,6 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamic_rendering_features;
VkPhysicalDeviceCoherentMemoryFeaturesAMD device_coherent_memory_features_amd;
VkPhysicalDeviceMaintenance4FeaturesKHR maintenance4_features;
VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR ray_tracing_maintenance1_features;
VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV device_generated_commands_features_nv;
VkPhysicalDeviceFeatures2 features2;
@ -3356,12 +3138,6 @@ struct vkd3d_descriptor_qa_heap_buffer_data;
/* ID3D12DeviceExt */
typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface;
struct d3d12_device_scratch_pool
{
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT];
size_t scratch_buffer_count;
};
struct d3d12_device
{
d3d12_device_iface ID3D12Device_iface;
@ -3396,7 +3172,8 @@ struct d3d12_device
struct vkd3d_memory_allocator memory_allocator;
struct d3d12_device_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT];
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT];
size_t scratch_buffer_count;
struct vkd3d_query_pool query_pools[VKD3D_VIRTUAL_QUERY_POOL_COUNT];
size_t query_pool_count;
@ -3428,6 +3205,7 @@ struct d3d12_device
#ifdef VKD3D_ENABLE_DESCRIPTOR_QA
struct vkd3d_descriptor_qa_global_info *descriptor_qa_global_info;
#endif
VkPipelineCache global_pipeline_cache;
uint64_t shader_interface_key;
};
@ -3469,10 +3247,8 @@ static inline struct d3d12_device *impl_from_ID3D12Device(d3d12_device_iface *if
bool d3d12_device_validate_shader_meta(struct d3d12_device *device, const struct vkd3d_shader_meta *meta);
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch);
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
const struct vkd3d_scratch_buffer *scratch);
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch);
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch);
HRESULT d3d12_device_get_query_pool(struct d3d12_device *device, uint32_t type_index, struct vkd3d_query_pool *pool);
void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vkd3d_query_pool *pool);
@ -3618,15 +3394,6 @@ struct d3d12_state_object_stack_info
uint32_t max_closest;
};
#ifdef VKD3D_ENABLE_BREADCRUMBS
struct d3d12_state_object_breadcrumb_shader
{
vkd3d_shader_hash_t hash;
VkShaderStageFlagBits stage;
char name[64];
};
#endif
struct d3d12_state_object
{
d3d12_state_object_iface ID3D12StateObject_iface;
@ -3664,8 +3431,6 @@ struct d3d12_state_object
VkDescriptorSet desc_set;
VkDescriptorPool desc_pool;
uint32_t set_index;
uint64_t compatibility_hash;
bool owned_handles;
} local_static_sampler;
UINT64 pipeline_stack_size;
@ -3676,13 +3441,6 @@ struct d3d12_state_object
struct d3d12_root_signature *global_root_signature;
#ifdef VKD3D_ENABLE_BREADCRUMBS
/* For breadcrumbs. */
struct d3d12_state_object_breadcrumb_shader *breadcrumb_shaders;
size_t breadcrumb_shaders_size;
size_t breadcrumb_shaders_count;
#endif
struct vkd3d_private_store private_store;
};
@ -3962,74 +3720,6 @@ void vkd3d_acceleration_structure_copy(
D3D12_GPU_VIRTUAL_ADDRESS dst, D3D12_GPU_VIRTUAL_ADDRESS src,
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode);
typedef enum D3D11_USAGE
{
D3D11_USAGE_DEFAULT,
D3D11_USAGE_IMMUTABLE,
D3D11_USAGE_DYNAMIC,
D3D11_USAGE_STAGING,
} D3D11_USAGE;
typedef enum D3D11_BIND_FLAG
{
D3D11_BIND_VERTEX_BUFFER = 0x0001,
D3D11_BIND_INDEX_BUFFER = 0x0002,
D3D11_BIND_CONSTANT_BUFFER = 0x0004,
D3D11_BIND_SHADER_RESOURCE = 0x0008,
D3D11_BIND_STREAM_OUTPUT = 0x0010,
D3D11_BIND_RENDER_TARGET = 0x0020,
D3D11_BIND_DEPTH_STENCIL = 0x0040,
D3D11_BIND_UNORDERED_ACCESS = 0x0080,
D3D11_BIND_DECODER = 0x0200,
D3D11_BIND_VIDEO_ENCODER = 0x0400
} D3D11_BIND_FLAG;
typedef enum D3D11_TEXTURE_LAYOUT
{
D3D11_TEXTURE_LAYOUT_UNDEFINED = 0x0,
D3D11_TEXTURE_LAYOUT_ROW_MAJOR = 0x1,
D3D11_TEXTURE_LAYOUT_64K_STANDARD_SWIZZLE = 0x2,
} D3D11_TEXTURE_LAYOUT;
typedef enum D3D11_RESOURCE_MISC_FLAG
{
D3D11_RESOURCE_MISC_GENERATE_MIPS = 0x1,
D3D11_RESOURCE_MISC_SHARED = 0x2,
D3D11_RESOURCE_MISC_TEXTURECUBE = 0x4,
D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS = 0x10,
D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS = 0x20,
D3D11_RESOURCE_MISC_BUFFER_STRUCTURED = 0x40,
D3D11_RESOURCE_MISC_RESOURCE_CLAMP = 0x80,
D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX = 0x100,
D3D11_RESOURCE_MISC_GDI_COMPATIBLE = 0x200,
D3D11_RESOURCE_MISC_SHARED_NTHANDLE = 0x800,
D3D11_RESOURCE_MISC_RESTRICTED_CONTENT = 0x1000,
D3D11_RESOURCE_MISC_RESTRICT_SHARED_RESOURCE = 0x2000,
D3D11_RESOURCE_MISC_RESTRICT_SHARED_RESOURCE_DRIVER = 0x4000,
D3D11_RESOURCE_MISC_GUARDED = 0x8000,
D3D11_RESOURCE_MISC_TILE_POOL = 0x20000,
D3D11_RESOURCE_MISC_TILED = 0x40000,
D3D11_RESOURCE_MISC_HW_PROTECTED = 0x80000,
} D3D11_RESOURCE_MISC_FLAG;
struct DxvkSharedTextureMetadata {
UINT Width;
UINT Height;
UINT MipLevels;
UINT ArraySize;
DXGI_FORMAT Format;
DXGI_SAMPLE_DESC SampleDesc;
D3D11_USAGE Usage;
UINT BindFlags;
UINT CPUAccessFlags;
UINT MiscFlags;
D3D11_TEXTURE_LAYOUT TextureLayout;
};
bool vkd3d_set_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size);
bool vkd3d_get_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size, uint32_t *metadata_size);
HANDLE vkd3d_open_kmt_handle(HANDLE kmt_handle);
#define VKD3D_VENDOR_ID_NVIDIA 0x10DE
#define VKD3D_VENDOR_ID_AMD 0x1002
#define VKD3D_VENDOR_ID_INTEL 0x8086

View File

@ -45,8 +45,6 @@ enum vkd3d_meta_copy_mode
#include <cs_resolve_binary_queries.h>
#include <cs_resolve_predicate.h>
#include <cs_resolve_query.h>
#include <cs_execute_indirect_patch.h>
#include <cs_execute_indirect_patch_debug_ring.h>
#include <vs_fullscreen_layer.h>
#include <vs_fullscreen.h>
#include <gs_fullscreen.h>

View File

@ -49,7 +49,6 @@ VK_INSTANCE_PFN(vkGetPhysicalDeviceQueueFamilyProperties)
VK_INSTANCE_PFN(vkGetPhysicalDeviceSparseImageFormatProperties)
VK_INSTANCE_PFN(vkGetPhysicalDeviceFeatures2)
VK_INSTANCE_PFN(vkGetPhysicalDeviceProperties2)
VK_INSTANCE_PFN(vkGetPhysicalDeviceExternalSemaphoreProperties)
/* VK_EXT_debug_utils */
VK_INSTANCE_EXT_PFN(vkCreateDebugUtilsMessengerEXT)
@ -224,18 +223,6 @@ VK_DEVICE_EXT_PFN(vkGetDeviceBufferMemoryRequirementsKHR)
VK_DEVICE_EXT_PFN(vkGetDeviceImageMemoryRequirementsKHR)
VK_DEVICE_EXT_PFN(vkGetDeviceImageSparseMemoryRequirementsKHR)
#ifdef VK_KHR_external_memory_win32
/* VK_KHR_external_memory_win32 */
VK_DEVICE_EXT_PFN(vkGetMemoryWin32HandleKHR)
VK_DEVICE_EXT_PFN(vkGetMemoryWin32HandlePropertiesKHR)
#endif
#ifdef VK_KHR_external_semaphore_win32
/* VK_KHR_external_semaphore_win32 */
VK_DEVICE_EXT_PFN(vkGetSemaphoreWin32HandleKHR)
VK_DEVICE_EXT_PFN(vkImportSemaphoreWin32HandleKHR)
#endif
/* VK_EXT_calibrated_timestamps */
VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)
@ -293,9 +280,6 @@ VK_DEVICE_EXT_PFN(vkQueuePresentKHR)
VK_DEVICE_EXT_PFN(vkCmdBeginRenderingKHR)
VK_DEVICE_EXT_PFN(vkCmdEndRenderingKHR)
/* VK_KHR_ray_tracing_maintenance1 */
VK_DEVICE_EXT_PFN(vkCmdTraceRaysIndirect2KHR)
/* VK_AMD_buffer_marker */
VK_DEVICE_EXT_PFN(vkCmdWriteBufferMarkerAMD)
@ -318,12 +302,6 @@ VK_DEVICE_EXT_PFN(vkGetImageViewAddressNVX)
VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutHostMappingInfoVALVE)
VK_DEVICE_EXT_PFN(vkGetDescriptorSetHostMappingVALVE)
/* VK_NV_device_generated_commands */
VK_DEVICE_EXT_PFN(vkCreateIndirectCommandsLayoutNV)
VK_DEVICE_EXT_PFN(vkDestroyIndirectCommandsLayoutNV)
VK_DEVICE_EXT_PFN(vkGetGeneratedCommandsMemoryRequirementsNV)
VK_DEVICE_EXT_PFN(vkCmdExecuteGeneratedCommandsNV)
#undef VK_INSTANCE_PFN
#undef VK_INSTANCE_EXT_PFN
#undef VK_DEVICE_PFN

View File

@ -83,7 +83,7 @@ idl_generator = generator(idl_compiler,
arguments : [ '-h', '-o', '@OUTPUT@', '@INPUT@' ])
glsl_compiler = find_program('glslangValidator')
glsl_args = [ '-V', '--target-env', 'vulkan1.1', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
glsl_args = [ '-V', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
if run_command(glsl_compiler, [ '--quiet', '--version' ], check : false).returncode() == 0
glsl_args += [ '--quiet' ]
endif

View File

@ -75,35 +75,17 @@ def main():
parser.add_argument('--per-iteration', action = 'store_true', help = 'Represent ticks in terms of ticks / iteration. Cannot be used with --divider.')
parser.add_argument('--name', nargs = '+', type = str, help = 'Only display data for certain counters.')
parser.add_argument('--sort', type = str, default = 'none', help = 'Sorts input data according to "iterations" or "ticks".')
parser.add_argument('--delta', type = str, help = 'Subtract iterations and timing from other profile blob.')
parser.add_argument('profile', help = 'The profile binary blob.')
args = parser.parse_args()
if not args.profile:
raise AssertionError('Need profile folder.')
delta_map = {}
if args.delta is not None:
with open(args.delta, 'rb') as f:
for block in iter(lambda: f.read(64), b''):
if is_valid_block(block):
b = parse_block(block)
delta_map[b.name] = b
blocks = []
with open(args.profile, 'rb') as f:
for block in iter(lambda: f.read(64), b''):
if is_valid_block(block):
b = parse_block(block)
if b.name in delta_map:
d = delta_map[b.name]
b = ProfileCase(ticks = b.ticks - d.ticks,
iterations = b.iterations - d.iterations,
name = b.name)
if b.iterations < 0 or b.ticks < 0:
raise AssertionError('After subtracting, iterations or ticks became negative.')
if b.iterations > 0:
blocks.append(b)
blocks.append(parse_block(block))
if args.divider is not None:
if args.per_iteration:
@ -132,11 +114,11 @@ def main():
print(' Iterations:', block.iterations)
if args.divider is not None:
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "Kcycles")
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "us")
elif args.per_iteration:
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "us")
else:
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "us")
if __name__ == '__main__':
main()

@ -1 +1 @@
Subproject commit 245d25ce8c3337919dc7916d0e62e31a0d8748ab
Subproject commit 6eb8fc3598ed2a9777677fbe59038c8d0664a434

@ -1 +1 @@
Subproject commit 9f2fd6356c14376ab5b88518d6dd4e6787084525
Subproject commit be697eb2ae18f62bf2ea57d8213fd7afc93b7433

View File

@ -141,7 +141,7 @@ void test_clear_depth_stencil_view(void)
void test_clear_render_target_view(void)
{
static const unsigned int array_expected_colors[] = {0xff00ff00, 0xff0000ff, 0xffff0000};
static const float array_colors[][4] =
static const struct vec4 array_colors[] =
{
{0.0f, 1.0f, 0.0f, 1.0f},
{1.0f, 0.0f, 0.0f, 1.0f},
@ -324,7 +324,8 @@ void test_clear_render_target_view(void)
rtv_desc.Texture2DArray.ArraySize = 1;
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, array_colors[i], 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, &array_colors[i].x, 0, NULL);
}
transition_resource_state(command_list, resource,
@ -354,7 +355,8 @@ void test_clear_render_target_view(void)
rtv_desc.Texture2DMSArray.ArraySize = 1;
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, array_colors[i], 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, &array_colors[i].x, 0, NULL);
}
transition_resource_state(command_list, resource,

View File

@ -1159,8 +1159,8 @@ void test_bundle_state_inheritance(void)
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0);
ID3D12GraphicsCommandList_Release(bundle);
ID3D12CommandAllocator_Release(bundle_allocator);
ID3D12GraphicsCommandList_Release(bundle);
destroy_test_context(&context);
}
@ -1449,721 +1449,6 @@ void test_vbv_stride_edge_cases(void)
destroy_test_context(&context);
}
void test_execute_indirect_state(void)
{
static const struct vec4 values = { 1000.0f, 2000.0f, 3000.0f, 4000.0f };
D3D12_INDIRECT_ARGUMENT_DESC indirect_argument_descs[2];
D3D12_COMMAND_SIGNATURE_DESC command_signature_desc;
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
ID3D12CommandSignature *command_signature;
D3D12_SO_DECLARATION_ENTRY so_entries[1];
ID3D12GraphicsCommandList *command_list;
D3D12_ROOT_PARAMETER root_parameters[4];
ID3D12RootSignature *root_signatures[2];
ID3D12Resource *argument_buffer_late;
D3D12_STREAM_OUTPUT_BUFFER_VIEW sov;
ID3D12Resource *streamout_buffer;
D3D12_VERTEX_BUFFER_VIEW vbvs[2];
ID3D12Resource *argument_buffer;
struct test_context_desc desc;
ID3D12PipelineState *psos[2];
struct test_context context;
struct resource_readback rb;
D3D12_INDEX_BUFFER_VIEW ibv;
ID3D12CommandQueue *queue;
const UINT so_stride = 16;
ID3D12Resource *vbo[3];
ID3D12Resource *ibo[2];
unsigned int i, j, k;
ID3D12Resource *cbv;
ID3D12Resource *srv;
ID3D12Resource *uav;
HRESULT hr;
static const D3D12_INPUT_ELEMENT_DESC layout_desc[] =
{
{"COLOR", 0, DXGI_FORMAT_R32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
{"COLOR", 1, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
};
struct test
{
const D3D12_INDIRECT_ARGUMENT_DESC *indirect_arguments;
uint32_t indirect_argument_count;
const void *argument_buffer_data;
size_t argument_buffer_size;
uint32_t api_max_count;
const struct vec4 *expected_output;
uint32_t expected_output_count;
uint32_t stride;
uint32_t pso_index;
bool needs_root_sig;
};
/* Modify root parameters. */
struct root_constant_data
{
float constants[2];
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
};
static const D3D12_INDIRECT_ARGUMENT_DESC root_constant_sig[2] =
{
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 1, .Num32BitValuesToSet = 2 }},
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }
};
static const struct root_constant_data root_constant_data[] =
{
{
.constants = { 100.0f, 500.0f },
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
},
{
.constants = { 200.0f, 800.0f },
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 2,
.StartIndexLocation = 1, .StartInstanceLocation = 100, }
},
};
static const struct vec4 root_constant_expected[] =
{
{ 1000.0f, 64.0f + 100.0f, 500.0f, 4000.0f },
{ 1001.0f, 65.0f + 100.0f, 500.0f, 4000.0f },
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4000.0f },
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4001.0f },
};
/* Modify root parameters, but very large root signature to test boundary conditions. */
static const D3D12_INDIRECT_ARGUMENT_DESC root_constant_spill_sig[2] =
{
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 44 + 1, .Num32BitValuesToSet = 2 }},
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }
};
static const struct root_constant_data root_constant_spill_data[] =
{
{
.constants = { 100.0f, 500.0f },
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
},
{
.constants = { 200.0f, 800.0f },
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 2,
.StartIndexLocation = 1, .StartInstanceLocation = 100, }
},
};
static const struct vec4 root_constant_spill_expected[] =
{
{ 1000.0f, 64.0f + 100.0f, 500.0f, 4000.0f },
{ 1001.0f, 65.0f + 100.0f, 500.0f, 4000.0f },
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4000.0f },
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4001.0f },
};
/* Modify VBOs. */
struct indirect_vbo_data
{
D3D12_VERTEX_BUFFER_VIEW view[2];
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
};
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_vbo_sig[3] =
{
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 0 }},
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 1 }},
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
};
/* Fill buffer locations later. */
struct indirect_vbo_data indirect_vbo_data[] =
{
{
.view = { { 0, 64, 8 }, { 0, 64, 16 } },
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 2 }
},
{
/* Test indirectly binding NULL descriptor and 0 stride. */
.view = { { 0, 0, 0 }, { 0, 64, 0 } },
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
}
};
static const struct vec4 indirect_vbo_expected[] =
{
{ 1064.0f, 2128.0f, 3000.0f, 4000.0f },
{ 1066.0f, 2132.0f, 3000.0f, 4000.0f },
{ 1064.0f, 2128.0f, 3000.0f, 4001.0f },
{ 1066.0f, 2132.0f, 3000.0f, 4001.0f },
{ 1000.0f, 2016.0f, 3000.0f, 4000.0f }, /* This is buggy on WARP and AMD. We seem to get null descriptor instead. */
{ 1000.0f, 2016.0f, 3000.0f, 4000.0f }, /* This is buggy on WARP and AMD. */
};
/* Modify just one VBO. */
struct indirect_vbo_one_data
{
D3D12_VERTEX_BUFFER_VIEW view;
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
};
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_vbo_one_sig[2] =
{
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 0 }},
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
};
/* Fill buffer locations later. */
struct indirect_vbo_one_data indirect_vbo_one_data[] =
{
{
.view = { 0, 64, 8 },
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
},
{
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 1 }
}
};
static const struct vec4 indirect_vbo_one_expected[] =
{
{ 1128.0f, 2064.0f, 3000.0f, 4000.0f },
{ 1130.0f, 2065.0f, 3000.0f, 4000.0f },
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
};
/* Indirect IBO */
struct indirect_ibo_data
{
D3D12_INDEX_BUFFER_VIEW view;
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
};
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_ibo_sig[2] =
{
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW },
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
};
struct indirect_ibo_data indirect_ibo_data[] =
{
{
.view = { 0, 0, DXGI_FORMAT_R32_UINT },
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
},
{
.view = { 0, 64, DXGI_FORMAT_R16_UINT },
.indexed = { .IndexCountPerInstance = 4, .InstanceCount = 1 }
},
};
static const struct vec4 indirect_ibo_expected[] =
{
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
{ 1016.0f, 2080.0f, 3000.0f, 4000.0f },
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
{ 1017.0f, 2081.0f, 3000.0f, 4000.0f },
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
};
/* Indirect root arguments */
struct indirect_root_descriptor_data
{
D3D12_GPU_VIRTUAL_ADDRESS cbv;
D3D12_GPU_VIRTUAL_ADDRESS srv;
D3D12_GPU_VIRTUAL_ADDRESS uav;
D3D12_DRAW_ARGUMENTS array;
};
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_root_descriptor_sig[4] =
{
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, .ConstantBufferView = { .RootParameterIndex = 1 } },
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW, .ShaderResourceView = { .RootParameterIndex = 2 } },
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW, .UnorderedAccessView = { .RootParameterIndex = 3 } },
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW },
};
struct indirect_root_descriptor_data indirect_root_descriptor_data[] =
{
{ .array = { .VertexCountPerInstance = 1, .InstanceCount = 1 } },
{ .array = { .VertexCountPerInstance = 1, .InstanceCount = 1 } },
};
static const struct vec4 indirect_root_descriptor_expected[] =
{
{ 1000.0f, 2064.0f, 3000.0f + 64.0f, 4000.0f + 2.0f },
{ 1000.0f, 2064.0f, 3000.0f + 128.0f, 4000.0f + 3.0f },
};
/* Test packing rules.
* 64-bit aligned values are tightly packed with 32-bit alignment when they are in indirect command buffers. */
struct indirect_alignment_data
{
float value;
uint32_t cbv_va[2];
D3D12_DRAW_ARGUMENTS arrays;
};
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_alignment_sig[3] =
{
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 1, .Num32BitValuesToSet = 1 }},
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, .ConstantBufferView = { .RootParameterIndex = 1 }},
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW },
};
struct indirect_alignment_data indirect_alignment_data[] =
{
{
.value = 5.0f,
.arrays = { .VertexCountPerInstance = 1, .InstanceCount = 1 }
},
{
.value = 6.0f,
.arrays = { .VertexCountPerInstance = 1, .InstanceCount = 1 }
},
};
static const struct vec4 indirect_alignment_expected[] =
{
{ 1000.0f, 69.0f, 3064.0f, 4000.0f },
{ 1000.0f, 70.0f, 3128.0f, 4000.0f },
};
#define DECL_TEST(t, pso_index, needs_root_sig) { t##_sig, ARRAY_SIZE(t##_sig), t##_data, sizeof(t##_data), ARRAY_SIZE(t##_data), \
t##_expected, ARRAY_SIZE(t##_expected), sizeof(*(t##_data)), pso_index, needs_root_sig }
const struct test tests[] =
{
DECL_TEST(root_constant, 0, true),
DECL_TEST(indirect_vbo, 0, false),
DECL_TEST(indirect_vbo_one, 0, false),
DECL_TEST(indirect_ibo, 0, false),
DECL_TEST(indirect_root_descriptor, 0, true),
DECL_TEST(indirect_alignment, 0, true),
DECL_TEST(root_constant_spill, 1, true),
DECL_TEST(indirect_root_descriptor, 1, true),
};
#undef DECL_TEST
uint32_t ibo_data[ARRAY_SIZE(ibo)][64];
float vbo_data[ARRAY_SIZE(vbo)][64];
float generic_data[4096];
static const DWORD vs_code_small_cbv[] =
{
#if 0
cbuffer RootCBV : register(b0)
{
float a;
};
StructuredBuffer<float> RootSRV : register(t0);
cbuffer RootConstants : register(b0, space1)
{
float4 root;
};
float4 main(float c0 : COLOR0, float c1 : COLOR1, uint iid : SV_InstanceID) : SV_Position
{
return float4(c0, c1, a, RootSRV[0] + float(iid)) + root;
}
#endif
0x43425844, 0x33b7b302, 0x34259b9b, 0x3e8568d9, 0x5a5e0c3e, 0x00000001, 0x00000268, 0x00000003,
0x0000002c, 0x00000098, 0x000000cc, 0x4e475349, 0x00000064, 0x00000003, 0x00000008, 0x00000050,
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000101, 0x00000050, 0x00000001, 0x00000000,
0x00000003, 0x00000001, 0x00000101, 0x00000056, 0x00000000, 0x00000008, 0x00000001, 0x00000002,
0x00000101, 0x4f4c4f43, 0x56530052, 0x736e495f, 0x636e6174, 0x00444965, 0x4e47534f, 0x0000002c,
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000194, 0x00010051, 0x00000065, 0x0100086a,
0x07000059, 0x00308e46, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x07000059,
0x00308e46, 0x00000001, 0x00000000, 0x00000000, 0x00000001, 0x00000001, 0x070000a2, 0x00307e46,
0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x0300005f, 0x00101012, 0x00000000,
0x0300005f, 0x00101012, 0x00000001, 0x04000060, 0x00101012, 0x00000002, 0x00000008, 0x04000067,
0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x0a0000a7, 0x00100012, 0x00000000,
0x00004001, 0x00000000, 0x00004001, 0x00000000, 0x00207006, 0x00000000, 0x00000000, 0x05000056,
0x00100022, 0x00000000, 0x0010100a, 0x00000002, 0x07000000, 0x00100012, 0x00000000, 0x0010001a,
0x00000000, 0x0010000a, 0x00000000, 0x09000000, 0x00102012, 0x00000000, 0x0010100a, 0x00000000,
0x0030800a, 0x00000001, 0x00000000, 0x00000000, 0x09000000, 0x00102022, 0x00000000, 0x0010100a,
0x00000001, 0x0030801a, 0x00000001, 0x00000000, 0x00000000, 0x0b000000, 0x00102042, 0x00000000,
0x0030800a, 0x00000000, 0x00000000, 0x00000000, 0x0030802a, 0x00000001, 0x00000000, 0x00000000,
0x09000000, 0x00102082, 0x00000000, 0x0010000a, 0x00000000, 0x0030803a, 0x00000001, 0x00000000,
0x00000000, 0x0100003e,
};
static const DWORD vs_code_large_cbv[] =
{
#if 0
cbuffer RootCBV : register(b0)
{
float a;
};
StructuredBuffer<float> RootSRV : register(t0);
cbuffer RootConstants : register(b0, space1)
{
// Cannot use arrays for root constants in D3D12.
float4 pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10;
float4 root;
};
float4 main(float c0 : COLOR0, float c1 : COLOR1, uint iid : SV_InstanceID) : SV_Position
{
return float4(c0, c1, a, RootSRV[0] + float(iid)) + root;
}
#endif
0x43425844, 0x99a057e8, 0x20344569, 0x434f8a7a, 0xf9171e08, 0x00000001, 0x00000268, 0x00000003,
0x0000002c, 0x00000098, 0x000000cc, 0x4e475349, 0x00000064, 0x00000003, 0x00000008, 0x00000050,
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000101, 0x00000050, 0x00000001, 0x00000000,
0x00000003, 0x00000001, 0x00000101, 0x00000056, 0x00000000, 0x00000008, 0x00000001, 0x00000002,
0x00000101, 0x4f4c4f43, 0x56530052, 0x736e495f, 0x636e6174, 0x00444965, 0x4e47534f, 0x0000002c,
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000194, 0x00010051, 0x00000065, 0x0100086a,
0x07000059, 0x00308e46, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x07000059,
0x00308e46, 0x00000001, 0x00000000, 0x00000000, 0x0000000c, 0x00000001, 0x070000a2, 0x00307e46,
0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x0300005f, 0x00101012, 0x00000000,
0x0300005f, 0x00101012, 0x00000001, 0x04000060, 0x00101012, 0x00000002, 0x00000008, 0x04000067,
0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x0a0000a7, 0x00100012, 0x00000000,
0x00004001, 0x00000000, 0x00004001, 0x00000000, 0x00207006, 0x00000000, 0x00000000, 0x05000056,
0x00100022, 0x00000000, 0x0010100a, 0x00000002, 0x07000000, 0x00100012, 0x00000000, 0x0010001a,
0x00000000, 0x0010000a, 0x00000000, 0x09000000, 0x00102012, 0x00000000, 0x0010100a, 0x00000000,
0x0030800a, 0x00000001, 0x00000000, 0x0000000b, 0x09000000, 0x00102022, 0x00000000, 0x0010100a,
0x00000001, 0x0030801a, 0x00000001, 0x00000000, 0x0000000b, 0x0b000000, 0x00102042, 0x00000000,
0x0030800a, 0x00000000, 0x00000000, 0x00000000, 0x0030802a, 0x00000001, 0x00000000, 0x0000000b,
0x09000000, 0x00102082, 0x00000000, 0x0010000a, 0x00000000, 0x0030803a, 0x00000001, 0x00000000,
0x0000000b, 0x0100003e,
};
memset(&desc, 0, sizeof(desc));
desc.no_root_signature = true;
desc.no_pipeline = true;
if (!init_test_context(&context, &desc))
return;
command_list = context.list;
queue = context.queue;
for (j = 0; j < ARRAY_SIZE(ibo); j++)
for (i = 0; i < ARRAY_SIZE(ibo_data[j]); i++)
ibo_data[j][i] = j * 16 + i;
for (j = 0; j < ARRAY_SIZE(vbo); j++)
for (i = 0; i < ARRAY_SIZE(vbo_data[j]); i++)
vbo_data[j][i] = (float)(j * ARRAY_SIZE(vbo_data[j]) + i);
for (i = 0; i < ARRAY_SIZE(generic_data); i++)
generic_data[i] = (float)i;
for (i = 0; i < ARRAY_SIZE(ibo); i++)
ibo[i] = create_upload_buffer(context.device, sizeof(ibo_data[i]), ibo_data[i]);
for (i = 0; i < ARRAY_SIZE(vbo); i++)
vbo[i] = create_upload_buffer(context.device, sizeof(vbo_data[i]), vbo_data[i]);
cbv = create_upload_buffer(context.device, sizeof(generic_data), generic_data);
srv = create_upload_buffer(context.device, sizeof(generic_data), generic_data);
uav = create_default_buffer(context.device, sizeof(generic_data),
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
indirect_vbo_data[0].view[0].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[1]);
indirect_vbo_data[0].view[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[2]);
indirect_vbo_data[1].view[0].BufferLocation = 0;
indirect_vbo_data[1].view[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[0]) + 64;
indirect_vbo_one_data[0].view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[2]);
indirect_vbo_one_data[1].view.BufferLocation = 0;
indirect_ibo_data[1].view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(ibo[1]);
indirect_root_descriptor_data[0].cbv = ID3D12Resource_GetGPUVirtualAddress(cbv) + 256;
indirect_root_descriptor_data[0].srv = ID3D12Resource_GetGPUVirtualAddress(srv) + 8;
indirect_root_descriptor_data[0].uav = ID3D12Resource_GetGPUVirtualAddress(uav) + 4;
indirect_root_descriptor_data[1].cbv = ID3D12Resource_GetGPUVirtualAddress(cbv) + 512;
indirect_root_descriptor_data[1].srv = ID3D12Resource_GetGPUVirtualAddress(srv) + 12;
indirect_root_descriptor_data[1].uav = ID3D12Resource_GetGPUVirtualAddress(uav) + 8;
memcpy(indirect_alignment_data[0].cbv_va, &indirect_root_descriptor_data[0].cbv, sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
memcpy(indirect_alignment_data[1].cbv_va, &indirect_root_descriptor_data[1].cbv, sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
memset(&root_signature_desc, 0, sizeof(root_signature_desc));
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
memset(root_parameters, 0, sizeof(root_parameters));
root_signature_desc.pParameters = root_parameters;
root_signature_desc.NumParameters = ARRAY_SIZE(root_parameters);
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
root_parameters[0].Constants.RegisterSpace = 1;
root_parameters[0].Constants.Num32BitValues = 4;
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
root_parameters[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
root_parameters[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
hr = create_root_signature(context.device, &root_signature_desc, &root_signatures[0]);
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
root_parameters[0].Constants.Num32BitValues = 48;
hr = create_root_signature(context.device, &root_signature_desc, &root_signatures[1]);
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
memset(so_entries, 0, sizeof(so_entries));
so_entries[0].ComponentCount = 4;
so_entries[0].SemanticName = "SV_Position";
memset(&pso_desc, 0, sizeof(pso_desc));
pso_desc.VS.pShaderBytecode = vs_code_small_cbv;
pso_desc.VS.BytecodeLength = sizeof(vs_code_small_cbv);
pso_desc.StreamOutput.NumStrides = 1;
pso_desc.StreamOutput.pBufferStrides = &so_stride;
pso_desc.StreamOutput.pSODeclaration = so_entries;
pso_desc.StreamOutput.NumEntries = ARRAY_SIZE(so_entries);
pso_desc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;
pso_desc.pRootSignature = root_signatures[0];
pso_desc.SampleDesc.Count = 1;
pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
pso_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pso_desc.InputLayout.NumElements = ARRAY_SIZE(layout_desc);
pso_desc.InputLayout.pInputElementDescs = layout_desc;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, &IID_ID3D12PipelineState, (void**)&psos[0]);
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
pso_desc.VS.pShaderBytecode = vs_code_large_cbv;
pso_desc.VS.BytecodeLength = sizeof(vs_code_large_cbv);
pso_desc.pRootSignature = root_signatures[1];
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, &IID_ID3D12PipelineState, (void**)&psos[1]);
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
/* Verify sanity checks.
* As per validation layers, there must be exactly one command in the signature.
* It must come last. Verify that we check for this. */
memset(&command_signature_desc, 0, sizeof(command_signature_desc));
command_signature_desc.NumArgumentDescs = 1;
command_signature_desc.pArgumentDescs = indirect_argument_descs;
command_signature_desc.ByteStride = sizeof(D3D12_VERTEX_BUFFER_VIEW);
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
&IID_ID3D12CommandSignature, (void**)&command_signature);
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
command_signature_desc.NumArgumentDescs = 2;
command_signature_desc.pArgumentDescs = indirect_argument_descs;
command_signature_desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) + sizeof(D3D12_VERTEX_BUFFER_VIEW);
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
indirect_argument_descs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
&IID_ID3D12CommandSignature, (void**)&command_signature);
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
command_signature_desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) + sizeof(D3D12_DRAW_INDEXED_ARGUMENTS);
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
indirect_argument_descs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
&IID_ID3D12CommandSignature, (void**)&command_signature);
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
for (i = 0; i < ARRAY_SIZE(tests); i++)
{
struct vec4 expect_reset_state[2];
const struct vec4 *expect, *v;
uint32_t expected_output_size;
uint32_t clear_vbo_mask;
bool root_cbv;
uint32_t size;
vkd3d_test_set_context("Test %u", i);
command_signature_desc.ByteStride = tests[i].stride;
command_signature_desc.pArgumentDescs = tests[i].indirect_arguments;
command_signature_desc.NumArgumentDescs = tests[i].indirect_argument_count;
command_signature_desc.NodeMask = 0;
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc,
tests[i].needs_root_sig ? root_signatures[tests[i].pso_index] : NULL,
&IID_ID3D12CommandSignature, (void**)&command_signature);
/* Updating root CBV requires push BDA path, which we don't enable on NV by default yet. */
root_cbv = false;
for (j = 0; j < tests[i].indirect_argument_count; j++)
{
if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW)
{
root_cbv = true;
break;
}
}
if (FAILED(hr))
{
if (root_cbv && is_nvidia_device(context.device))
skip("Creating indirect root CBV update failed. If the GPU is NVIDIA, try VKD3D_CONFIG=force_raw_va_cbv.\n");
else
skip("Failed creating command signature, skipping test.\n");
continue;
}
argument_buffer = create_upload_buffer(context.device, 256 * 1024, NULL);
argument_buffer_late = create_default_buffer(context.device, 256 * 1024,
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
#define UNALIGNED_ARGUMENT_BUFFER_OFFSET (64 * 1024 + 4)
#define UNALIGNED_COUNT_BUFFER_OFFSET (128 * 1024 + 4)
#define ALIGNED_COUNT_BUFFER_OFFSET (128 * 1024 + 4 * 1024)
{
uint8_t *ptr;
ID3D12Resource_Map(argument_buffer, 0, NULL, (void**)&ptr);
memcpy(ptr, tests[i].argument_buffer_data, tests[i].argument_buffer_size);
memcpy(ptr + UNALIGNED_ARGUMENT_BUFFER_OFFSET, tests[i].argument_buffer_data, tests[i].argument_buffer_size);
memcpy(ptr + UNALIGNED_COUNT_BUFFER_OFFSET, &tests[i].api_max_count, sizeof(tests[i].api_max_count));
memcpy(ptr + ALIGNED_COUNT_BUFFER_OFFSET, &tests[i].api_max_count, sizeof(tests[i].api_max_count));
ID3D12Resource_Unmap(argument_buffer, 0, NULL);
}
streamout_buffer = create_default_buffer(context.device, 64 * 1024,
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_STREAM_OUT);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, root_signatures[tests[i].pso_index]);
ID3D12GraphicsCommandList_SetPipelineState(command_list, psos[tests[i].pso_index]);
sov.SizeInBytes = 64 * 1024 - sizeof(struct vec4);
sov.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(streamout_buffer) + sizeof(struct vec4);
sov.BufferFilledSizeLocation = ID3D12Resource_GetGPUVirtualAddress(streamout_buffer);
ID3D12GraphicsCommandList_SOSetTargets(command_list, 0, 1, &sov);
/* Set up default rendering state. */
ibv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(ibo[0]);
ibv.SizeInBytes = sizeof(ibo_data[0]);
ibv.Format = DXGI_FORMAT_R32_UINT;
vbvs[0].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[0]);
vbvs[0].SizeInBytes = sizeof(vbo_data[0]);
vbvs[0].StrideInBytes = 4;
vbvs[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[1]);
vbvs[1].SizeInBytes = sizeof(vbo_data[1]);
vbvs[1].StrideInBytes = 4;
ID3D12GraphicsCommandList_IASetIndexBuffer(command_list, &ibv);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_POINTLIST);
ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, 2, vbvs);
for (j = 0; j < (tests[i].pso_index ? 12 : 1); j++)
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &values, 4 * j);
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1,
ID3D12Resource_GetGPUVirtualAddress(cbv));
ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(command_list, 2,
ID3D12Resource_GetGPUVirtualAddress(srv));
ID3D12GraphicsCommandList_SetGraphicsRootUnorderedAccessView(command_list, 3,
ID3D12Resource_GetGPUVirtualAddress(uav));
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, tests[i].api_max_count,
argument_buffer, 0, NULL, 0);
/* Test equivalent call with indirect count. */
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, 1024,
argument_buffer, UNALIGNED_ARGUMENT_BUFFER_OFFSET,
argument_buffer, UNALIGNED_COUNT_BUFFER_OFFSET);
/* Test equivalent, but now with late transition to INDIRECT. */
ID3D12GraphicsCommandList_CopyResource(command_list, argument_buffer_late, argument_buffer);
transition_resource_state(command_list, argument_buffer_late, D3D12_RESOURCE_STATE_COPY_DEST,
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, 1024,
argument_buffer_late, 0, argument_buffer_late, ALIGNED_COUNT_BUFFER_OFFSET);
/* Root descriptors which are part of the state block are cleared to NULL. Recover them here
* since attempting to draw next test will crash GPU. */
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1,
ID3D12Resource_GetGPUVirtualAddress(cbv));
ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(command_list, 2,
ID3D12Resource_GetGPUVirtualAddress(srv));
ID3D12GraphicsCommandList_SetGraphicsRootUnorderedAccessView(command_list, 3,
ID3D12Resource_GetGPUVirtualAddress(uav));
/* Other state is cleared to 0. */
ID3D12GraphicsCommandList_DrawInstanced(command_list, 2, 1, 0, 0);
transition_resource_state(command_list, streamout_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_RESOURCE_STATE_COPY_SOURCE);
get_buffer_readback_with_command_list(streamout_buffer, DXGI_FORMAT_R32G32B32A32_FLOAT, &rb, queue, command_list);
reset_command_list(command_list, context.allocator);
expected_output_size = (tests[i].expected_output_count * 3 + 2) * sizeof(struct vec4);
size = get_readback_uint(&rb, 0, 0, 0);
ok(size == expected_output_size, "Expected size %u, got %u.\n", expected_output_size, size);
for (j = 0; j < tests[i].expected_output_count; j++)
{
expect = &tests[i].expected_output[j];
v = get_readback_vec4(&rb, j + 1, 0);
ok(compare_vec4(v, expect, 0), "Element (direct count) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
v = get_readback_vec4(&rb, j + tests[i].expected_output_count + 1, 0);
ok(compare_vec4(v, expect, 0), "Element (indirect count) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
v = get_readback_vec4(&rb, j + 2 * tests[i].expected_output_count + 1, 0);
ok(compare_vec4(v, expect, 0), "Element (late latch) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
}
clear_vbo_mask = 0;
expect_reset_state[0] = values;
/* Root constant state is cleared to zero if it's part of the signature. */
for (j = 0; j < tests[i].indirect_argument_count; j++)
{
if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT)
{
for (k = 0; k < tests[i].indirect_arguments[j].Constant.Num32BitValuesToSet; k++)
(&expect_reset_state[0].x)[(tests[i].indirect_arguments[j].Constant.DestOffsetIn32BitValues + k) % 4] = 0.0f;
}
else if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW)
clear_vbo_mask |= 1u << tests[i].indirect_arguments[j].VertexBuffer.Slot;
}
expect_reset_state[1] = expect_reset_state[0];
/* VBO/IBO state is cleared to zero if it's part of the signature.
* A NULL IBO should be seen as a IBO which only reads 0 index. */
if (!(clear_vbo_mask & (1u << 0)))
expect_reset_state[1].x += 1.0f;
if (!(clear_vbo_mask & (1u << 1)))
{
expect_reset_state[0].y += 64.0f;
expect_reset_state[1].y += 65.0f;
}
for (j = 0; j < 2; j++)
{
v = get_readback_vec4(&rb, j + 1 + 3 * tests[i].expected_output_count, 0);
expect = &expect_reset_state[j];
ok(compare_vec4(v, expect, 0), "Post-reset element %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
}
ID3D12CommandSignature_Release(command_signature);
ID3D12Resource_Release(argument_buffer);
ID3D12Resource_Release(argument_buffer_late);
ID3D12Resource_Release(streamout_buffer);
release_resource_readback(&rb);
}
vkd3d_test_set_context(NULL);
for (i = 0; i < ARRAY_SIZE(psos); i++)
ID3D12PipelineState_Release(psos[i]);
for (i = 0; i < ARRAY_SIZE(root_signatures); i++)
ID3D12RootSignature_Release(root_signatures[i]);
for (i = 0; i < ARRAY_SIZE(vbo); i++)
ID3D12Resource_Release(vbo[i]);
for (i = 0; i < ARRAY_SIZE(ibo); i++)
ID3D12Resource_Release(ibo[i]);
ID3D12Resource_Release(cbv);
ID3D12Resource_Release(srv);
ID3D12Resource_Release(uav);
destroy_test_context(&context);
}
void test_execute_indirect(void)
{
ID3D12Resource *argument_buffer, *count_buffer, *uav;

View File

@ -554,9 +554,9 @@ void test_copy_texture_buffer(void)
void test_copy_buffer_to_depth_stencil(void)
{
ID3D12Resource *src_buffer_stencil = NULL;
ID3D12GraphicsCommandList *command_list;
struct resource_readback rb_stencil;
ID3D12Resource *src_buffer_stencil;
struct resource_readback rb_depth;
ID3D12Resource *src_buffer_depth;
struct test_context_desc desc;

View File

@ -4552,41 +4552,31 @@ void test_typed_srv_uav_cast(void)
{ DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_FLOAT, false, false },
{ DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_FLOAT, false, false },
/* 5.3.9.5 from D3D11 functional spec. 32-bit typeless formats
* can be viewed as R32{U,I,F}. The D3D12 validation runtime appears to be buggy
* and also allows fully typed views even if bits per component don't match.
* This feature is derived from legacy D3D11 jank, so assume the validation layers are
* just buggy. */
/* Special D3D11 magic. For UAVs, we can reinterpret formats as the "always supported" types R32{U,I,F}.
* If typeless, we can cast to any R32U/I/F format.
* If not typeless, we follow float <-> non-float ban. */
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_FLOAT, false, false },
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_FLOAT, false, false },
{ DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R32_FLOAT, false, true },
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_FLOAT, false, true },
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_FLOAT, false, false },
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_FLOAT, false, false },
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_FLOAT, false, false },
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_FLOAT, false, false },
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_UINT, false, false },
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_SINT, false, false },
/* D3D12 validation does not complain about these, but it should according to D3D11 functional spec.
* No docs for D3D12 say otherwise.
* These tests can trip assertions in drivers since we will not emit MUTABLE at all
* for some of these tests. */
#if 0
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_FLOAT, false, true },
#endif
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_UINT, false, true },
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_SINT, false, true },
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_FLOAT, false, true },
};
if (!init_compute_test_context(&context))
@ -4887,633 +4877,3 @@ void test_typed_srv_cast_clear(void)
ID3D12DescriptorHeap_Release(heap);
destroy_test_context(&context);
}
/* Tests 3D-texture UAVs created over a subrange of W-slices (D3D12_TEX3D_UAV
 * FirstWSlice/WSize), including WSize = -1 meaning "all remaining slices",
 * across both mips of a 4x4x16, 2-mip R32_UINT texture.
 *
 * For each sliced view the test first runs a "poison" shader that writes
 * 0xdeadca7 to every thread coordinate (flooding the descriptor, including
 * out-of-bounds W slices, to validate robustness clamping), then an "actual"
 * shader that only writes within the view's reported depth. The written value
 * encodes the view index plus the dimensions the shader observed, so the
 * readback verifies both the FirstWSlice offset and the clamped WSize. */
void test_uav_3d_sliced_view(void)
{
D3D12_UNORDERED_ACCESS_VIEW_DESC uav;
D3D12_ROOT_SIGNATURE_DESC rs_desc;
D3D12_ROOT_PARAMETER root_params[2];
ID3D12PipelineState *pso_poison;
ID3D12PipelineState *pso_actual;
struct resource_readback rb[2];
D3D12_DESCRIPTOR_RANGE range;
D3D12_GPU_DESCRIPTOR_HANDLE h;
/* Expected texel values, indexed [z][y][x] at mip 0 extents; rebuilt per mip. */
uint32_t reference[16][4][4];
struct test_context context;
ID3D12DescriptorHeap *heap;
ID3D12Resource *resource;
unsigned int x, y, z;
unsigned int i;
/* In-bounds writer: each thread stores
 * value | (width << 8) | (height << 16) | (depth << 24), where the dimensions
 * come from GetDimensions() on the sliced view, and threads with
 * thr.z >= depth are skipped. */
static const DWORD cs_actual_dxbc[] =
{
#if 0
cbuffer C : register(b0) { uint value; }
RWTexture3D<uint> T : register(u0);
[numthreads(4, 4, 16)]
void main(uint3 thr : SV_DispatchThreadID)
{
uint w, h, d;
T.GetDimensions(w, h, d);
if (thr.z < d)
T[thr] = value | (w << 8) | (h << 16) | (d << 24);
}
#endif
0x43425844, 0xf1736792, 0x8492219a, 0x6751cced, 0xf0219682, 0x00000001, 0x00000188, 0x00000003,
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000134, 0x00050050, 0x0000004d, 0x0100086a,
0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x0400289c, 0x0011e000, 0x00000000, 0x00004444,
0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000004, 0x00000004, 0x00000010,
0x8900103d, 0x80000142, 0x00111103, 0x00100072, 0x00000000, 0x00004001, 0x00000000, 0x0011ee46,
0x00000000, 0x0600004f, 0x00100082, 0x00000000, 0x0002002a, 0x0010002a, 0x00000000, 0x0304001f,
0x0010003a, 0x00000000, 0x0a000029, 0x00100072, 0x00000000, 0x00100246, 0x00000000, 0x00004002,
0x00000008, 0x00000010, 0x00000018, 0x00000000, 0x0800003c, 0x00100012, 0x00000000, 0x0010000a,
0x00000000, 0x0020800a, 0x00000000, 0x00000000, 0x0700003c, 0x00100012, 0x00000000, 0x0010001a,
0x00000000, 0x0010000a, 0x00000000, 0x0700003c, 0x00100012, 0x00000000, 0x0010002a, 0x00000000,
0x0010000a, 0x00000000, 0x060000a4, 0x0011e0f2, 0x00000000, 0x00020a46, 0x00100006, 0x00000000,
0x01000015, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE cs_actual = SHADER_BYTECODE(cs_actual_dxbc);
/* Out-of-bounds flooder: writes 0xdeadca7 unconditionally at every thread
 * coordinate; writes past the view's WSize must be discarded by robustness. */
static const DWORD cs_poison_dxbc[] =
{
#if 0
cbuffer C : register(b0) { uint value; }
RWTexture3D<uint> T : register(u0);
[numthreads(4, 4, 16)]
void main(uint3 thr : SV_DispatchThreadID)
{
T[thr] = 0xdeadca7;
}
#endif
0x43425844, 0x4c99e486, 0x7707bd40, 0xceb3b496, 0xe22f4397, 0x00000001, 0x000000b0, 0x00000003,
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x0000005c, 0x00050050, 0x00000017, 0x0100086a,
0x0400289c, 0x0011e000, 0x00000000, 0x00004444, 0x0200005f, 0x00020072, 0x0400009b, 0x00000004,
0x00000004, 0x00000010, 0x090000a4, 0x0011e0f2, 0x00000000, 0x00020a46, 0x00004002, 0x0deadca7,
0x0deadca7, 0x0deadca7, 0x0deadca7, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE cs_poison = SHADER_BYTECODE(cs_poison_dxbc);
/* One UAV per entry: { MipSlice, FirstWSlice, WSize }. Later views overwrite
 * earlier ones in the reference model since views are dispatched in order. */
static const D3D12_TEX3D_UAV slices[] =
{
/* Just to clear everything */
{ 0, 0, -1u }, /* -1 means all remaining slices. */
{ 1, 0, -1u },
/* ... */
{ 0, 0, 2 },
{ 0, 5, 3 },
{ 0, 9, 1 },
{ 0, 12, 4 },
{ 0, 10, 5 },
{ 1, 0, 2 },
{ 1, 4, 3 },
{ 0, 15, -1u },
/* WSize = 0 is not allowed. Trips DEVICE_LOST. */
};
if (!init_compute_test_context(&context))
return;
/* Root signature: param 0 = UAV descriptor table (one descriptor),
 * param 1 = one 32-bit root constant (the per-view "value"). */
memset(&rs_desc, 0, sizeof(rs_desc));
memset(root_params, 0, sizeof(root_params));
memset(&range, 0, sizeof(range));
rs_desc.NumParameters = ARRAY_SIZE(root_params);
rs_desc.pParameters = root_params;
root_params[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_params[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
root_params[0].DescriptorTable.NumDescriptorRanges = 1;
root_params[0].DescriptorTable.pDescriptorRanges = &range;
range.NumDescriptors = 1;
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
root_params[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
root_params[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
root_params[1].Constants.Num32BitValues = 1;
create_root_signature(context.device, &rs_desc, &context.root_signature);
pso_actual = create_compute_pipeline_state(context.device, context.root_signature, cs_actual);
pso_poison = create_compute_pipeline_state(context.device, context.root_signature, cs_poison);
/* 4x4x16 R32_UINT with 2 mips; mip 1 is therefore 2x2x8. */
resource = create_default_texture3d(context.device, 4, 4, 16, 2, DXGI_FORMAT_R32_UINT,
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
/* Create one sliced UAV per slices[] entry in a GPU-visible heap. */
memset(&uav, 0, sizeof(uav));
uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
uav.Format = DXGI_FORMAT_R32_UINT;
heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, ARRAY_SIZE(slices));
for (i = 0; i < ARRAY_SIZE(slices); i++)
{
D3D12_CPU_DESCRIPTOR_HANDLE h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap);
h.ptr += i * ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
uav.Texture3D = slices[i];
ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav, h);
}
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &heap);
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
h = ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap);
/* For every view: poison dispatch, barrier, in-bounds dispatch, barrier.
 * Root constant is i + 1 so view 0 is distinguishable from cleared texels. */
for (i = 0; i < ARRAY_SIZE(slices); i++)
{
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, h);
ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(context.list, 1, i + 1, 0);
/* First, attempt to flood the descriptor with writes. Validates robustness. */
ID3D12GraphicsCommandList_SetPipelineState(context.list, pso_poison);
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
uav_barrier(context.list, resource);
/* Now, only write in bounds. Makes sure FirstWSlice offset works. */
ID3D12GraphicsCommandList_SetPipelineState(context.list, pso_actual);
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
uav_barrier(context.list, resource);
h.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
transition_resource_state(context.list, resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
get_texture_readback_with_command_list(resource, 0, &rb[0], context.queue, context.list);
/* Build the mip-0 reference: replay every MipSlice == 0 view in order,
 * writing (i + 1) | (4 << 8) | (4 << 16) | (num_slices << 24), mirroring what
 * cs_actual stores; num_slices is WSize clamped to the 16-slice depth. */
for (i = 0; i < ARRAY_SIZE(slices); i++)
{
unsigned int num_slices;
if (slices[i].MipSlice != 0)
continue;
num_slices = min(16 - slices[i].FirstWSlice, slices[i].WSize);
for (z = 0; z < num_slices; z++)
{
for (y = 0; y < 4; y++)
{
for (x = 0; x < 4; x++)
{
uint32_t *ref = &reference[z + slices[i].FirstWSlice][y][x];
*ref = i + 1;
*ref |= 4 << 8;
*ref |= 4 << 16;
*ref |= num_slices << 24;
}
}
}
}
for (z = 0; z < 16; z++)
{
for (y = 0; y < 4; y++)
{
for (x = 0; x < 4; x++)
{
uint32_t value;
value = get_readback_uint(&rb[0], x, y, z);
/* NOTE(review): `todo` presumably marks this check as not yet passing
 * on the implementation under test — confirm against the test harness. */
todo ok(value == reference[z][y][x], "Error for mip 0 at %u, %u, %u. Got %x, expected %x.\n", x, y, z, value, reference[z][y][x]);
}
}
}
reset_command_list(context.list, context.allocator);
get_texture_readback_with_command_list(resource, 1, &rb[1], context.queue, context.list);
/* Same reference replay for mip 1 (2x2x8); the { 1, 0, -1u } entry rewrites
 * every mip-1 texel, so no stale mip-0 reference data survives in the
 * region compared below. */
for (i = 0; i < ARRAY_SIZE(slices); i++)
{
unsigned int num_slices;
if (slices[i].MipSlice != 1)
continue;
num_slices = min(8 - slices[i].FirstWSlice, slices[i].WSize);
for (z = 0; z < num_slices; z++)
{
for (y = 0; y < 2; y++)
{
for (x = 0; x < 2; x++)
{
uint32_t *ref = &reference[z + slices[i].FirstWSlice][y][x];
*ref = i + 1;
*ref |= 2 << 8;
*ref |= 2 << 16;
*ref |= num_slices << 24;
}
}
}
}
for (z = 0; z < 8; z++)
{
for (y = 0; y < 2; y++)
{
for (x = 0; x < 2; x++)
{
uint32_t value;
value = get_readback_uint(&rb[1], x, y, z);
todo ok(value == reference[z][y][x], "Error for mip 1 at %u, %u, %u. Got %x, expected %x.\n", x, y, z, value, reference[z][y][x]);
}
}
}
for (i = 0; i < ARRAY_SIZE(rb); i++)
release_resource_readback(&rb[i]);
ID3D12Resource_Release(resource);
ID3D12PipelineState_Release(pso_actual);
ID3D12PipelineState_Release(pso_poison);
ID3D12DescriptorHeap_Release(heap);
destroy_test_context(&context);
}
/* Exploratory probe of how native drivers handle out-of-range accesses through
 * root descriptor SRVs: a negative structured-buffer index, a huge (1u << 30)
 * element index, and a negative byte address. There is no well-defined answer;
 * the test only logs what the hardware did. Compiled out by default because it
 * is expected to crash the GPU (observed: NV blue-screens). */
void test_root_descriptor_offset_sign(void)
{
    /* Exploratory test in nature. Will likely crash GPU if not on native drivers. Tweak ifdef to run it. */
#if 1
    skip("Skipping exploratory test for root descriptor over/underflow test.\n");
#else
    ID3D12RootSignature *root_signature;
    D3D12_ROOT_PARAMETER root_params[3];
    D3D12_ROOT_SIGNATURE_DESC rs_desc;
    ID3D12Resource *output_buffer;
    ID3D12Resource *input_buffer;
    struct resource_readback rb;
    struct test_context context;
    ID3D12PipelineState *pso;
    uint32_t values[4];
    unsigned int i;

    static const BYTE cs_code_dxil[] =
    {
#if 0
    RWStructuredBuffer<uint4> RW : register(u0);
    StructuredBuffer<uint> R0 : register(t0);
    ByteAddressBuffer R1 : register(t1);

    [numthreads(1, 1, 1)]
    void main()
    {
        uint a = R0[-1]; // Negative index
        uint b = R0[1u << 30]; // offset 4 GB. Does it overflow back to 0?
        uint c = R1.Load(-4); // Negative offset
        uint d = R1.Load(0);
        RW[0] = uint4(a, b, c, d);
    }
#endif
        0x44, 0x58, 0x42, 0x43, 0xac, 0xbf, 0xf4, 0x1f, 0x2f, 0x84, 0x34, 0x51, 0x10, 0xd2, 0xe1, 0x21, 0x95, 0x3b, 0xc5, 0x21, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
        0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x90, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
        0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe5, 0x8f, 0xa6, 0x7e, 0x5d, 0xa7, 0xe6, 0xd6, 0x02, 0xac, 0xbd, 0xbf, 0x6f, 0x1b, 0xee, 0xc4, 0x44, 0x58, 0x49, 0x4c,
        0xe8, 0x05, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x7a, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xd0, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde,
        0x21, 0x0c, 0x00, 0x00, 0x71, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39,
        0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88,
        0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06,
        0x51, 0x18, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xaa, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x01, 0x00, 0x00, 0x00,
        0x49, 0x18, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04,
        0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x60, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0x10, 0x1a, 0xf7,
        0x0c, 0x97, 0x3f, 0x61, 0x0f, 0x21, 0xf9, 0x21, 0xd0, 0x0c, 0x0b, 0x81, 0x02, 0x32, 0x47, 0x00, 0x06, 0x73, 0x04, 0x41, 0x31, 0x8a, 0x19, 0xc6, 0x1c, 0x42, 0x37, 0x0d, 0x97, 0x3f, 0x61, 0x0f,
        0x21, 0xf9, 0x2b, 0x21, 0xad, 0xc4, 0xe4, 0x23, 0xb7, 0x8d, 0x0a, 0x63, 0x8c, 0x31, 0xa5, 0x50, 0xa6, 0x18, 0x43, 0xab, 0x28, 0xc0, 0x14, 0x63, 0x8c, 0x31, 0x66, 0x50, 0x1b, 0x08, 0x98, 0x89,
        0x0c, 0xc6, 0x81, 0x1d, 0xc2, 0x61, 0x1e, 0xe6, 0xc1, 0x0d, 0x66, 0x81, 0x1e, 0xe4, 0xa1, 0x1e, 0xc6, 0x81, 0x1e, 0xea, 0x41, 0x1e, 0xca, 0x81, 0x1c, 0x44, 0xa1, 0x1e, 0xcc, 0xc1, 0x1c, 0xca,
        0x41, 0x1e, 0xf8, 0xa0, 0x1e, 0xdc, 0x61, 0x1e, 0xd2, 0xe1, 0x1c, 0xdc, 0xa1, 0x1c, 0xc8, 0x01, 0x0c, 0xd2, 0xc1, 0x1d, 0xe8, 0xc1, 0x0f, 0x50, 0x60, 0x08, 0x1e, 0x26, 0x4d, 0x11, 0x25, 0x4c,
        0xfe, 0x86, 0x4d, 0x84, 0x36, 0x0c, 0x11, 0x21, 0x49, 0x1b, 0x55, 0x14, 0x44, 0x84, 0x02, 0x43, 0x72, 0x18, 0x81, 0x30, 0x66, 0x92, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x03,
        0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71, 0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0xb0, 0x87, 0x72, 0x18,
        0x07, 0x7a, 0x78, 0x07, 0x79, 0xe0, 0x83, 0x7a, 0x70, 0x87, 0x79, 0x48, 0x87, 0x73, 0x70, 0x87, 0x72, 0x20, 0x07, 0x30, 0x48, 0x07, 0x77, 0xa0, 0x07, 0x36, 0x00, 0x03, 0x3a, 0xf0, 0x03, 0x30,
        0xf0, 0x03, 0x14, 0x50, 0xaa, 0x73, 0x04, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50,
        0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
        0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07,
        0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0,
        0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x86, 0x3c, 0x08, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x16, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x34,
        0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x05, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
        0xc6, 0x04, 0x43, 0x32, 0x25, 0x30, 0x02, 0x50, 0x0c, 0x85, 0x51, 0x08, 0x65, 0x51, 0x20, 0x74, 0x46, 0x00, 0xe8, 0x16, 0x08, 0xcd, 0x19, 0x00, 0xb2, 0x33, 0x00, 0x14, 0x67, 0x00, 0x00, 0x00,
        0x79, 0x18, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
        0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x04,
        0x83, 0x98, 0x20, 0x18, 0xc5, 0x06, 0x61, 0x20, 0x26, 0x08, 0x86, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xd1, 0x43, 0x60, 0x82, 0x60, 0x1c, 0x13, 0x04,
        0x03, 0xd9, 0x20, 0x0c, 0xcd, 0x86, 0x84, 0x50, 0x16, 0x82, 0x18, 0x18, 0xc2, 0x99, 0x20, 0x4c, 0xcd, 0x04, 0xc1, 0x48, 0x36, 0x24, 0x03, 0xb4, 0x10, 0xc3, 0x10, 0x11, 0xc0, 0x06, 0xe1, 0x91,
        0x26, 0x08, 0x96, 0x33, 0x41, 0x38, 0x96, 0x09, 0x82, 0xa1, 0x6c, 0x10, 0x06, 0x6b, 0xc3, 0x42, 0x50, 0x0b, 0x41, 0x0c, 0x4c, 0x55, 0x55, 0xd7, 0x86, 0x00, 0xdb, 0x40, 0x4c, 0x19, 0x00, 0x4c,
        0x10, 0x04, 0x80, 0x44, 0x5b, 0x58, 0x9a, 0xdb, 0x04, 0xe1, 0x62, 0x36, 0x0c, 0xc3, 0x30, 0x6c, 0x20, 0x88, 0xae, 0xf1, 0x36, 0x14, 0x1b, 0x07, 0x68, 0x5f, 0x15, 0x36, 0x36, 0xbb, 0x36, 0x97,
        0x34, 0xb2, 0x32, 0x37, 0xba, 0x29, 0x41, 0x50, 0x85, 0x0c, 0xcf, 0xc5, 0xae, 0x4c, 0x6e, 0x2e, 0xed, 0xcd, 0x6d, 0x4a, 0x40, 0x34, 0x21, 0xc3, 0x73, 0xb1, 0x0b, 0x63, 0xb3, 0x2b, 0x93, 0x9b,
        0x12, 0x18, 0x75, 0xc8, 0xf0, 0x5c, 0xe6, 0xd0, 0xc2, 0xc8, 0xca, 0xe4, 0x9a, 0xde, 0xc8, 0xca, 0xd8, 0xa6, 0x04, 0x48, 0x19, 0x32, 0x3c, 0x17, 0xb9, 0xb2, 0xb9, 0xb7, 0x3a, 0xb9, 0xb1, 0xb2,
        0xb9, 0x29, 0x41, 0x56, 0x87, 0x0c, 0xcf, 0xa5, 0xcc, 0x8d, 0x4e, 0x2e, 0x0f, 0xea, 0x2d, 0xcd, 0x8d, 0x6e, 0x6e, 0x4a, 0xf0, 0x01, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
        0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
        0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03,
        0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
        0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
        0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90,
        0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
        0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
        0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82,
        0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x8c, 0xc8, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x72, 0x10, 0x87, 0x73, 0x70, 0x03, 0x7b, 0x08, 0x07, 0x79, 0x60, 0x87, 0x70, 0xc8, 0x87, 0x77, 0xa8, 0x07, 0x7a,
        0x00, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x26, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0xdb, 0x40, 0x35,
        0x5c, 0xbe, 0xf3, 0xf8, 0x01, 0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8, 0xc8, 0x6d, 0x5b, 0x80, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x42, 0x44, 0x00, 0x13, 0x11, 0x02, 0xcd, 0xb0, 0x10, 0x06,
        0x40, 0x30, 0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x34, 0x46, 0x00, 0x4a,
        0xa0, 0x3c, 0xc8, 0x14, 0x62, 0x40, 0xc9, 0xcd, 0x00, 0xd4, 0x40, 0x01, 0x02, 0x02, 0x02, 0x22, 0x54, 0x42, 0x29, 0x06, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x80, 0x64,
        0x46, 0x52, 0x55, 0xcf, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x20, 0xda, 0xb1, 0x5c, 0x17, 0x34, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0xc8, 0x86, 0x30, 0xd7, 0x15, 0x8d, 0x18, 0x20, 0x00, 0x08,
        0x82, 0xc1, 0xb2, 0x29, 0xc1, 0x81, 0x8d, 0x26, 0x04, 0xc0, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x1d, 0x33, 0x20, 0xda, 0x68, 0x42, 0x00, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf2,
        0x39, 0x46, 0xc2, 0x8c, 0x26, 0x04, 0xc0, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x61, 0x00, 0x21, 0x9e, 0x33, 0x9a, 0x10, 0x00, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0xd0, 0x88, 0x01, 0xb3,
        0x80, 0x01, 0x18, 0x1c, 0xc5, 0x10, 0x4c, 0x08, 0x00, 0x00, 0x00, 0x00,
    };
    static const D3D12_SHADER_BYTECODE cs_code = SHADER_BYTECODE(cs_code_dxil);
    /* Input payload. The shader only reads relative to a base which is offset
     * 16 bytes into this buffer (see root SRV bindings below). */
    static const uint32_t test_data[] = { 0, 1, 2, 3, 4, 5, 6, 7 };

    /* Root signature: u0 as root UAV, t0 and t1 as root SRVs. */
    memset(&rs_desc, 0, sizeof(rs_desc));
    rs_desc.NumParameters = ARRAY_SIZE(root_params);
    rs_desc.pParameters = root_params;
    memset(root_params, 0, sizeof(root_params));
    root_params[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    root_params[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
    root_params[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    root_params[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
    root_params[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    root_params[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
    root_params[2].Descriptor.ShaderRegister = 1;

    if (!init_compute_test_context(&context))
        return;
    if (!context_supports_dxil(&context))
    {
        destroy_test_context(&context);
        return;
    }

    input_buffer = create_upload_buffer(context.device, sizeof(test_data), test_data);
    /* 16 bytes = one uint4 result slot in RW[0]. */
    output_buffer = create_default_buffer(context.device, 16,
            D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
            D3D12_RESOURCE_STATE_UNORDERED_ACCESS);

    create_root_signature(context.device, &rs_desc, &root_signature);
    pso = create_compute_pipeline_state(context.device, root_signature, cs_code);
    ID3D12GraphicsCommandList_SetPipelineState(context.list, pso);
    ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature);
    ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0,
            ID3D12Resource_GetGPUVirtualAddress(output_buffer));
    /* Bind both SRVs 16 bytes into the input buffer so that negative
     * indices/offsets reach below the bound base address. */
    ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 1,
            ID3D12Resource_GetGPUVirtualAddress(input_buffer) + 16);
    ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 2,
            ID3D12Resource_GetGPUVirtualAddress(input_buffer) + 16);
    ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
    transition_resource_state(context.list, output_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
    get_buffer_readback_with_command_list(output_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
    for (i = 0; i < 4; i++)
        values[i] = get_readback_uint(&rb, i, 0, 0);

    /* Purely informational output; there is no well-defined correct result,
     * so log what the driver did rather than asserting. */
    skip("Got structured access [-1] = #%x\n", values[0]);
    skip("Got structured access [1u << 30] = #%x\n", values[1]);
    skip("Got byte address [-4] = #%x\n", values[2]);
    skip("Got byte address [0] = #%x\n", values[3]);

    /* Observed on AMD:
       test_root_descriptor_offset_sign:5262: Test skipped: Got structured access [-1] = #4b416743 <-- Garbage. Likely we accessed garbage memory way out at (4 * UINT_MAX) & UINT_MAX offset.
       test_root_descriptor_offset_sign:5263: Test skipped: Got structured access [1u << 30] = #4 <-- Suggests 32-bit uint offset.
       test_root_descriptor_offset_sign:5264: Test skipped: Got byte address [-4] = #0 <-- Suggests we hit robustness for driver generated descriptor.
       test_root_descriptor_offset_sign:5265: Test skipped: Got byte address [0] = #4
     */
    /* Observed on NV: Blue screen of death (?!?!). */
    /* Observed on Intel: All 0. Likely faulted and terminated the dispatch before we could write results. */

    ID3D12RootSignature_Release(root_signature);
    ID3D12PipelineState_Release(pso);
    ID3D12Resource_Release(input_buffer);
    ID3D12Resource_Release(output_buffer);
    release_resource_readback(&rb);
    destroy_test_context(&context);
#endif
}
/* Verifies behavior of NULL (and absent) UAV counters across 8 structured UAV
 * descriptors: some created with a counter resource, some explicitly NULL'd
 * afterwards, and one copied via CopyDescriptorsSimple to check that NULL
 * counters survive descriptor copies. Native drivers diverge here (see the
 * inline notes), so assertions only pin down the envelope of sane results. */
static void test_uav_counters_null_behavior(bool use_dxil)
{
    D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
    D3D12_ROOT_SIGNATURE_DESC rs_desc;
    ID3D12DescriptorHeap *cpu_heap;
    D3D12_ROOT_PARAMETER rs_param;
    D3D12_DESCRIPTOR_RANGE range;
    struct test_context context;
    struct resource_readback rb;
    ID3D12DescriptorHeap *heap;
    ID3D12Resource *resource;
    unsigned int i;

#if 0
    RWStructuredBuffer<uint> RWBuf[4] : register(u0);

    [numthreads(1, 1, 1)]
    void main(int wg : SV_GroupID)
    {
        RWBuf[wg >> 2][wg & 3] = RWBuf[wg >> 2].IncrementCounter() + 64;
    }
#endif
    static const DWORD cs_code_dxbc[] =
    {
        0x43425844, 0xb5433247, 0x4cd30f6c, 0x58100e67, 0xc179ade1, 0x00000001, 0x00000134, 0x00000003,
        0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
        0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e0, 0x00050051, 0x00000038, 0x0100086a,
        0x0700009e, 0x0031ee46, 0x00000000, 0x00000000, 0x00000003, 0x00000004, 0x00000000, 0x0200005f,
        0x00021012, 0x02000068, 0x00000002, 0x0400009b, 0x00000001, 0x00000001, 0x00000001, 0x0600002a,
        0x00100012, 0x00000000, 0x0002100a, 0x00004001, 0x00000002, 0x06000001, 0x00100022, 0x00000000,
        0x0002100a, 0x00004001, 0x00000003, 0x070000b2, 0x00100012, 0x00000001, 0x0421e000, 0x00000000,
        0x0010000a, 0x00000000, 0x0700001e, 0x00100042, 0x00000000, 0x0010000a, 0x00000001, 0x00004001,
        0x00000040, 0x0b0000a8, 0x0421e012, 0x00000000, 0x0010000a, 0x00000000, 0x0010001a, 0x00000000,
        0x00004001, 0x00000000, 0x0010002a, 0x00000000, 0x0100003e,
    };
    static const BYTE cs_code_dxil[] =
    {
        0x44, 0x58, 0x42, 0x43, 0xc6, 0xfe, 0xe1, 0x77, 0xd8, 0x5c, 0x56, 0xc7, 0x6e, 0xf7, 0xe2, 0xf7, 0xb3, 0xb0, 0x40, 0xe0, 0x01, 0x00, 0x00, 0x00, 0x34, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
        0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x60, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
        0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xc1, 0xf5, 0xe2,
        0x29, 0x0a, 0x7c, 0x68, 0x4a, 0xfa, 0x15, 0xe9, 0x1a, 0x85, 0x63, 0x21, 0x44, 0x58, 0x49, 0x4c, 0x40, 0x05, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x50, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c,
        0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x47, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
        0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02,
        0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90,
        0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
        0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
        0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84,
        0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x54, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73,
        0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0xc1, 0x29, 0xc4, 0x18, 0xc8, 0x50, 0x9a, 0x23, 0x08, 0x8a, 0x81, 0x86, 0x19, 0x63, 0x11, 0x2b,
        0x0a, 0x18, 0x68, 0x8c, 0x31, 0xc6, 0x30, 0xe4, 0x06, 0x02, 0x66, 0x32, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x03, 0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71,
        0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0xa8, 0x07, 0x77, 0x98, 0x87, 0x74, 0x38, 0x07, 0x77, 0x28, 0x07, 0x72, 0x00, 0x83,
        0x74, 0x70, 0x07, 0x7a, 0xf0, 0x03, 0x14, 0x8c, 0x24, 0x88, 0x24, 0xe7, 0x08, 0x40, 0x01, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
        0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
        0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90,
        0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6,
        0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x12, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x18, 0xf2, 0x30, 0x40, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x71, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x0b, 0x04, 0x00, 0x00,
        0x09, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x10, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x30, 0x02, 0x50, 0x0c, 0x85, 0x50, 0x18, 0xb4, 0x46, 0x00, 0x6a,
        0x80, 0x68, 0x81, 0xd0, 0x9c, 0x01, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
        0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
        0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
        0x53, 0x43, 0x60, 0x82, 0x30, 0x20, 0x13, 0x84, 0x21, 0x99, 0x20, 0x2c, 0xca, 0x04, 0x61, 0x59, 0x36, 0x08, 0x03, 0xb3, 0x61, 0x21, 0x94, 0x85, 0x20, 0x98, 0xc6, 0x79, 0x1c, 0x68, 0x43, 0x10,
        0x6d, 0x20, 0x00, 0x09, 0x00, 0x26, 0x08, 0x02, 0x40, 0xa2, 0x2d, 0x2c, 0xcd, 0x6d, 0x82, 0x40, 0x31, 0x1b, 0x86, 0x61, 0x18, 0x36, 0x10, 0x84, 0xc5, 0x5c, 0x1b, 0x0a, 0xaa, 0x02, 0x26, 0xac,
        0x0a, 0x1b, 0x9b, 0x5d, 0x9b, 0x4b, 0x1a, 0x59, 0x99, 0x1b, 0xdd, 0x94, 0x20, 0xa8, 0x42, 0x86, 0xe7, 0x62, 0x57, 0x26, 0x37, 0x97, 0xf6, 0xe6, 0x36, 0x25, 0x20, 0x9a, 0x90, 0xe1, 0xb9, 0xd8,
        0x85, 0xb1, 0xd9, 0x95, 0xc9, 0x4d, 0x09, 0x8c, 0x3a, 0x64, 0x78, 0x2e, 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x02, 0xa4, 0x0c, 0x19, 0x9e, 0x8b, 0x5c, 0xd9,
        0xdc, 0x5b, 0x9d, 0xdc, 0x58, 0xd9, 0xdc, 0x94, 0x40, 0xaa, 0x43, 0x86, 0xe7, 0x52, 0xe6, 0x46, 0x27, 0x97, 0x07, 0xf5, 0x96, 0xe6, 0x46, 0x37, 0x37, 0x25, 0xc0, 0x00, 0x79, 0x18, 0x00, 0x00,
        0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
        0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
        0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
        0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
        0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
        0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
        0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
        0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
        0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
        0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x50, 0x0d, 0x97,
        0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x26, 0x90, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0xb9, 0xcf, 0x00, 0x4c, 0x04, 0xe7, 0x50,
        0xcd, 0x44, 0x44, 0x36, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x05, 0x44, 0xc3, 0xe5, 0x3b, 0x8f, 0x6f, 0x44, 0x0e, 0xf5, 0x88, 0x83, 0x8f,
        0xdc, 0xb6, 0x01, 0x10, 0x0c, 0x80, 0x34, 0x00, 0x61, 0x20, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x8a,
        0x37, 0xa0, 0x08, 0x4a, 0xae, 0x18, 0x03, 0x0a, 0x30, 0xa0, 0x0c, 0x4a, 0x31, 0x80, 0x4c, 0x09, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0x58, 0x48, 0x54, 0x01, 0x92,
        0x15, 0x4c, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0xcc, 0x95, 0x10, 0x54, 0x00, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x92, 0x25, 0x41, 0x51, 0x41, 0x02, 0x65, 0x24, 0x3a, 0x62, 0xd0,
        0x00, 0x20, 0x08, 0x06, 0x8e, 0x96, 0x10, 0x01, 0x26, 0x40, 0x10, 0x84, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    };
    static const D3D12_SHADER_BYTECODE cs_dxbc = SHADER_BYTECODE(cs_code_dxbc);
    static const D3D12_SHADER_BYTECODE cs_dxil = SHADER_BYTECODE(cs_code_dxil);

    if (!init_compute_test_context(&context))
        return;
    if (use_dxil && !context_supports_dxil(&context))
    {
        skip("Context does not support DXIL.\n");
        destroy_test_context(&context);
        return;
    }

    /* Single descriptor table of 8 UAVs (shader only declares 4; the extra
     * descriptors exercise NULL-counter creation paths). */
    memset(&rs_desc, 0, sizeof(rs_desc));
    memset(&rs_param, 0, sizeof(rs_param));
    memset(&range, 0, sizeof(range));
    rs_desc.NumParameters = 1;
    rs_desc.pParameters = &rs_param;
    rs_param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    rs_param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    rs_param.DescriptorTable.NumDescriptorRanges = 1;
    rs_param.DescriptorTable.pDescriptorRanges = &range;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
    range.NumDescriptors = 8;
    create_root_signature(context.device, &rs_desc, &context.root_signature);
    context.pipeline_state = create_compute_pipeline_state(context.device, context.root_signature, use_dxil ? cs_dxil : cs_dxbc);

    cpu_heap = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 8);
    heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 8);
    /* One buffer holds both the UAV data (first region) and the counters,
     * placed at aligned offsets (i + 1) * D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT. */
    resource = create_default_buffer(context.device, D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT * 9,
            D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);

    memset(&uav_desc, 0, sizeof(uav_desc));
    uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
    for (i = 0; i < 8; i++)
    {
        /* Both handles are CPU-side: cpu_h into the non-shader-visible heap,
         * gpu_h into the shader-visible heap used at dispatch time. */
        D3D12_CPU_DESCRIPTOR_HANDLE cpu_h, gpu_h;
        cpu_h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu_heap);
        gpu_h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap);
        cpu_h.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * i;
        gpu_h.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * i;

        uav_desc.Buffer.NumElements = 4;
        uav_desc.Buffer.FirstElement = 4 * i;
        uav_desc.Buffer.StructureByteStride = 4;
        uav_desc.Buffer.CounterOffsetInBytes = D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT * (i + 1);
        /* AMD drivers don't seem to clear the UAV counter if we pass in NULL, so
         * test a path which does not do that. */
        if (i < 4)
        {
            /* Counter lives in the same resource as the UAV data. */
            ID3D12Device_CreateUnorderedAccessView(context.device, resource, resource, &uav_desc, cpu_h);
            ID3D12Device_CreateUnorderedAccessView(context.device, resource, resource, &uav_desc, gpu_h);
        }

        uav_desc.Buffer.CounterOffsetInBytes = 0;
        /* Test writing NULL UAV counter after a non-NULL UAV counter. Makes sure that we are indeed supposed
         * to clear out UAV counters to NULL every time. */
        if ((i & 3) == 3)
        {
            ID3D12Device_CreateUnorderedAccessView(context.device, NULL, NULL, &uav_desc, cpu_h);
            ID3D12Device_CreateUnorderedAccessView(context.device, NULL, NULL, &uav_desc, gpu_h);
        }
        else if ((i & 3) >= 1)
        {
            ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, cpu_h);
            ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, gpu_h);
        }
        else
        {
            /* Test copy behavior. Make sure we correctly copy NULL counters as well. */
            ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, cpu_h);
            ID3D12Device_CopyDescriptorsSimple(context.device, 1,
                    gpu_h, cpu_h, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
        }
    }

    ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &heap);
    ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
    ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
    ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0,
            ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap));
    /* 4 groups per UAV descriptor; each group writes one element. */
    ID3D12GraphicsCommandList_Dispatch(context.list, 8 * 4, 1, 1);
    transition_resource_state(context.list, resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
    get_buffer_readback_with_command_list(resource, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);

    for (i = 0; i < 8 * 4; i++)
    {
        /* Possible behavior is very varied here:
         * NV: If UAV counter is NULL, NV makes the main descriptor robust.
         * AMD: Writing NULL uav counter does not update the counter descriptor, the atomic update will still go through.
         * Intel: Behaves as you would expect. Atomic op returns 0, writes to main descriptor behaves as you'd expect. */
        uint32_t value = get_readback_uint(&rb, i, 0, 0);
        ok(value == 0 || (value >= 64 && value < (64 + 4)), "Unexpected value %u = %u\n", i, value);
    }

    for (i = 0; i < 8; i++)
    {
        /* Inspect each counter slot directly. */
        uint32_t value = get_readback_uint(&rb, (i + 1) * (D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT / 4), 0, 0);
        if (i < 4)
        {
            /* AMD behavior: Passing NULL does not necessarily clear out UAV counter.
             * It is undefined to access UAV counter like this.
             * https://docs.microsoft.com/en-us/windows/win32/direct3d12/uav-counters
             * "If a shader attempts to access the counter of a UAV that does not have an associated counter,
             * then the debug layer will issue a warning,
             * and a GPU page fault will occur causing the app's device to be removed." */
            ok(value == 0 || value == 4, "Unexpected counter %u = %u.\n", i, value);
        }
        else
        {
            /* Technically undefined, but all drivers behave robustly here, we should too. */
            ok(value == 0, "Unexpected counter %u = %u.\n", i, value);
        }
    }

    release_resource_readback(&rb);
    ID3D12DescriptorHeap_Release(heap);
    ID3D12DescriptorHeap_Release(cpu_heap);
    ID3D12Resource_Release(resource);
    destroy_test_context(&context);
}
/* DXBC variant: run the shared NULL-UAV-counter behavior test with DXBC shaders. */
void test_uav_counter_null_behavior_dxbc(void)
{
test_uav_counters_null_behavior(false);
}
/* DXIL variant: run the shared NULL-UAV-counter behavior test with DXIL shaders. */
void test_uav_counter_null_behavior_dxil(void)
{
test_uav_counters_null_behavior(true);
}

View File

@ -1074,10 +1074,10 @@ void test_reset_command_allocator(void)
command_allocator, NULL, &IID_ID3D12GraphicsCommandList, (void **)&command_list2);
ok(hr == S_OK, "Failed to create command list, hr %#x.\n", hr);
ID3D12GraphicsCommandList_Release(command_list);
ID3D12GraphicsCommandList_Release(command_list2);
ID3D12CommandAllocator_Release(command_allocator);
ID3D12CommandAllocator_Release(command_allocator2);
ID3D12GraphicsCommandList_Release(command_list);
ID3D12GraphicsCommandList_Release(command_list2);
}
refcount = ID3D12Device_Release(device);

View File

@ -2301,119 +2301,3 @@ void test_mismatching_pso_stages(void)
destroy_test_context(&context);
}
void test_pipeline_no_ps_nonzero_rts(void)
{
const FLOAT white[] = { 100.0f, 100.0f, 100.0f, 100.0f };
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso;
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
D3D12_ROOT_SIGNATURE_DESC rs_desc;
struct depth_stencil_resource ds;
D3D12_INPUT_LAYOUT_DESC layout;
D3D12_INPUT_ELEMENT_DESC elem;
struct test_context_desc desc;
D3D12_VERTEX_BUFFER_VIEW vbv;
struct test_context context;
ID3D12DescriptorHeap *rtv;
ID3D12Resource *vbo;
ID3D12Resource *rt;
D3D12_VIEWPORT vp;
D3D12_RECT sci;
static const FLOAT vbo_data[] =
{
-1.0f, -1.0f, 0.5f, 1.0f,
+3.0f, -1.0f, 0.5f, 1.0f,
-1.0f, +3.0f, 0.5f, 1.0f,
};
static const DWORD vs_code[] =
{
#if 0
float4 main(float4 a : A) : SV_Position
{
return a;
}
#endif
0x43425844, 0xecd820c8, 0x89ee4b40, 0xb73efa73, 0x4ed91573, 0x00000001, 0x000000d4, 0x00000003,
0x0000002c, 0x00000058, 0x0000008c, 0x4e475349, 0x00000024, 0x00000001, 0x00000008, 0x00000020,
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000f0f, 0xabab0041, 0x4e47534f, 0x0000002c,
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000040, 0x00010050, 0x00000010, 0x0100086a,
0x0300005f, 0x001010f2, 0x00000000, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x05000036,
0x001020f2, 0x00000000, 0x00101e46, 0x00000000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE vs = SHADER_BYTECODE(vs_code);
layout.NumElements = 1;
layout.pInputElementDescs = &elem;
memset(&elem, 0, sizeof(elem));
elem.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
elem.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
elem.SemanticName = "A";
memset(&desc, 0, sizeof(desc));
desc.no_pipeline = true;
desc.no_root_signature = true;
desc.no_render_target = true;
if (!init_test_context(&context, &desc))
return;
init_depth_stencil(&ds, context.device, 1, 1, 1, 1, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_D32_FLOAT, NULL);
rt = create_default_texture2d(context.device, 1, 1, 1, 1, DXGI_FORMAT_R32_FLOAT,
D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
D3D12_RESOURCE_STATE_RENDER_TARGET);
rtv = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1);
memset(&rs_desc, 0, sizeof(rs_desc));
rs_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
create_root_signature(context.device, &rs_desc, &context.root_signature);
init_pipeline_state_desc(&pso, context.root_signature, DXGI_FORMAT_R8G8B8A8_UNORM, &vs, NULL, &layout);
pso.DSVFormat = DXGI_FORMAT_D32_FLOAT;
pso.DepthStencilState.DepthEnable = TRUE;
pso.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
pso.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS;
pso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pso.PS.BytecodeLength = 0;
pso.PS.pShaderBytecode = NULL;
rtv_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(rtv);
ID3D12Device_CreateGraphicsPipelineState(context.device, &pso, &IID_ID3D12PipelineState, (void**)&context.pipeline_state);
ID3D12Device_CreateRenderTargetView(context.device, rt, NULL, rtv_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(context.list, rtv_handle, white, 0, NULL);
ID3D12GraphicsCommandList_ClearDepthStencilView(context.list, ds.dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(context.list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
set_viewport(&vp, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
ID3D12GraphicsCommandList_RSSetViewports(context.list, 1, &vp);
set_rect(&sci, 0, 0, 1, 1);
ID3D12GraphicsCommandList_RSSetScissorRects(context.list, 1, &sci);
ID3D12GraphicsCommandList_OMSetRenderTargets(context.list, 1, &rtv_handle, TRUE, &ds.dsv_handle);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(context.list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
vbo = create_upload_buffer(context.device, sizeof(vbo_data), vbo_data);
vbv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo);
vbv.SizeInBytes = sizeof(vbo_data);
vbv.StrideInBytes = 16;
ID3D12GraphicsCommandList_IASetVertexBuffers(context.list, 0, 1, &vbv);
ID3D12GraphicsCommandList_DrawInstanced(context.list, 3, 1, 0, 0);
transition_resource_state(context.list, rt, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
/* Verify depth buffer was written to. */
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 0.5f, 0);
reset_command_list(context.list, context.allocator);
/* Verify that the invalid R32_FLOAT RTV was just ignored. */
check_sub_resource_float(rt, 0, context.queue, context.list, 100.0f, 0);
ID3D12Resource_Release(rt);
ID3D12Resource_Release(vbo);
ID3D12DescriptorHeap_Release(rtv);
destroy_depth_stencil(&ds);
destroy_test_context(&context);
}

File diff suppressed because it is too large Load Diff

View File

@ -24,8 +24,8 @@
void test_unbound_rtv_rendering(void)
{
static const struct vec4 white = { 1.0f, 1.0f, 1.0f, 1.0f };
static const struct vec4 red = { 1.0f, 0.0f, 0.0f, 1.0f };
static const float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
ID3D12GraphicsCommandList *command_list;
D3D12_CPU_DESCRIPTOR_HANDLE rt_handle;
@ -91,8 +91,8 @@ void test_unbound_rtv_rendering(void)
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create state, hr %#x.\n", hr);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rt_handle, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rt_handle, &white.x, 0, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
@ -120,8 +120,7 @@ void test_unbound_rtv_rendering(void)
void test_unknown_rtv_format(void)
{
static const struct vec4 vec4_white = {1.0f, 1.0f, 1.0f, 1.0f};
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
struct vec4 expected_vec4 = {0.0f, 0.0f, 0.0f, 1.0f};
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
ID3D12GraphicsCommandList *command_list;
@ -186,7 +185,7 @@ void test_unknown_rtv_format(void)
create_render_target(&context, &desc, &render_targets[1], &rtvs[2]);
for (i = 0; i < ARRAY_SIZE(rtvs); ++i)
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtvs[i], white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtvs[i], &white.x, 0, NULL);
/* NULL RTV */
memset(&rtv_desc, 0, sizeof(rtv_desc));
@ -213,7 +212,7 @@ void test_unknown_rtv_format(void)
transition_resource_state(command_list, render_targets[1],
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &vec4_white, 0);
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &white, 0);
reset_command_list(command_list, context.allocator);
expected_vec4.x = 2.0f;
check_sub_resource_vec4(render_targets[0], 0, queue, command_list, &expected_vec4, 0);

View File

@ -2638,8 +2638,7 @@ void test_stress_suballocation_thread(void *userdata)
{
/* Randomly allocate heaps and place a buffer on top of it. */
alloc_heap = rand_r(&seed) % 2 == 0;
/* Ensures we sometimes hit dedicated allocation paths. (2 MiB limit). */
alloc_size = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT * (1 + rand_r(&seed) % 40);
alloc_size = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT * (1 + rand_r(&seed) % 20);
keep_alive = rand_r(&seed) % 2 == 0;
if (buffers[i] && keep_alive)

View File

@ -1467,36 +1467,3 @@ void test_missing_bindings_root_signature(void)
destroy_test_context(&context);
}
/* Feed a plain compute-shader blob (which contains no root signature part)
 * to CreateRootSignature and check the reported failure code. */
void test_root_signature_empty_blob(void)
{
    static const DWORD cs_code[] =
    {
#if 0
    RWStructuredBuffer<uint> RWBuf;
    [numthreads(1, 1, 1)]
    void main(int wg : SV_GroupID)
    {
        RWBuf[wg] = wg;
    }
#endif
        0x43425844, 0x81a88c98, 0x1ab24abd, 0xfdb8fb1f, 0x7e9cb035, 0x00000001, 0x000000a8, 0x00000003,
        0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
        0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000054, 0x00050050, 0x00000015, 0x0100086a,
        0x0400009e, 0x0011e000, 0x00000000, 0x00000004, 0x0200005f, 0x00021012, 0x0400009b, 0x00000001,
        0x00000001, 0x00000001, 0x070000a8, 0x0011e012, 0x00000000, 0x0002100a, 0x00004001, 0x00000000,
        0x0002100a, 0x0100003e,
    };
    ID3D12RootSignature *rs;
    struct test_context context;
    HRESULT hr;

    if (!init_compute_test_context(&context))
        return;

    hr = ID3D12Device_CreateRootSignature(context.device, 0, cs_code, sizeof(cs_code), &IID_ID3D12RootSignature, (void **)&rs);
    /* Has to be E_FAIL, not E_INVALIDARG, oddly enough. */
    ok(hr == E_FAIL, "Unexpected hr #%x.\n", hr);

    destroy_test_context(&context);
}

View File

@ -5134,7 +5134,7 @@ void test_gather(void)
{0.3f, 1.3f, 1.2f, 0.2f}, {1.3f, 2.3f, 2.2f, 1.2f}, {2.3f, 3.3f, 3.2f, 2.2f}, {3.3f, 3.3f, 3.2f, 3.2f},
{0.3f, 1.3f, 1.3f, 0.3f}, {1.3f, 2.3f, 2.3f, 1.3f}, {2.3f, 3.3f, 3.3f, 2.3f}, {3.3f, 3.3f, 3.3f, 3.3f},
};
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
static const D3D12_SUBRESOURCE_DATA resource_data = {&texture_data, sizeof(texture_data) / 4};
memset(&desc, 0, sizeof(desc));
@ -5171,7 +5171,7 @@ void test_gather(void)
context.pipeline_state = create_pipeline_state(context.device,
context.root_signature, desc.rt_format, NULL, &ps_gather4, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
@ -5209,7 +5209,7 @@ void test_gather(void)
context.pipeline_state = create_pipeline_state(context.device,
context.root_signature, desc.rt_format, NULL, &ps_gather4_offset, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
@ -5247,7 +5247,7 @@ void test_gather(void)
context.pipeline_state = create_pipeline_state(context.device,
context.root_signature, desc.rt_format, NULL, &ps_gather4_green, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
@ -5285,7 +5285,7 @@ void test_gather(void)
context.pipeline_state = create_pipeline_state(context.device,
context.root_signature, desc.rt_format, NULL, &ps_gather4_po, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
@ -5321,7 +5321,7 @@ void test_gather(void)
constants.offset_x = 0;
constants.offset_y = 0;
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
@ -5455,7 +5455,7 @@ void test_gather_c(void)
{0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 1.0f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f},
{0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 1.0f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f},
};
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
static const D3D12_SUBRESOURCE_DATA resource_data = {&texture_data, sizeof(texture_data) / 4};
static const D3D12_STATIC_SAMPLER_DESC sampler_desc =
{
@ -5511,7 +5511,7 @@ void test_gather_c(void)
context.pipeline_state = create_pipeline_state(context.device,
context.root_signature, desc.rt_format, NULL, &ps_gather4_c, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
@ -5549,7 +5549,7 @@ void test_gather_c(void)
context.pipeline_state = create_pipeline_state(context.device,
context.root_signature, desc.rt_format, NULL, &ps_gather4_po_c, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
@ -5585,7 +5585,7 @@ void test_gather_c(void)
constants.offset_x = 0;
constants.offset_y = 0;
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
@ -6281,7 +6281,7 @@ void test_multisample_array_texture(void)
};
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
static const float colors[][4] =
static const struct vec4 colors[] =
{
{1.0f, 0.0f, 0.0f, 1.0f},
{0.0f, 1.0f, 0.0f, 1.0f},
@ -6386,7 +6386,8 @@ void test_multisample_array_texture(void)
rtv_desc.Texture2DMSArray.FirstArraySlice = i;
rtv_desc.Texture2DMSArray.ArraySize = 1;
ID3D12Device_CreateRenderTargetView(device, texture, &rtv_desc, cpu_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, cpu_handle, colors[i], 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, cpu_handle, &colors[i].x, 0, NULL);
}
transition_resource_state(command_list, texture,

View File

@ -2131,8 +2131,8 @@ void test_sv_barycentric(void)
#define BARY_RES 128
static const D3D12_VIEWPORT vp = { 0, 0, BARY_RES, BARY_RES, 0, 1 };
static const D3D12_RECT sci = { 0, 0, BARY_RES, BARY_RES };
static const float white[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
static const D3D12_RECT sci = { 0, 0, BARY_RES, BARY_RES };
static const uint8_t provoking_lut[] = {
192, 212, 224, 244,
128, 144, 160, 176,
@ -4834,7 +4834,7 @@ void test_shader_sm66_is_helper_lane(void)
{
/* Oh, hi there. */
static const float alpha_keys[4] = { 0.75f, 2.25f, 3.25f, 3.75f };
static const float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
static const struct vec4 white = { 1.0f, 1.0f, 1.0f, 1.0f };
D3D12_FEATURE_DATA_SHADER_MODEL shader_model;
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
@ -5041,7 +5041,7 @@ void test_shader_sm66_is_helper_lane(void)
ID3D12Device_CreateUnorderedAccessView(context.device, atomic_buffer, NULL, &uav_desc, cpu_handle);
ID3D12GraphicsCommandList_SetDescriptorHeaps(command_list, 1, &heap);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
@ -5088,7 +5088,7 @@ void test_shader_sm66_is_helper_lane(void)
expected.w = 8881.0f;
}
else
memcpy(&expected, white, sizeof(white));
expected = white;
ok(compare_vec4(value, &expected, 0), "Mismatch pixel %u, %u, (%f %f %f %f) != (%f %f %f %f).\n",
x, y, expected.x, expected.y, expected.z, expected.w,

View File

@ -65,16 +65,6 @@ void test_get_resource_tiling(void)
/* Test buffers */
{ D3D12_RESOURCE_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 1024, 1, 1, 1, 1, 1, 0, 65536, 1, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 16*65536, 1, 1, 1, 16, 1, 0, 65536, 1, 1, D3D12_TILED_RESOURCES_TIER_1 },
/* Test small resource behavior */
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 1, 1, 1, 1, 1, 1, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 2, 2, 1, 2, 1, 2, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 4, 4, 1, 3, 1, 3, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 8, 8, 1, 4, 1, 4, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 16, 16, 1, 5, 1, 5, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 32, 32, 1, 6, 1, 6, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 64, 64, 1, 7, 1, 7, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 128, 128, 1, 8, 1, 8, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 256, 256, 1, 9, 2, 9, 1, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
/* Test various image formats */
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 512, 512, 1, 1, 4, 1, 1, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8_UNORM, 512, 512, 1, 1, 8, 1, 1, 256, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
@ -96,7 +86,7 @@ void test_get_resource_tiling(void)
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 128, 128, 1, 8, 1, 8, 1, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 512, 512, 1, 10, 21, 10, 3, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 512, 512, 4, 3, 84, 12, 3, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 64, 64, 1, 1, 1, 1, 0, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 64, 64, 1, 1, 0, 1, 0, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
/* Test 3D textures */
{ D3D12_RESOURCE_DIMENSION_TEXTURE3D, DXGI_FORMAT_R8_UNORM, 64, 64, 64, 1, 4, 1, 1, 64, 32, 32, D3D12_TILED_RESOURCES_TIER_3 },
{ D3D12_RESOURCE_DIMENSION_TEXTURE3D, DXGI_FORMAT_R8G8_UNORM, 64, 64, 64, 1, 8, 1, 1, 32, 32, 32, D3D12_TILED_RESOURCES_TIER_3 },
@ -223,10 +213,18 @@ void test_get_resource_tiling(void)
ok((packed_mip_info.NumTilesForPackedMips == 0) == (packed_mip_info.NumPackedMips == 0),
"Unexpected packed tile count %u.\n", packed_mip_info.NumTilesForPackedMips);
/* Docs say that tile shape should be cleared to zero if there is no standard mip, but drivers don't seem to care about this. */
ok(tile_shape.WidthInTexels == tests[i].tile_shape_w, "Unexpected tile width %u.\n", tile_shape.WidthInTexels);
ok(tile_shape.HeightInTexels == tests[i].tile_shape_h, "Unexpected tile height %u.\n", tile_shape.HeightInTexels);
ok(tile_shape.DepthInTexels == tests[i].tile_shape_d, "Unexpected tile depth %u.\n", tile_shape.DepthInTexels);
if (packed_mip_info.NumStandardMips || !packed_mip_info.NumPackedMips)
{
ok(tile_shape.WidthInTexels == tests[i].tile_shape_w, "Unexpected tile width %u.\n", tile_shape.WidthInTexels);
ok(tile_shape.HeightInTexels == tests[i].tile_shape_h, "Unexpected tile height %u.\n", tile_shape.HeightInTexels);
ok(tile_shape.DepthInTexels == tests[i].tile_shape_d, "Unexpected tile depth %u.\n", tile_shape.DepthInTexels);
}
else
{
ok(!tile_shape.WidthInTexels && !tile_shape.HeightInTexels && !tile_shape.DepthInTexels,
"Unexpected tile shape (%u,%u,%u) for packed resource.\n",
tile_shape.WidthInTexels, tile_shape.HeightInTexels, tile_shape.DepthInTexels);
}
for (j = 0; j < tests[i].expected_tiling_count; j++)
{
@ -3383,248 +3381,3 @@ void test_texture_feedback_instructions_dxil(void)
test_texture_feedback_instructions(true);
}
void test_sparse_buffer_memory_lifetime(void)
{
/* Attempt to bind sparse memory, then free the underlying heap, but keep the sparse resource
* alive. This should confuse drivers that attempt to track BO lifetimes. */
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
const UINT values[] = { 42, 42, 42, 42 };
D3D12_ROOT_PARAMETER root_parameters[2];
D3D12_TILE_REGION_SIZE region_size;
D3D12_GPU_DESCRIPTOR_HANDLE h_gpu;
D3D12_CPU_DESCRIPTOR_HANDLE h_cpu;
D3D12_ROOT_SIGNATURE_DESC rs_desc;
D3D12_DESCRIPTOR_RANGE desc_range;
struct test_context context;
struct resource_readback rb;
ID3D12DescriptorHeap *cpu;
ID3D12DescriptorHeap *gpu;
D3D12_HEAP_DESC heap_desc;
D3D12_RESOURCE_DESC desc;
ID3D12Resource *sparse;
ID3D12Resource *buffer;
ID3D12Heap *heap_live;
ID3D12Heap *heap;
unsigned int i;
HRESULT hr;
static const DWORD cs_sparse_query_dxbc[] =
{
#if 0
RWStructuredBuffer<uint> RWBuf : register(u0);
Buffer<uint> Buf : register(t0);
[numthreads(1, 1, 1)]
void main(uint thr : SV_DispatchThreadID)
{
uint code;
// Sample mapped, but freed memory. See what CheckAccessFullyMapped returns.
uint data = Buf.Load(thr, code);
uint value = CheckAccessFullyMapped(code) ? (1u << 16) : 0u;
value |= data & 0xffffu;
RWBuf[2 * thr + 0] = value;
// Sample not yet mapped memory. See what CheckAccessFullyMapped returns.
data = Buf.Load(thr + 1024 * 1024, code);
value = CheckAccessFullyMapped(code) ? (1u << 16) : 0u;
value |= data & 0xffffu;
RWBuf[2 * thr + 1] = value;
}
#endif
0x43425844, 0x8c2a40af, 0x2a9b20a6, 0xa99f0977, 0x37daacf5, 0x00000001, 0x00000280, 0x00000004,
0x00000030, 0x00000040, 0x00000050, 0x00000270, 0x4e475349, 0x00000008, 0x00000000, 0x00000008,
0x4e47534f, 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000218, 0x00050050, 0x00000086,
0x0100086a, 0x04000858, 0x00107000, 0x00000000, 0x00004444, 0x0400009e, 0x0011e000, 0x00000000,
0x00000004, 0x0200005f, 0x00020012, 0x02000068, 0x00000002, 0x0400009b, 0x00000001, 0x00000001,
0x00000001, 0x8a0000df, 0x80000042, 0x00111103, 0x00100012, 0x00000000, 0x00100012, 0x00000001,
0x00020006, 0x00107e46, 0x00000000, 0x050000ea, 0x00100022, 0x00000000, 0x0010000a, 0x00000001,
0x09000037, 0x00100022, 0x00000000, 0x0010001a, 0x00000000, 0x00004001, 0x00010000, 0x00004001,
0x00000000, 0x0b00008c, 0x00100012, 0x00000000, 0x00004001, 0x00000010, 0x00004001, 0x00000000,
0x0010000a, 0x00000000, 0x0010001a, 0x00000000, 0x06000029, 0x00100022, 0x00000000, 0x0002000a,
0x00004001, 0x00000001, 0x090000a8, 0x0011e012, 0x00000000, 0x0010001a, 0x00000000, 0x00004001,
0x00000000, 0x0010000a, 0x00000000, 0x1300008c, 0x00100052, 0x00000000, 0x00004002, 0x00000014,
0x00000000, 0x0000001f, 0x00000000, 0x00004002, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
0x00020006, 0x00004002, 0x00100000, 0x00000000, 0x00000001, 0x00000000, 0x8b0000df, 0x80000042,
0x00111103, 0x00100012, 0x00000000, 0x00100012, 0x00000001, 0x00100006, 0x00000000, 0x00107e46,
0x00000000, 0x050000ea, 0x00100082, 0x00000000, 0x0010000a, 0x00000001, 0x09000037, 0x00100082,
0x00000000, 0x0010003a, 0x00000000, 0x00004001, 0x00010000, 0x00004001, 0x00000000, 0x0b00008c,
0x00100012, 0x00000000, 0x00004001, 0x00000010, 0x00004001, 0x00000000, 0x0010000a, 0x00000000,
0x0010003a, 0x00000000, 0x090000a8, 0x0011e012, 0x00000000, 0x0010002a, 0x00000000, 0x00004001,
0x00000000, 0x0010000a, 0x00000000, 0x0100003e, 0x30494653, 0x00000008, 0x00000100, 0x00000000,
};
static const D3D12_SHADER_BYTECODE cs_sparse_query = SHADER_BYTECODE(cs_sparse_query_dxbc);
if (!init_compute_test_context(&context))
return;
hr = ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));
ok(hr == S_OK, "Failed to check feature support, hr %#x.\n", hr);
if (options.TiledResourcesTier < D3D12_TILED_RESOURCES_TIER_2)
{
skip("Tiled resources Tier 2 not supported by device.\n");
destroy_test_context(&context);
return;
}
memset(&rs_desc, 0, sizeof(rs_desc));
memset(root_parameters, 0, sizeof(root_parameters));
memset(&desc_range, 0, sizeof(desc_range));
rs_desc.NumParameters = ARRAY_SIZE(root_parameters);
rs_desc.pParameters = root_parameters;
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
root_parameters[1].DescriptorTable.pDescriptorRanges = &desc_range;
desc_range.NumDescriptors = 1;
desc_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
create_root_signature(context.device, &rs_desc, &context.root_signature);
context.pipeline_state = create_compute_pipeline_state(context.device, context.root_signature, cs_sparse_query);
memset(&heap_desc, 0, sizeof(heap_desc));
heap_desc.SizeInBytes = 4 * 1024 * 1024;
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap);
ok(SUCCEEDED(hr), "Failed to create heap, hr #%x.\n", hr);
hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap_live);
ok(SUCCEEDED(hr), "Failed to create heap, hr #%x.\n", hr);
memset(&desc, 0, sizeof(desc));
desc.Width = 64 * 1024 * 1024;
desc.Height = 1;
desc.DepthOrArraySize = 1;
desc.SampleDesc.Count = 1;
desc.Format = DXGI_FORMAT_UNKNOWN;
desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
desc.MipLevels = 1;
desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
hr = ID3D12Device_CreateReservedResource(context.device, &desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
NULL, &IID_ID3D12Resource, (void**)&sparse);
ok(SUCCEEDED(hr), "Failed to create reserved resource, hr #%x.\n", hr);
{
const D3D12_TILED_RESOURCE_COORDINATE region_start_coordinate = { 0 };
const D3D12_TILE_RANGE_FLAGS range_flag = D3D12_TILE_RANGE_FLAG_NULL;
const UINT offset = 0;
const UINT count = desc.Width / (64 * 1024);
region_size.UseBox = FALSE;
region_size.NumTiles = desc.Width / (64 * 1024);
ID3D12CommandQueue_UpdateTileMappings(context.queue, sparse, 1, &region_start_coordinate, &region_size,
NULL, 1, &range_flag, &offset, &count, D3D12_TILE_MAPPING_FLAG_NONE);
}
region_size.UseBox = FALSE;
region_size.NumTiles = 1;
for (i = 0; i < 2; i++)
{
const D3D12_TILED_RESOURCE_COORDINATE region_start_coordinate = { i, 0, 0, 0 };
const D3D12_TILE_RANGE_FLAGS range_flag = D3D12_TILE_RANGE_FLAG_NONE;
const UINT offset = i;
const UINT count = 1;
ID3D12CommandQueue_UpdateTileMappings(context.queue, sparse, 1, &region_start_coordinate, &region_size,
i == 0 ? heap : heap_live, 1, &range_flag, &offset, &count, D3D12_TILE_MAPPING_FLAG_NONE);
}
wait_queue_idle(context.device, context.queue);
buffer = create_default_buffer(context.device, 128 * 1024,
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST);
cpu = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
gpu = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2);
memset(&uav_desc, 0, sizeof(uav_desc));
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav_desc.Format = DXGI_FORMAT_R32_UINT;
uav_desc.Buffer.NumElements = 128 * 1024 / 4;
uav_desc.Buffer.FirstElement = 0;
ID3D12Device_CreateUnorderedAccessView(context.device, sparse, NULL, &uav_desc,
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu));
ID3D12Device_CreateUnorderedAccessView(context.device, sparse, NULL, &uav_desc,
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu));
memset(&srv_desc, 0, sizeof(srv_desc));
srv_desc.Buffer.FirstElement = 0;
srv_desc.Buffer.NumElements = 2 * 1024 * 1024;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
srv_desc.Format = DXGI_FORMAT_R32_UINT;
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
h_cpu = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu);
h_cpu.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
ID3D12Device_CreateShaderResourceView(context.device, sparse, &srv_desc, h_cpu);
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu);
ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(context.list,
ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu),
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu), sparse, values, 0, NULL);
transition_resource_state(context.list, sparse,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 0, sparse, 0, 128 * 1024);
transition_resource_state(context.list, buffer,
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT,
&rb, context.queue, context.list);
reset_command_list(context.list, context.allocator);
ok(get_readback_uint(&rb, 0, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 0, 0, 0));
ok(get_readback_uint(&rb, 64 * 1024 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 64 * 1024 / 4, 0, 0));
release_resource_readback(&rb);
ID3D12Heap_Release(heap);
/* Access a resource where we can hypothetically access the freed heap memory. */
/* On AMD Windows native at least, if we read the freed region, we read garbage, which proves it's not required to unbind explicitly.
* We'd read 0 in that case. */
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 0, sparse, 64 * 1024, 64 * 1024);
#define EXPLORE_UNDEFINED_BEHAVIOR 0
#if EXPLORE_UNDEFINED_BEHAVIOR
/* This reads unmapped memory. */
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 1024, sparse, 1024, 1024);
#endif
transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
h_gpu = ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu);
h_gpu.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu);
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0, ID3D12Resource_GetGPUVirtualAddress(buffer));
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 1, h_gpu);
#if EXPLORE_UNDEFINED_BEHAVIOR
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
#endif
transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT,
&rb, context.queue, context.list);
#if EXPLORE_UNDEFINED_BEHAVIOR
skip("Reading undefined value #%x.\n", get_readback_uint(&rb, 0, 0, 0));
skip("Reading value #%x (expect 0).\n", get_readback_uint(&rb, 1, 0, 0));
skip("Reading undefined value #%x.\n", get_readback_uint(&rb, 1024 / 4, 0, 0));
#endif
ok(get_readback_uint(&rb, 2048 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 2048 / 4, 0, 0));
ok(get_readback_uint(&rb, 64 * 1024 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 64 * 1024 / 4, 0, 0));
release_resource_readback(&rb);
ID3D12Resource_Release(buffer);
ID3D12Resource_Release(sparse);
ID3D12DescriptorHeap_Release(cpu);
ID3D12DescriptorHeap_Release(gpu);
ID3D12Heap_Release(heap_live);
destroy_test_context(&context);
}

View File

@ -1187,247 +1187,3 @@ void test_create_fence(void)
ok(!refcount, "ID3D12Device has %u references left.\n", (unsigned int)refcount);
}
/* Stress-tests fence lifetime robustness: releasing ID3D12Fence objects while
 * GPU work referencing them is still in flight must not hang or crash, and
 * waiters blocked on a released fence must be unblocked.
 * When shared_handles is true (Win32 only), the fences are round-tripped
 * through CreateSharedHandle/OpenSharedHandle so cross-handle counter
 * visibility is also validated. */
void test_fence_wait_robustness_inner(bool shared_handles)
{
    VKD3D_UNUSED HANDLE shared_signal = NULL;
    VKD3D_UNUSED HANDLE shared_drain = NULL;
    VKD3D_UNUSED HANDLE shared_wait = NULL;
    ID3D12CommandAllocator *allocator[2];
    ID3D12Fence *signal_fence_dup = NULL;
    D3D12_COMMAND_QUEUE_DESC queue_desc;
    ID3D12Fence *drain_fence_dup = NULL;
    ID3D12Fence *wait_fence_dup = NULL;
    ID3D12GraphicsCommandList *list[2];
    ID3D12CommandQueue *compute_queue;
    struct test_context context;
    ID3D12Fence *signal_fence;
    ID3D12Fence *drain_fence;
    ID3D12Fence *wait_fence;
    ID3D12Resource *src;
    ID3D12Resource *dst;
    unsigned int i;
    HANDLE event;
    UINT value;
    HRESULT hr;

    if (!init_compute_test_context(&context))
        return;

    /* Three fences: signal_fence is signalled by the direct queue,
     * wait_fence is signal/waited on the compute queue itself, and
     * drain_fence marks final completion of all compute work. */
    hr = ID3D12Device_CreateFence(context.device, 0,
            shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
            &IID_ID3D12Fence, (void**)&signal_fence);
    /* Shared fences are known-unsupported in some configurations; only mark todo there. */
    todo_if(shared_handles) ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
    if (FAILED(hr))
    {
        skip("Failed to create fence, skipping test ...\n");
        destroy_test_context(&context);
        return;
    }

    hr = ID3D12Device_CreateFence(context.device, 0,
            shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
            &IID_ID3D12Fence, (void**)&wait_fence);
    ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
    if (FAILED(hr))
    {
        skip("Failed to create fence, skipping test ...\n");
        ID3D12Fence_Release(signal_fence);
        destroy_test_context(&context);
        return;
    }

    hr = ID3D12Device_CreateFence(context.device, 0,
            shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
            &IID_ID3D12Fence, (void**)&drain_fence);
    ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
    if (FAILED(hr))
    {
        skip("Failed to create fence, skipping test ...\n");
        ID3D12Fence_Release(signal_fence);
        ID3D12Fence_Release(wait_fence);
        destroy_test_context(&context);
        return;
    }

#ifdef _WIN32
    if (shared_handles)
    {
        /* Replace each fence with an instance opened from its shared handle,
         * so the rest of the test exercises the shared-fence code path. */
        hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)signal_fence,
                NULL, GENERIC_ALL, NULL, &shared_signal);
        ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
        hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)wait_fence,
                NULL, GENERIC_ALL, NULL, &shared_wait);
        ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
        hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)drain_fence,
                NULL, GENERIC_ALL, NULL, &shared_drain);
        ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
        ID3D12Fence_Release(signal_fence);
        ID3D12Fence_Release(wait_fence);
        ID3D12Fence_Release(drain_fence);
        hr = ID3D12Device_OpenSharedHandle(context.device, shared_signal, &IID_ID3D12Fence, (void**)&signal_fence);
        ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
        hr = ID3D12Device_OpenSharedHandle(context.device, shared_wait, &IID_ID3D12Fence, (void**)&wait_fence);
        ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
        hr = ID3D12Device_OpenSharedHandle(context.device, shared_drain, &IID_ID3D12Fence, (void**)&drain_fence);
        ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
        /* OpenSharedHandle takes a kernel level reference on the HANDLE. */
        hr = ID3D12Device_OpenSharedHandle(context.device, shared_signal, &IID_ID3D12Fence, (void**)&signal_fence_dup);
        ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
        hr = ID3D12Device_OpenSharedHandle(context.device, shared_wait, &IID_ID3D12Fence, (void**)&wait_fence_dup);
        ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
        hr = ID3D12Device_OpenSharedHandle(context.device, shared_drain, &IID_ID3D12Fence, (void**)&drain_fence_dup);
        ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
        /* Observed behavior: Closing the last reference to the kernel HANDLE object unblocks all waiters.
         * This isn't really implementable in Wine as it stands since applications are free to share
         * the HANDLE and Dupe it arbitrarily.
         * For now, assume this is not a thing, we can report TDR-like situations if this comes up in practice. */
        if (shared_signal)
            CloseHandle(shared_signal);
        if (shared_wait)
            CloseHandle(shared_wait);
        if (shared_drain)
            CloseHandle(shared_drain);
    }
#endif

    memset(&queue_desc, 0, sizeof(queue_desc));
    queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
    queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
    queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
    /* Large (256 MiB) buffers so the copy workload below keeps the GPU busy
     * long enough that the fence checks cannot race to completion. */
    src = create_default_buffer(context.device, 256 * 1024 * 1024, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_SOURCE);
    dst = create_default_buffer(context.device, 256 * 1024 * 1024, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
    ID3D12Device_CreateCommandQueue(context.device, &queue_desc, &IID_ID3D12CommandQueue, (void**)&compute_queue);

    for (i = 0; i < 2; i++)
    {
        ID3D12Device_CreateCommandAllocator(context.device, D3D12_COMMAND_LIST_TYPE_COMPUTE,
                &IID_ID3D12CommandAllocator, (void**)&allocator[i]);
        ID3D12Device_CreateCommandList(context.device, 0, D3D12_COMMAND_LIST_TYPE_COMPUTE, allocator[i], NULL,
                &IID_ID3D12GraphicsCommandList, (void**)&list[i]);
    }

    /* Heavy copy action. */
    for (i = 0; i < 128; i++)
    {
        ID3D12GraphicsCommandList_CopyResource(list[0], dst, src);
        ID3D12GraphicsCommandList_CopyResource(list[1], src, dst);
    }
    ID3D12GraphicsCommandList_Close(list[0]);
    ID3D12GraphicsCommandList_Close(list[1]);

    /* Note on ref-count checks: The debug layers can take transient public ref-counts it seems. */
    ID3D12CommandQueue_ExecuteCommandLists(context.queue, 1, (ID3D12CommandList * const *)&list[0]);
    ID3D12CommandQueue_Signal(context.queue, signal_fence, 1);
    /* Validate that signal/wait does not take public ref-counts. */
    value = get_refcount(signal_fence);
    ok(value == 1, "Unexpected ref-count %u\n", value);
    /* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time. */
    value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
    ok(value == 0, "Unexpected signal event %u.\n", value);
    /* Try waiting for a signal that never comes. We'll be able to unblock this wait
     * when we fully release the fence. */
    ID3D12CommandQueue_Wait(compute_queue, signal_fence, UINT64_MAX);
    value = get_refcount(signal_fence);
    ok(value == 1, "Unexpected ref-count %u\n", value);
    ID3D12CommandQueue_Signal(compute_queue, wait_fence, 1);
    value = get_refcount(wait_fence);
    ok(value == 1, "Unexpected ref-count %u\n", value);
    /* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time. */
    value = (UINT)ID3D12Fence_GetCompletedValue(wait_fence);
    ok(value == 0, "Unexpected signal event %u.\n", value);
    value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
    ok(value == 0, "Unexpected signal event %u.\n", value);
    ID3D12CommandQueue_Wait(compute_queue, wait_fence, 1);
    value = get_refcount(wait_fence);
    ok(value == 1, "Unexpected ref-count %u\n", value);

    /* Check that we can queue up event completion.
     * Again, verify that releasing the fence unblocks all waiters ... */
    event = create_event();
    /* UINT64_MAX will never be reached by a signal; only the final release
     * of signal_fence below can satisfy this event. */
    ID3D12Fence_SetEventOnCompletion(signal_fence, UINT64_MAX, event);
    if (signal_fence_dup)
        ID3D12Fence_Release(signal_fence_dup);
    if (wait_fence_dup)
        ID3D12Fence_Release(wait_fence_dup);
    /* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time.
     * Makes sure that the fences aren't signalled when we try to free them.
     * (Sure, there is a theoretical race condition if GPU completes between this check and the release, but seriously ...). */
    value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
    ok(value == 0, "Unexpected signal event %u.\n", value);
    value = (UINT)ID3D12Fence_GetCompletedValue(wait_fence);
    ok(value == 0, "Unexpected signal event %u.\n", value);
    /* Test that it's valid to release fence while it's in flight.
     * If we don't cause device lost and drain_fence is waited on successfully we pass the test. */
    value = ID3D12Fence_Release(signal_fence);
    ok(value == 0, "Unexpected fence ref-count %u.\n", value);
    value = ID3D12Fence_Release(wait_fence);
    ok(value == 0, "Unexpected fence ref-count %u.\n", value);

    ID3D12CommandQueue_ExecuteCommandLists(compute_queue, 1, (ID3D12CommandList * const *)&list[1]);
    ID3D12CommandQueue_Signal(compute_queue, drain_fence, 1);
    /* This only returns if releasing signal_fence unblocked the pending
     * SetEventOnCompletion(UINT64_MAX) wait. */
    wait_event(event, INFINITE);
    destroy_event(event);
    /* NULL event: blocks the calling thread until drain_fence reaches 1. */
    ID3D12Fence_SetEventOnCompletion(drain_fence, 1, NULL);
    value = (UINT)ID3D12Fence_GetCompletedValue(drain_fence);
    ok(value == 1, "Expected fence wait value 1, but got %u.\n", value);
    if (drain_fence_dup)
    {
        /* Check we observe the counter in sibling fences as well. */
        value = (UINT)ID3D12Fence_GetCompletedValue(drain_fence_dup);
        ok(value == 1, "Expected fence wait value 1, but got %u.\n", value);
        ID3D12Fence_Release(drain_fence_dup);
    }
    value = ID3D12Fence_Release(drain_fence);
    ok(value == 0, "Unexpected fence ref-count %u.\n", value);

    /* Early freeing of fences might signal the drain fence too early, causing GPU hang. */
    wait_queue_idle(context.device, context.queue);
    wait_queue_idle(context.device, compute_queue);
    ID3D12CommandQueue_Release(compute_queue);
    for (i = 0; i < 2; i++)
    {
        ID3D12CommandAllocator_Release(allocator[i]);
        ID3D12GraphicsCommandList_Release(list[i]);
    }
    ID3D12Resource_Release(dst);
    ID3D12Resource_Release(src);
    destroy_test_context(&context);
}
void test_fence_wait_robustness(void)
{
test_fence_wait_robustness_inner(false);
}
/* Entry point: run the fence robustness test with shared fence handles.
 * Shared handles require the Win32 CreateSharedHandle path, so this is a
 * no-op (skip) on non-Windows builds. */
void test_fence_wait_robustness_shared(void)
{
#ifndef _WIN32
    skip("Shared fences not supported on native Linux build.\n");
#else
    test_fence_wait_robustness_inner(true);
#endif
}

View File

@ -27,10 +27,6 @@ PFN_D3D12_GET_DEBUG_INTERFACE pfn_D3D12GetDebugInterface;
const char *vkd3d_test_platform = "other";
struct vkd3d_test_state_context vkd3d_test_state;
#ifdef _WIN32
RENDERDOC_API_1_0_0 *renderdoc_api;
#endif
bool compare_float(float f, float g, int ulps)
{
int x, y;
@ -846,9 +842,6 @@ ID3D12CommandSignature *create_command_signature_(unsigned int line,
case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
signature_desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS);
break;
case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS:
signature_desc.ByteStride = sizeof(D3D12_DISPATCH_RAYS_DESC);
break;
default:
return NULL;
}
@ -865,7 +858,6 @@ ID3D12CommandSignature *create_command_signature_(unsigned int line,
bool init_compute_test_context_(unsigned int line, struct test_context *context)
{
D3D12_COMMAND_LIST_TYPE command_list_type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
ID3D12Device *device;
HRESULT hr;
@ -878,21 +870,14 @@ bool init_compute_test_context_(unsigned int line, struct test_context *context)
}
device = context->device;
#ifdef _WIN32
begin_renderdoc_capturing(device);
/* Workaround RenderDoc bug. It expects a DIRECT command queue to exist. */
if (renderdoc_api)
command_list_type = D3D12_COMMAND_LIST_TYPE_DIRECT;
#endif
context->queue = create_command_queue_(line, device,
command_list_type, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
hr = ID3D12Device_CreateCommandAllocator(device, command_list_type,
hr = ID3D12Device_CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_COMPUTE,
&IID_ID3D12CommandAllocator, (void **)&context->allocator);
ok_(line)(hr == S_OK, "Failed to create command allocator, hr %#x.\n", hr);
hr = ID3D12Device_CreateCommandList(device, 0, command_list_type,
hr = ID3D12Device_CreateCommandList(device, 0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
context->allocator, NULL, &IID_ID3D12GraphicsCommandList, (void **)&context->list);
ok_(line)(hr == S_OK, "Failed to create command list, hr %#x.\n", hr);

View File

@ -19,10 +19,6 @@
#ifndef __VKD3D_D3D12_TEST_UTILS_H
#define __VKD3D_D3D12_TEST_UTILS_H
#ifdef _WIN32
#include "renderdoc_app.h"
#endif
#define SHADER_BYTECODE(code) {code,sizeof(code)}
#define wait_queue_idle(a, b) wait_queue_idle_(__LINE__, a, b)
@ -1053,45 +1049,6 @@ static inline void create_render_target_(unsigned int line, struct test_context
ID3D12Device_CreateRenderTargetView(context->device, *render_target, NULL, *rtv);
}
/* Utility code for capturing native D3D12 tests, which is why this only covers Win32.
* Launch the d3d12.exe test binary from RenderDoc UI.
* For Vulkan capturing, use VKD3D_AUTO_CAPTURE_COUNTS and friends instead. */
#ifdef _WIN32
extern RENDERDOC_API_1_0_0 *renderdoc_api;
/* Lazily resolves the RenderDoc in-application API from an already-loaded
 * renderdoc.dll and, if available, starts a frame capture for this device.
 * Silently does nothing when RenderDoc is not injected into the process. */
static inline void begin_renderdoc_capturing(ID3D12Device *device)
{
    pRENDERDOC_GetAPI query_api;
    FARPROC raw_proc;
    HANDLE module;

    if (!renderdoc_api)
    {
        module = GetModuleHandleA("renderdoc.dll");
        raw_proc = module ? GetProcAddress(module, "RENDERDOC_GetAPI") : NULL;
        if (raw_proc)
        {
            /* memcpy round-trip instead of a direct cast: avoids compiler
             * warnings about converting FARPROC to another function pointer. */
            memcpy(&query_api, &raw_proc, sizeof(raw_proc));
            if (!query_api(eRENDERDOC_API_Version_1_0_0, (void **)&renderdoc_api))
                renderdoc_api = NULL;
        }
    }

    if (renderdoc_api)
        renderdoc_api->StartFrameCapture(device, NULL);
}
/* Ends the RenderDoc frame capture started by begin_renderdoc_capturing().
 * No-op when the RenderDoc API was never resolved. */
static inline void end_renderdoc_capturing(ID3D12Device *device)
{
    if (!renderdoc_api)
        return;
    renderdoc_api->EndFrameCapture(device, NULL);
}
#endif
#define init_test_context(context, desc) init_test_context_(__LINE__, context, desc)
static inline bool init_test_context_(unsigned int line, struct test_context *context,
const struct test_context_desc *desc)
@ -1109,10 +1066,6 @@ static inline bool init_test_context_(unsigned int line, struct test_context *co
}
device = context->device;
#ifdef _WIN32
begin_renderdoc_capturing(device);
#endif
context->queue = create_command_queue_(line, device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
hr = ID3D12Device_CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_DIRECT,
@ -1164,10 +1117,6 @@ static inline void destroy_test_context_(unsigned int line, struct test_context
{
ULONG refcount;
#ifdef _WIN32
end_renderdoc_capturing(context->device);
#endif
if (context->pipeline_state)
ID3D12PipelineState_Release(context->pipeline_state);
if (context->root_signature)

View File

@ -123,8 +123,6 @@ decl_test(test_tgsm);
decl_test(test_uav_load);
decl_test(test_cs_uav_store);
decl_test(test_uav_counters);
decl_test(test_uav_counter_null_behavior_dxbc);
decl_test(test_uav_counter_null_behavior_dxil);
decl_test(test_decrement_uav_counter);
decl_test(test_atomic_instructions_dxbc);
decl_test(test_atomic_instructions_dxil);
@ -137,7 +135,6 @@ decl_test(test_resolve_non_issued_query_data);
decl_test(test_resolve_query_data_in_different_command_list);
decl_test(test_resolve_query_data_in_reordered_command_list);
decl_test(test_execute_indirect);
decl_test(test_execute_indirect_state);
decl_test(test_dispatch_zero_thread_groups);
decl_test(test_unaligned_vertex_stride);
decl_test(test_zero_vertex_stride);
@ -297,7 +294,6 @@ decl_test(test_integer_blending_pipeline_state);
decl_test(test_discard_resource_uav);
decl_test(test_unbound_rtv_rendering);
decl_test(test_raytracing_local_rs_static_sampler);
decl_test(test_raytracing_local_rs_static_sampler_collection);
decl_test(test_rayquery);
decl_test(test_typed_srv_uav_cast);
decl_test(test_typed_srv_cast_clear);
@ -308,16 +304,3 @@ decl_test(test_mesh_shader_execute_indirect);
decl_test(test_amplification_shader);
decl_test(test_advanced_cbv_layout);
decl_test(test_shader_waveop_maximal_convergence);
decl_test(test_uav_3d_sliced_view);
decl_test(test_pipeline_no_ps_nonzero_rts);
decl_test(test_root_descriptor_offset_sign);
decl_test(test_raytracing_no_global_root_signature);
decl_test(test_raytracing_missing_required_objects);
decl_test(test_raytracing_reject_duplicate_objects);
decl_test(test_raytracing_embedded_subobjects);
decl_test(test_raytracing_default_association_tiebreak);
decl_test(test_raytracing_collection_identifiers);
decl_test(test_fence_wait_robustness);
decl_test(test_fence_wait_robustness_shared);
decl_test(test_root_signature_empty_blob);
decl_test(test_sparse_buffer_memory_lifetime);