Compare commits
33 Commits
master
...
execute-in
Author | SHA1 | Date |
---|---|---|
Hans-Kristian Arntzen | e497e56aa1 | |
Hans-Kristian Arntzen | 6434db2c82 | |
Hans-Kristian Arntzen | 0ea5a17797 | |
Hans-Kristian Arntzen | b77091ba6b | |
Hans-Kristian Arntzen | ab071fb208 | |
Hans-Kristian Arntzen | 6ac298929d | |
Hans-Kristian Arntzen | 2078912c26 | |
Hans-Kristian Arntzen | d7567cbb97 | |
Hans-Kristian Arntzen | eb1e3ae656 | |
Hans-Kristian Arntzen | 0229889217 | |
Hans-Kristian Arntzen | 5b33483ce9 | |
Hans-Kristian Arntzen | 8140b26c93 | |
Hans-Kristian Arntzen | 4aeca16468 | |
Hans-Kristian Arntzen | c2d516e688 | |
Hans-Kristian Arntzen | ebbf4b5338 | |
Hans-Kristian Arntzen | 1b6f7d4c68 | |
Hans-Kristian Arntzen | caa9b0ae24 | |
Hans-Kristian Arntzen | 458391e794 | |
Hans-Kristian Arntzen | 186b45a61f | |
Hans-Kristian Arntzen | 124768c1d6 | |
Hans-Kristian Arntzen | a9583f4358 | |
Hans-Kristian Arntzen | 1591134b7e | |
Hans-Kristian Arntzen | dd840e2004 | |
Hans-Kristian Arntzen | 4a507c3a2b | |
Hans-Kristian Arntzen | a8e46bbff1 | |
Hans-Kristian Arntzen | 59b75b5b1d | |
Hans-Kristian Arntzen | e72fd1414f | |
Hans-Kristian Arntzen | e3f8889b24 | |
Hans-Kristian Arntzen | 4ade0d37b8 | |
Hans-Kristian Arntzen | 1f1b6c0093 | |
Hans-Kristian Arntzen | f46d175935 | |
Hans-Kristian Arntzen | 33bad640ab | |
Hans-Kristian Arntzen | 102e2dac3a |
|
@ -27,6 +27,7 @@
|
|||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
|
@ -46,11 +47,13 @@
|
|||
|
||||
static inline uint64_t align64(uint64_t addr, uint64_t alignment)
|
||||
{
|
||||
assert(alignment > 0);
|
||||
return (addr + (alignment - 1)) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
static inline size_t align(size_t addr, size_t alignment)
|
||||
{
|
||||
assert(alignment > 0);
|
||||
return (addr + (alignment - 1)) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
|
|
|
@ -97,6 +97,14 @@ void DEBUG_CHANNEL_INIT(uvec3 id)
|
|||
#endif
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3 id, uint inst)
|
||||
{
|
||||
if (!DEBUG_SHADER_RING_ACTIVE)
|
||||
return;
|
||||
DEBUG_CHANNEL_ID = id;
|
||||
DEBUG_CHANNEL_INSTANCE_COUNTER = inst;
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_UNLOCK_MESSAGE(RingBuffer buf, uint offset, uint num_words)
|
||||
{
|
||||
memoryBarrierBuffer();
|
||||
|
|
|
@ -87,6 +87,9 @@ extern "C" {
|
|||
#define VKD3D_CONFIG_FLAG_BREADCRUMBS (1ull << 25)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY (1ull << 26)
|
||||
#define VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC (1ull << 27)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV (1ull << 28)
|
||||
#define VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV (1ull << 29)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_ROBUST_PHYSICAL_CBV (1ull << 30)
|
||||
|
||||
typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event);
|
||||
|
||||
|
|
|
@ -241,6 +241,7 @@ struct vkd3d_shader_root_constant
|
|||
struct vkd3d_shader_root_descriptor
|
||||
{
|
||||
struct vkd3d_shader_resource_binding *binding;
|
||||
uint32_t raw_va_root_descriptor_index;
|
||||
};
|
||||
|
||||
struct vkd3d_shader_root_parameter
|
||||
|
@ -332,6 +333,9 @@ enum vkd3d_shader_quirk
|
|||
/* For Position builtins in Output storage class, emit Invariant decoration.
|
||||
* Normally, games have to emit Precise math for position, but if they forget ... */
|
||||
VKD3D_SHADER_QUIRK_INVARIANT_POSITION = (1 << 2),
|
||||
|
||||
/* For raw VA CBVs, range check every access. */
|
||||
VKD3D_SHADER_QUIRK_FORCE_ROBUST_PHYSICAL_CBV = (1 << 3),
|
||||
};
|
||||
|
||||
struct vkd3d_shader_quirk_hash
|
||||
|
|
|
@ -846,6 +846,18 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
|
|||
}
|
||||
}
|
||||
|
||||
if (quirks & VKD3D_SHADER_QUIRK_FORCE_ROBUST_PHYSICAL_CBV)
|
||||
{
|
||||
const dxil_spv_option_robust_physical_cbv_load robust_cbv =
|
||||
{ { DXIL_SPV_OPTION_ROBUST_PHYSICAL_CBV_LOAD }, DXIL_SPV_TRUE };
|
||||
if (dxil_spv_converter_add_option(converter, &robust_cbv.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support ROBUST_PHYSICAL_CBV_LOAD.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
remap_userdata.shader_interface_info = shader_interface_info;
|
||||
remap_userdata.shader_interface_local_info = NULL;
|
||||
remap_userdata.num_root_descriptors = num_root_descriptors;
|
||||
|
@ -925,6 +937,7 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
|||
vkd3d_shader_hash_t hash;
|
||||
char *demangled_export;
|
||||
int ret = VKD3D_OK;
|
||||
uint32_t quirks;
|
||||
void *code;
|
||||
|
||||
dxil_spv_set_thread_log_callback(vkd3d_dxil_log_callback, NULL);
|
||||
|
@ -943,6 +956,8 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
|||
}
|
||||
}
|
||||
|
||||
quirks = vkd3d_shader_compile_arguments_select_quirks(compiler_args, hash);
|
||||
|
||||
dxil_spv_begin_thread_allocator_context();
|
||||
|
||||
vkd3d_shader_dump_shader(hash, dxil, "lib.dxil");
|
||||
|
@ -1213,6 +1228,18 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
|||
WARN("dxil-spirv does not support SHADER_SOURCE_FILE.\n");
|
||||
}
|
||||
|
||||
if (quirks & VKD3D_SHADER_QUIRK_FORCE_ROBUST_PHYSICAL_CBV)
|
||||
{
|
||||
const dxil_spv_option_robust_physical_cbv_load robust_cbv =
|
||||
{ { DXIL_SPV_OPTION_ROBUST_PHYSICAL_CBV_LOAD }, DXIL_SPV_TRUE };
|
||||
if (dxil_spv_converter_add_option(converter, &robust_cbv.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support ROBUST_PHYSICAL_CBV_LOAD.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
if (compiler_args)
|
||||
{
|
||||
for (i = 0; i < compiler_args->target_extension_count; i++)
|
||||
|
|
|
@ -50,6 +50,8 @@ static const char *vkd3d_breadcrumb_command_type_to_str(enum vkd3d_breadcrumb_co
|
|||
return "dispatch";
|
||||
case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT:
|
||||
return "execute_indirect";
|
||||
case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT_TEMPLATE:
|
||||
return "execute_indirect_template";
|
||||
case VKD3D_BREADCRUMB_COMMAND_COPY:
|
||||
return "copy";
|
||||
case VKD3D_BREADCRUMB_COMMAND_RESOLVE:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -61,12 +61,56 @@ void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
|
|||
#define DEBUG_CHANNEL_WORD_COOKIE 0xdeadca70u
|
||||
#define DEBUG_CHANNEL_WORD_MASK 0xfffffff0u
|
||||
|
||||
static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token token)
|
||||
{
|
||||
switch (token)
|
||||
{
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32: return "RootConst";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO: return "IBO VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI: return "IBO VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE: return "IBO Size";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT: return "IBO Type";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO: return "VBO VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI: return "VBO VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE: return "VBO Size";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE: return "VBO Stride";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO: return "ROOT VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI: return "ROOT VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT: return "Vertex Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT: return "Index Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT: return "Instance Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX: return "First Index";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX: return "First Vertex";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE: return "First Instance";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET: return "Vertex Offset";
|
||||
default: return "???";
|
||||
}
|
||||
}
|
||||
|
||||
static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token token)
|
||||
{
|
||||
switch (token)
|
||||
{
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring,
|
||||
uint32_t word_offset, uint32_t message_word_count)
|
||||
{
|
||||
uint32_t i, debug_instance, debug_thread_id[3], fmt;
|
||||
char message_buffer[4096];
|
||||
uint64_t shader_hash;
|
||||
size_t len, avail;
|
||||
|
||||
if (message_word_count < 8)
|
||||
{
|
||||
|
@ -80,52 +124,107 @@ static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring
|
|||
debug_thread_id[i] = READ_RING_WORD(word_offset + 4 + i);
|
||||
fmt = READ_RING_WORD(word_offset + 7);
|
||||
|
||||
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %u, ID (%u, %u, %u):",
|
||||
shader_hash, debug_instance,
|
||||
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
word_offset += 8;
|
||||
message_word_count -= 8;
|
||||
|
||||
for (i = 0; i < message_word_count; i++)
|
||||
if (shader_hash == 0)
|
||||
{
|
||||
union
|
||||
/* We got this from our internal debug shaders. Pretty-print.
|
||||
* Make sure the log is sortable for easier debug.
|
||||
* TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages,
|
||||
* but that is overengineering at this time ... */
|
||||
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ",
|
||||
debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
if (message_word_count == 2)
|
||||
{
|
||||
float f32;
|
||||
uint32_t u32;
|
||||
int32_t i32;
|
||||
} u;
|
||||
const char *delim;
|
||||
size_t len, avail;
|
||||
u.u32 = READ_RING_WORD(word_offset + i);
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
snprintf(message_buffer + len, avail, "DrawCount %u, MaxDrawCount %u",
|
||||
READ_RING_WORD(word_offset + 0),
|
||||
READ_RING_WORD(word_offset + 1));
|
||||
}
|
||||
else if (message_word_count == 4)
|
||||
{
|
||||
union { uint32_t u32; float f32; int32_t s32; } value;
|
||||
enum vkd3d_patch_command_token token;
|
||||
uint32_t dst_offset;
|
||||
uint32_t src_offset;
|
||||
|
||||
len = strlen(message_buffer);
|
||||
if (len + 1 >= sizeof(message_buffer))
|
||||
break;
|
||||
avail = sizeof(message_buffer) - len;
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
|
||||
delim = i == 0 ? " " : ", ";
|
||||
token = READ_RING_WORD(word_offset + 0);
|
||||
dst_offset = READ_RING_WORD(word_offset + 1);
|
||||
src_offset = READ_RING_WORD(word_offset + 2);
|
||||
value.u32 = READ_RING_WORD(word_offset + 3);
|
||||
|
||||
if (vkd3d_patch_command_token_is_hex(token))
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- #%08x",
|
||||
vkd3d_patch_command_token_str(token), value.u32);
|
||||
}
|
||||
else if (token == VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32)
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- {hex #%08x, s32 %d, f32 %f}",
|
||||
vkd3d_patch_command_token_str(token), value.u32, value.s32, value.f32);
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- %d",
|
||||
vkd3d_patch_command_token_str(token), value.s32);
|
||||
}
|
||||
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
snprintf(message_buffer + len, avail, " (dst offset %u, src offset %u)", dst_offset, src_offset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %010u, ID (%u, %u, %u):",
|
||||
shader_hash, debug_instance,
|
||||
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
for (i = 0; i < message_word_count; i++)
|
||||
{
|
||||
union
|
||||
{
|
||||
float f32;
|
||||
uint32_t u32;
|
||||
int32_t i32;
|
||||
} u;
|
||||
const char *delim;
|
||||
u.u32 = READ_RING_WORD(word_offset + i);
|
||||
|
||||
len = strlen(message_buffer);
|
||||
if (len + 1 >= sizeof(message_buffer))
|
||||
break;
|
||||
avail = sizeof(message_buffer) - len;
|
||||
|
||||
delim = i == 0 ? " " : ", ";
|
||||
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
|
||||
switch ((fmt >> (2u * i)) & 3u)
|
||||
{
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
|
||||
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
|
||||
break;
|
||||
switch ((fmt >> (2u * i)) & 3u)
|
||||
{
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
|
||||
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_I32:
|
||||
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
|
||||
break;
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_I32:
|
||||
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_F32:
|
||||
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
|
||||
break;
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_F32:
|
||||
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
|
||||
break;
|
||||
|
||||
default:
|
||||
snprintf(message_buffer + len, avail, "%s????", delim);
|
||||
break;
|
||||
default:
|
||||
snprintf(message_buffer + len, avail, "%s????", delim);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -125,6 +125,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
|
|||
VK_EXTENSION(NV_FRAGMENT_SHADER_BARYCENTRIC, NV_fragment_shader_barycentric),
|
||||
VK_EXTENSION(NV_COMPUTE_SHADER_DERIVATIVES, NV_compute_shader_derivatives),
|
||||
VK_EXTENSION_COND(NV_DEVICE_DIAGNOSTIC_CHECKPOINTS, NV_device_diagnostic_checkpoints, VKD3D_CONFIG_FLAG_BREADCRUMBS),
|
||||
VK_EXTENSION(NV_DEVICE_GENERATED_COMMANDS, NV_device_generated_commands),
|
||||
/* VALVE extensions */
|
||||
VK_EXTENSION(VALVE_MUTABLE_DESCRIPTOR_TYPE, VALVE_mutable_descriptor_type),
|
||||
VK_EXTENSION(VALVE_DESCRIPTOR_SET_HOST_MAPPING, VALVE_descriptor_set_host_mapping),
|
||||
|
@ -494,6 +495,12 @@ static const struct vkd3d_instance_application_meta application_override[] = {
|
|||
/* MSVC fails to compile empty array. */
|
||||
{ VKD3D_STRING_COMPARE_EXACT, "GravityMark.exe", VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE, 0 },
|
||||
{ VKD3D_STRING_COMPARE_EXACT, "Deathloop.exe", VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE, 0 },
|
||||
/* Halo Infinite (1240440).
|
||||
* Game relies on NON_ZEROED committed UAVs to be cleared to zero on allocation.
|
||||
* This works okay with zerovram on first game boot, but not later, since this memory is guaranteed to be recycled.
|
||||
* Game also relies on indirectly modifying CBV root descriptors, which means we are forced to rel yon RAW_VA_CBV. */
|
||||
{ VKD3D_STRING_COMPARE_EXACT, "HaloInfinite.exe",
|
||||
VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV | VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV, 0 },
|
||||
/* Shadow of the Tomb Raider (750920).
|
||||
* Invariant workarounds actually cause more issues than they resolve on NV.
|
||||
* RADV already has workarounds by default.
|
||||
|
@ -610,6 +617,7 @@ static void vkd3d_instance_apply_global_shader_quirks(void)
|
|||
static const struct override overrides[] =
|
||||
{
|
||||
{ VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION, VKD3D_SHADER_QUIRK_INVARIANT_POSITION, true },
|
||||
{ VKD3D_CONFIG_FLAG_FORCE_ROBUST_PHYSICAL_CBV, VKD3D_SHADER_QUIRK_FORCE_ROBUST_PHYSICAL_CBV, false },
|
||||
};
|
||||
uint64_t eq_test;
|
||||
unsigned int i;
|
||||
|
@ -652,6 +660,8 @@ static const struct vkd3d_debug_option vkd3d_config_options[] =
|
|||
{"breadcrumbs", VKD3D_CONFIG_FLAG_BREADCRUMBS},
|
||||
{"pipeline_library_app_cache", VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY},
|
||||
{"shader_cache_sync", VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC},
|
||||
{"force_raw_va_cbv", VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV},
|
||||
{"force_robust_physical_cbv", VKD3D_CONFIG_FLAG_FORCE_ROBUST_PHYSICAL_CBV},
|
||||
};
|
||||
|
||||
static void vkd3d_config_flags_init_once(void)
|
||||
|
@ -1412,6 +1422,16 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
|
|||
vk_prepend_struct(&info->features2, &info->compute_shader_derivatives_features_nv);
|
||||
}
|
||||
|
||||
if (vulkan_info->NV_device_generated_commands)
|
||||
{
|
||||
info->device_generated_commands_features_nv.sType =
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV;
|
||||
info->device_generated_commands_properties_nv.sType =
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_NV;
|
||||
vk_prepend_struct(&info->features2, &info->device_generated_commands_features_nv);
|
||||
vk_prepend_struct(&info->properties2, &info->device_generated_commands_properties_nv);
|
||||
}
|
||||
|
||||
if (vulkan_info->KHR_shader_atomic_int64)
|
||||
{
|
||||
info->shader_atomic_int64_features.sType =
|
||||
|
@ -2534,22 +2554,52 @@ static void d3d12_remove_device_singleton(LUID luid)
|
|||
}
|
||||
}
|
||||
|
||||
static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, VkDeviceSize size, struct vkd3d_scratch_buffer *scratch)
|
||||
static HRESULT d3d12_device_create_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
|
||||
VkDeviceSize size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
|
||||
{
|
||||
struct vkd3d_allocate_heap_memory_info alloc_info;
|
||||
HRESULT hr;
|
||||
|
||||
TRACE("device %p, size %llu, scratch %p.\n", device, size, scratch);
|
||||
|
||||
memset(&alloc_info, 0, sizeof(alloc_info));
|
||||
alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
alloc_info.heap_desc.SizeInBytes = size;
|
||||
alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
|
||||
if (kind == VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE)
|
||||
{
|
||||
struct vkd3d_allocate_heap_memory_info alloc_info;
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator,
|
||||
&alloc_info, &scratch->allocation)))
|
||||
return hr;
|
||||
/* We only care about memory types for INDIRECT_PREPROCESS. */
|
||||
assert(memory_types == ~0u);
|
||||
|
||||
memset(&alloc_info, 0, sizeof(alloc_info));
|
||||
alloc_info.heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
alloc_info.heap_desc.SizeInBytes = size;
|
||||
alloc_info.heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
alloc_info.heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
|
||||
alloc_info.extra_allocation_flags = VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH;
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_heap_memory(device, &device->memory_allocator,
|
||||
&alloc_info, &scratch->allocation)))
|
||||
return hr;
|
||||
}
|
||||
else if (kind == VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS)
|
||||
{
|
||||
struct vkd3d_allocate_memory_info alloc_info;
|
||||
memset(&alloc_info, 0, sizeof(alloc_info));
|
||||
|
||||
alloc_info.heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
alloc_info.memory_requirements.size = size;
|
||||
alloc_info.memory_requirements.memoryTypeBits = memory_types;
|
||||
alloc_info.memory_requirements.alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
alloc_info.heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
|
||||
alloc_info.optional_memory_properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
alloc_info.flags = VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER | VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH;
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_memory(device, &device->memory_allocator,
|
||||
&alloc_info, &scratch->allocation)))
|
||||
return hr;
|
||||
}
|
||||
else
|
||||
{
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
|
||||
scratch->offset = 0;
|
||||
return S_OK;
|
||||
|
@ -2562,35 +2612,47 @@ static void d3d12_device_destroy_scratch_buffer(struct d3d12_device *device, con
|
|||
vkd3d_free_memory(device, &device->memory_allocator, &scratch->allocation);
|
||||
}
|
||||
|
||||
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch)
|
||||
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
|
||||
VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch)
|
||||
{
|
||||
struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind];
|
||||
struct vkd3d_scratch_buffer *candidate;
|
||||
size_t i;
|
||||
|
||||
if (min_size > VKD3D_SCRATCH_BUFFER_SIZE)
|
||||
return d3d12_device_create_scratch_buffer(device, min_size, scratch);
|
||||
return d3d12_device_create_scratch_buffer(device, kind, min_size, memory_types, scratch);
|
||||
|
||||
pthread_mutex_lock(&device->mutex);
|
||||
|
||||
if (device->scratch_buffer_count)
|
||||
for (i = pool->scratch_buffer_count; i; i--)
|
||||
{
|
||||
*scratch = device->scratch_buffers[--device->scratch_buffer_count];
|
||||
scratch->offset = 0;
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
return S_OK;
|
||||
}
|
||||
else
|
||||
{
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
return d3d12_device_create_scratch_buffer(device, VKD3D_SCRATCH_BUFFER_SIZE, scratch);
|
||||
candidate = &pool->scratch_buffers[i - 1];
|
||||
|
||||
/* Extremely unlikely to fail since we have separate lists per pool kind, but to be 100% correct ... */
|
||||
if (memory_types & (1u << candidate->allocation.device_allocation.vk_memory_type))
|
||||
{
|
||||
*scratch = *candidate;
|
||||
scratch->offset = 0;
|
||||
pool->scratch_buffers[i - 1] = pool->scratch_buffers[--pool->scratch_buffer_count];
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
return S_OK;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
return d3d12_device_create_scratch_buffer(device, kind, VKD3D_SCRATCH_BUFFER_SIZE, memory_types, scratch);
|
||||
}
|
||||
|
||||
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch)
|
||||
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
|
||||
const struct vkd3d_scratch_buffer *scratch)
|
||||
{
|
||||
struct d3d12_device_scratch_pool *pool = &device->scratch_pools[kind];
|
||||
pthread_mutex_lock(&device->mutex);
|
||||
|
||||
if (scratch->allocation.resource.size == VKD3D_SCRATCH_BUFFER_SIZE &&
|
||||
device->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT)
|
||||
pool->scratch_buffer_count < VKD3D_SCRATCH_BUFFER_COUNT)
|
||||
{
|
||||
device->scratch_buffers[device->scratch_buffer_count++] = *scratch;
|
||||
pool->scratch_buffers[pool->scratch_buffer_count++] = *scratch;
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
}
|
||||
else
|
||||
|
@ -2851,10 +2913,11 @@ static void d3d12_device_global_pipeline_cache_cleanup(struct d3d12_device *devi
|
|||
static void d3d12_device_destroy(struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
size_t i;
|
||||
size_t i, j;
|
||||
|
||||
for (i = 0; i < device->scratch_buffer_count; i++)
|
||||
d3d12_device_destroy_scratch_buffer(device, &device->scratch_buffers[i]);
|
||||
for (i = 0; i < VKD3D_SCRATCH_POOL_KIND_COUNT; i++)
|
||||
for (j = 0; j < device->scratch_pools[i].scratch_buffer_count; j++)
|
||||
d3d12_device_destroy_scratch_buffer(device, &device->scratch_pools[i].scratch_buffers[j]);
|
||||
|
||||
for (i = 0; i < device->query_pool_count; i++)
|
||||
d3d12_device_destroy_query_pool(device, &device->query_pools[i]);
|
||||
|
@ -4467,9 +4530,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(d3d12_device_i
|
|||
}
|
||||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(d3d12_device_iface *iface,
|
||||
const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature,
|
||||
const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature_iface,
|
||||
REFIID iid, void **command_signature)
|
||||
{
|
||||
struct d3d12_root_signature *root_signature = impl_from_ID3D12RootSignature(root_signature_iface);
|
||||
struct d3d12_device *device = impl_from_ID3D12Device(iface);
|
||||
struct d3d12_command_signature *object;
|
||||
HRESULT hr;
|
||||
|
@ -4477,7 +4541,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(d3d12_devic
|
|||
TRACE("iface %p, desc %p, root_signature %p, iid %s, command_signature %p.\n",
|
||||
iface, desc, root_signature, debugstr_guid(iid), command_signature);
|
||||
|
||||
if (FAILED(hr = d3d12_command_signature_create(device, desc, &object)))
|
||||
if (FAILED(hr = d3d12_command_signature_create(device, root_signature, desc, &object)))
|
||||
return hr;
|
||||
|
||||
return return_interface(&object->ID3D12CommandSignature_iface,
|
||||
|
|
|
@ -240,6 +240,7 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *dev
|
|||
|
||||
alloc_info.heap_desc = heap->desc;
|
||||
alloc_info.host_ptr = host_address;
|
||||
alloc_info.extra_allocation_flags = 0;
|
||||
|
||||
if (FAILED(hr = vkd3d_private_store_init(&heap->private_store)))
|
||||
return hr;
|
||||
|
|
|
@ -349,12 +349,15 @@ static HRESULT vkd3d_import_host_memory(struct d3d12_device *device, void *host_
|
|||
return hr;
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation, struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
|
||||
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation,
|
||||
struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
|
||||
{
|
||||
if (device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
allocation->resource.va = vkd3d_get_buffer_device_address(device, allocation->resource.vk_buffer);
|
||||
else
|
||||
else if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
allocation->resource.va = vkd3d_va_map_alloc_fake_va(&allocator->va_map, allocation->resource.size);
|
||||
else
|
||||
allocation->resource.va = 0xdeadbeef;
|
||||
|
||||
if (!allocation->resource.va)
|
||||
{
|
||||
|
@ -362,7 +365,9 @@ static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocatio
|
|||
return E_OUTOFMEMORY;
|
||||
}
|
||||
|
||||
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
|
||||
/* Internal scratch buffers are not visible to application so we never have to map it back to VkBuffer. */
|
||||
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
@ -446,10 +451,12 @@ static void vkd3d_memory_allocation_free(const struct vkd3d_memory_allocation *a
|
|||
|
||||
if ((allocation->flags & VKD3D_ALLOCATION_FLAG_GPU_ADDRESS) && allocation->resource.va)
|
||||
{
|
||||
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
|
||||
|
||||
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
|
||||
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
{
|
||||
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
|
||||
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
|
||||
}
|
||||
}
|
||||
|
||||
if (allocation->resource.view_map)
|
||||
|
@ -1398,7 +1405,8 @@ HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_a
|
|||
HRESULT hr;
|
||||
|
||||
if (!info->pNext && !info->host_ptr && info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
|
||||
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)))
|
||||
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)) &&
|
||||
!(info->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
hr = vkd3d_suballocate_memory(device, allocator, info, allocation);
|
||||
else
|
||||
hr = vkd3d_memory_allocation_init(allocation, device, allocator, info);
|
||||
|
@ -1447,6 +1455,7 @@ HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_mem
|
|||
alloc_info.heap_flags = info->heap_desc.Flags;
|
||||
alloc_info.host_ptr = info->host_ptr;
|
||||
|
||||
alloc_info.flags |= info->extra_allocation_flags;
|
||||
if (!(info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
|
||||
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
|
||||
|
||||
|
|
|
@ -27,6 +27,8 @@ vkd3d_shaders =[
|
|||
|
||||
'shaders/vs_swapchain_fullscreen.vert',
|
||||
'shaders/fs_swapchain_fullscreen.frag',
|
||||
'shaders/cs_execute_indirect_patch.comp',
|
||||
'shaders/cs_execute_indirect_patch_debug_ring.comp',
|
||||
]
|
||||
|
||||
vkd3d_src = [
|
||||
|
|
|
@ -1217,6 +1217,144 @@ void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
info->data_size = predicate_ops->data_sizes[command_type];
|
||||
}
|
||||
|
||||
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
VkPushConstantRange push_constant_range;
|
||||
VkResult vr;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_init(&meta_indirect_ops->mutex, NULL)))
|
||||
return hresult_from_errno(rc);
|
||||
|
||||
push_constant_range.offset = 0;
|
||||
push_constant_range.size = sizeof(struct vkd3d_execute_indirect_args);
|
||||
push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
|
||||
&push_constant_range, &meta_indirect_ops->vk_pipeline_layout)) < 0)
|
||||
{
|
||||
pthread_mutex_destroy(&meta_indirect_ops->mutex);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
meta_indirect_ops->pipelines_count = 0;
|
||||
meta_indirect_ops->pipelines_size = 0;
|
||||
meta_indirect_ops->pipelines = NULL;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
struct vkd3d_meta_execute_indirect_spec_constant_data
|
||||
{
|
||||
struct vkd3d_shader_debug_ring_spec_constants constants;
|
||||
uint32_t workgroup_size_x;
|
||||
};
|
||||
|
||||
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info)
|
||||
{
|
||||
struct vkd3d_meta_execute_indirect_spec_constant_data execute_indirect_spec_constants;
|
||||
VkSpecializationMapEntry map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES + 1];
|
||||
struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect;
|
||||
struct vkd3d_shader_debug_ring_spec_info debug_ring_info;
|
||||
|
||||
VkSpecializationInfo spec;
|
||||
HRESULT hr = S_OK;
|
||||
VkResult vr;
|
||||
bool debug;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return hresult_from_errno(rc);
|
||||
}
|
||||
|
||||
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
|
||||
{
|
||||
if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count)
|
||||
{
|
||||
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
|
||||
info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
debug = meta_ops->device->debug_ring.active;
|
||||
|
||||
/* If we have debug ring, we can dump indirect command buffer data to the ring as well.
|
||||
* Vital for debugging broken execute indirect data with templates. */
|
||||
if (debug)
|
||||
{
|
||||
vkd3d_shader_debug_ring_init_spec_constant(meta_ops->device, &debug_ring_info,
|
||||
0 /* Reserve this hash for internal debug streams. */);
|
||||
|
||||
memset(&execute_indirect_spec_constants, 0, sizeof(execute_indirect_spec_constants));
|
||||
execute_indirect_spec_constants.constants = debug_ring_info.constants;
|
||||
execute_indirect_spec_constants.workgroup_size_x = patch_command_count;
|
||||
|
||||
memcpy(map_entry, debug_ring_info.map_entries, sizeof(debug_ring_info.map_entries));
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].constantID = 4;
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].offset =
|
||||
offsetof(struct vkd3d_meta_execute_indirect_spec_constant_data, workgroup_size_x);
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].size = sizeof(patch_command_count);
|
||||
|
||||
spec.pMapEntries = map_entry;
|
||||
spec.pData = &execute_indirect_spec_constants;
|
||||
spec.mapEntryCount = ARRAY_SIZE(map_entry);
|
||||
spec.dataSize = sizeof(execute_indirect_spec_constants);
|
||||
}
|
||||
else
|
||||
{
|
||||
map_entry[0].constantID = 0;
|
||||
map_entry[0].offset = 0;
|
||||
map_entry[0].size = sizeof(patch_command_count);
|
||||
|
||||
spec.pMapEntries = map_entry;
|
||||
spec.pData = &patch_command_count;
|
||||
spec.mapEntryCount = 1;
|
||||
spec.dataSize = sizeof(patch_command_count);
|
||||
}
|
||||
|
||||
vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size,
|
||||
meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines));
|
||||
|
||||
meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count;
|
||||
|
||||
vr = vkd3d_meta_create_compute_pipeline(meta_ops->device,
|
||||
debug ? sizeof(cs_execute_indirect_patch_debug_ring) : sizeof(cs_execute_indirect_patch),
|
||||
debug ? cs_execute_indirect_patch_debug_ring : cs_execute_indirect_patch,
|
||||
meta_indirect_ops->vk_pipeline_layout, &spec,
|
||||
&meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline);
|
||||
|
||||
if (vr)
|
||||
{
|
||||
hr = hresult_from_vk_result(vr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
|
||||
info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline;
|
||||
meta_indirect_ops->pipelines_count++;
|
||||
|
||||
out:
|
||||
pthread_mutex_unlock(&meta_indirect_ops->mutex);
|
||||
return hr;
|
||||
}
|
||||
|
||||
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL));
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL));
|
||||
pthread_mutex_destroy(&meta_indirect_ops->mutex);
|
||||
}
|
||||
|
||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
@ -1242,8 +1380,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
|
|||
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
|
||||
goto fail_predicate_ops;
|
||||
|
||||
if (FAILED(hr = vkd3d_execute_indirect_ops_init(&meta_ops->execute_indirect, device)))
|
||||
goto fail_execute_indirect_ops;
|
||||
|
||||
return S_OK;
|
||||
|
||||
fail_execute_indirect_ops:
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
fail_predicate_ops:
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
fail_query_ops:
|
||||
|
@ -1260,6 +1403,7 @@ fail_common:
|
|||
|
||||
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device);
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
||||
|
|
|
@ -2803,6 +2803,14 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
|
|||
allocate_info.heap_desc.SizeInBytes = align(desc->Width, allocate_info.heap_desc.Alignment);
|
||||
allocate_info.heap_desc.Flags = heap_flags | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
|
||||
|
||||
/* Be very careful with suballocated buffers. */
|
||||
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV) &&
|
||||
(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) &&
|
||||
desc->Width < VKD3D_VA_BLOCK_SIZE)
|
||||
{
|
||||
allocate_info.heap_desc.Flags &= ~D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
|
||||
}
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_heap_memory(device,
|
||||
&device->memory_allocator, &allocate_info, &object->mem)))
|
||||
goto fail;
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
#version 450
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference_uvec2 : require
|
||||
|
||||
layout(local_size_x_id = 0) in;
|
||||
|
||||
struct Command
|
||||
{
|
||||
uint type;
|
||||
uint src_offset;
|
||||
uint dst_offset;
|
||||
};
|
||||
|
||||
const int COMMAND_TYPE_COPY_INDEX_TYPE = 4;
|
||||
const int DXGI_FORMAT_R32_UINT = 0x2a;
|
||||
const int VK_INDEX_TYPE_UINT16 = 0;
|
||||
const int VK_INDEX_TYPE_UINT32 = 1;
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
|
||||
{
|
||||
Command commands[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
Commands commands_va;
|
||||
SrcBuffer src_buffer_va;
|
||||
DstBuffer dst_buffer_va;
|
||||
uvec2 indirect_count_va;
|
||||
IndirectCountWrite dst_indirect_count_va;
|
||||
uint src_stride;
|
||||
uint dst_stride;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
Command cmd = commands_va.commands[gl_LocalInvocationIndex];
|
||||
|
||||
uint draw_id = gl_WorkGroupID.x;
|
||||
uint max_draws = gl_NumWorkGroups.x;
|
||||
|
||||
if (any(notEqual(indirect_count_va, uvec2(0))))
|
||||
{
|
||||
max_draws = min(max_draws, IndirectCount(indirect_count_va).count);
|
||||
if (gl_WorkGroupID.x == 0u)
|
||||
dst_indirect_count_va.count = max_draws;
|
||||
}
|
||||
|
||||
if (draw_id < max_draws)
|
||||
{
|
||||
uint src_offset = src_stride * draw_id + cmd.src_offset;
|
||||
uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
|
||||
|
||||
uint src_value = src_buffer_va.values[src_offset];
|
||||
if (cmd.type == COMMAND_TYPE_COPY_INDEX_TYPE)
|
||||
src_value = src_value == DXGI_FORMAT_R32_UINT ? VK_INDEX_TYPE_UINT32 : VK_INDEX_TYPE_UINT16;
|
||||
|
||||
dst_buffer_va.values[dst_offset] = src_value;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
#version 450
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference_uvec2 : require
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#include "../../../include/shader-debug/debug_channel.h"
|
||||
|
||||
layout(local_size_x_id = 4) in;
|
||||
|
||||
struct Command
|
||||
{
|
||||
uint type;
|
||||
uint src_offset;
|
||||
uint dst_offset;
|
||||
};
|
||||
|
||||
const int COMMAND_TYPE_COPY_INDEX_TYPE = 4;
|
||||
const int DXGI_FORMAT_R32_UINT = 0x2a;
|
||||
const int VK_INDEX_TYPE_UINT16 = 0;
|
||||
const int VK_INDEX_TYPE_UINT32 = 1;
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
|
||||
{
|
||||
Command commands[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
Commands commands_va;
|
||||
SrcBuffer src_buffer_va;
|
||||
DstBuffer dst_buffer_va;
|
||||
uvec2 indirect_count_va;
|
||||
IndirectCountWrite dst_indirect_count_va;
|
||||
uint src_stride;
|
||||
uint dst_stride;
|
||||
|
||||
// Debug metadata here
|
||||
uint debug_tag;
|
||||
uint implicit_instance;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
if (debug_tag != 0u)
|
||||
DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3(debug_tag, gl_WorkGroupID.x, gl_LocalInvocationIndex), implicit_instance);
|
||||
|
||||
Command cmd = commands_va.commands[gl_LocalInvocationIndex];
|
||||
|
||||
uint draw_id = gl_WorkGroupID.x;
|
||||
uint max_draws = gl_NumWorkGroups.x;
|
||||
if (any(notEqual(indirect_count_va, uvec2(0))))
|
||||
{
|
||||
max_draws = min(max_draws, IndirectCount(indirect_count_va).count);
|
||||
if (gl_WorkGroupID.x == 0u)
|
||||
dst_indirect_count_va.count = max_draws;
|
||||
}
|
||||
|
||||
if (debug_tag != 0u && gl_WorkGroupID.x == 0)
|
||||
DEBUG_CHANNEL_MSG_UNIFORM(int(max_draws), int(gl_NumWorkGroups.x));
|
||||
|
||||
if (draw_id < max_draws)
|
||||
{
|
||||
uint src_offset = src_stride * draw_id + cmd.src_offset;
|
||||
uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
|
||||
|
||||
uint src_value = src_buffer_va.values[src_offset];
|
||||
if (cmd.type == COMMAND_TYPE_COPY_INDEX_TYPE)
|
||||
src_value = src_value == DXGI_FORMAT_R32_UINT ? VK_INDEX_TYPE_UINT32 : VK_INDEX_TYPE_UINT16;
|
||||
|
||||
if (debug_tag != 0u)
|
||||
DEBUG_CHANNEL_MSG(cmd.type, dst_offset, src_offset, src_value);
|
||||
|
||||
dst_buffer_va.values[dst_offset] = src_value;
|
||||
}
|
||||
}
|
|
@ -945,6 +945,7 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
|
|||
struct vkd3d_shader_resource_binding *binding;
|
||||
VkDescriptorSetLayoutCreateFlags vk_flags;
|
||||
struct vkd3d_shader_root_parameter *param;
|
||||
uint32_t raw_va_root_descriptor_count = 0;
|
||||
unsigned int hoisted_parameter_index;
|
||||
const D3D12_DESCRIPTOR_RANGE1 *range;
|
||||
unsigned int i, j, k;
|
||||
|
@ -1061,10 +1062,13 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
|
|||
param = &root_signature->parameters[i];
|
||||
param->parameter_type = p->ParameterType;
|
||||
param->descriptor.binding = binding;
|
||||
param->descriptor.raw_va_root_descriptor_index = raw_va_root_descriptor_count;
|
||||
|
||||
context->binding_index += 1;
|
||||
|
||||
if (!raw_va)
|
||||
if (raw_va)
|
||||
raw_va_root_descriptor_count += 1;
|
||||
else
|
||||
context->vk_binding += 1;
|
||||
}
|
||||
|
||||
|
@ -4684,7 +4688,8 @@ static uint32_t vkd3d_bindless_state_get_bindless_flags(struct d3d12_device *dev
|
|||
* The difference in performance is profound (~15% in some cases).
|
||||
* On ACO, BDA with NonWritable can be promoted directly to scalar loads,
|
||||
* which is great. */
|
||||
if (device_info->properties2.properties.vendorID != VKD3D_VENDOR_ID_NVIDIA)
|
||||
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV) ||
|
||||
device_info->properties2.properties.vendorID != VKD3D_VENDOR_ID_NVIDIA)
|
||||
flags |= VKD3D_RAW_VA_ROOT_DESCRIPTOR_CBV;
|
||||
}
|
||||
|
||||
|
|
|
@ -171,6 +171,7 @@ struct vkd3d_vulkan_info
|
|||
bool NV_fragment_shader_barycentric;
|
||||
bool NV_compute_shader_derivatives;
|
||||
bool NV_device_diagnostic_checkpoints;
|
||||
bool NV_device_generated_commands;
|
||||
/* VALVE extensions */
|
||||
bool VALVE_mutable_descriptor_type;
|
||||
bool VALVE_descriptor_set_host_mapping;
|
||||
|
@ -585,6 +586,10 @@ enum vkd3d_allocation_flag
|
|||
VKD3D_ALLOCATION_FLAG_ALLOW_WRITE_WATCH = (1u << 3),
|
||||
VKD3D_ALLOCATION_FLAG_NO_FALLBACK = (1u << 4),
|
||||
VKD3D_ALLOCATION_FLAG_DEDICATED = (1u << 5),
|
||||
/* Intended for internal allocation of scratch buffers.
|
||||
* They are never suballocated since we do that ourselves,
|
||||
* and we do not consume space in the VA map. */
|
||||
VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH = (1u << 6),
|
||||
};
|
||||
|
||||
#define VKD3D_MEMORY_CHUNK_SIZE (VKD3D_VA_BLOCK_SIZE * 8)
|
||||
|
@ -606,6 +611,7 @@ struct vkd3d_allocate_heap_memory_info
|
|||
{
|
||||
D3D12_HEAP_DESC heap_desc;
|
||||
void *host_ptr;
|
||||
uint32_t extra_allocation_flags;
|
||||
};
|
||||
|
||||
struct vkd3d_allocate_resource_memory_info
|
||||
|
@ -1450,10 +1456,11 @@ struct vkd3d_shader_debug_ring_spec_constants
|
|||
uint32_t ring_words;
|
||||
};
|
||||
|
||||
#define VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES 4
|
||||
struct vkd3d_shader_debug_ring_spec_info
|
||||
{
|
||||
struct vkd3d_shader_debug_ring_spec_constants constants;
|
||||
VkSpecializationMapEntry map_entries[4];
|
||||
VkSpecializationMapEntry map_entries[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES];
|
||||
VkSpecializationInfo spec_info;
|
||||
};
|
||||
|
||||
|
@ -1836,6 +1843,20 @@ struct vkd3d_query_pool
|
|||
uint32_t next_index;
|
||||
};
|
||||
|
||||
struct d3d12_command_allocator_scratch_pool
|
||||
{
|
||||
struct vkd3d_scratch_buffer *scratch_buffers;
|
||||
size_t scratch_buffers_size;
|
||||
size_t scratch_buffer_count;
|
||||
};
|
||||
|
||||
enum vkd3d_scratch_pool_kind
|
||||
{
|
||||
VKD3D_SCRATCH_POOL_KIND_DEVICE_STORAGE = 0,
|
||||
VKD3D_SCRATCH_POOL_KIND_INDIRECT_PREPROCESS,
|
||||
VKD3D_SCRATCH_POOL_KIND_COUNT
|
||||
};
|
||||
|
||||
/* ID3D12CommandAllocator */
|
||||
struct d3d12_command_allocator
|
||||
{
|
||||
|
@ -1862,9 +1883,7 @@ struct d3d12_command_allocator
|
|||
size_t command_buffers_size;
|
||||
size_t command_buffer_count;
|
||||
|
||||
struct vkd3d_scratch_buffer *scratch_buffers;
|
||||
size_t scratch_buffers_size;
|
||||
size_t scratch_buffer_count;
|
||||
struct d3d12_command_allocator_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT];
|
||||
|
||||
struct vkd3d_query_pool *query_pools;
|
||||
size_t query_pools_size;
|
||||
|
@ -2065,12 +2084,26 @@ struct d3d12_command_list
|
|||
bool is_valid;
|
||||
bool debug_capture;
|
||||
bool has_replaced_shaders;
|
||||
bool has_valid_index_buffer;
|
||||
|
||||
struct
|
||||
{
|
||||
VkBuffer buffer;
|
||||
VkDeviceSize offset;
|
||||
DXGI_FORMAT dxgi_format;
|
||||
VkIndexType vk_type;
|
||||
bool is_non_null;
|
||||
bool is_dirty;
|
||||
} index_buffer;
|
||||
|
||||
struct
|
||||
{
|
||||
bool has_observed_transition_to_indirect;
|
||||
bool has_emitted_indirect_to_compute_barrier;
|
||||
} execute_indirect;
|
||||
|
||||
VkCommandBuffer vk_command_buffer;
|
||||
VkCommandBuffer vk_init_commands;
|
||||
|
||||
DXGI_FORMAT index_buffer_format;
|
||||
|
||||
struct d3d12_rtv_desc rtvs[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT];
|
||||
struct d3d12_rtv_desc dsv;
|
||||
uint32_t dsv_plane_optimal_mask;
|
||||
|
@ -2359,6 +2392,35 @@ HRESULT d3d12_command_queue_create(struct d3d12_device *device,
|
|||
const D3D12_COMMAND_QUEUE_DESC *desc, struct d3d12_command_queue **queue);
|
||||
void d3d12_command_queue_submit_stop(struct d3d12_command_queue *queue);
|
||||
|
||||
struct vkd3d_execute_indirect_info
|
||||
{
|
||||
VkPipelineLayout vk_pipeline_layout;
|
||||
VkPipeline vk_pipeline;
|
||||
};
|
||||
|
||||
enum vkd3d_patch_command_token
|
||||
{
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32 = 0,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO = 1,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI = 2,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE = 3,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT = 4,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO = 5,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI = 6,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE = 7,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE = 8,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO = 9,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI = 10,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT = 11,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT = 12,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT = 13,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX = 14,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX = 15,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE = 16,
|
||||
VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET = 17,
|
||||
VKD3D_PATCH_COMMAND_INT_MAX = 0x7fffffff
|
||||
};
|
||||
|
||||
/* ID3D12CommandSignature */
|
||||
struct d3d12_command_signature
|
||||
{
|
||||
|
@ -2366,13 +2428,27 @@ struct d3d12_command_signature
|
|||
LONG refcount;
|
||||
|
||||
D3D12_COMMAND_SIGNATURE_DESC desc;
|
||||
uint32_t argument_buffer_offset;
|
||||
|
||||
/* Complex command signatures require some work to stamp out device generated commands. */
|
||||
struct
|
||||
{
|
||||
VkBuffer buffer;
|
||||
VkDeviceAddress buffer_va;
|
||||
struct vkd3d_device_memory_allocation memory;
|
||||
VkIndirectCommandsLayoutNV layout;
|
||||
uint32_t stride;
|
||||
struct vkd3d_execute_indirect_info pipeline;
|
||||
} state_template;
|
||||
bool requires_state_template;
|
||||
|
||||
struct d3d12_device *device;
|
||||
|
||||
struct vkd3d_private_store private_store;
|
||||
};
|
||||
|
||||
HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_COMMAND_SIGNATURE_DESC *desc,
|
||||
HRESULT d3d12_command_signature_create(struct d3d12_device *device, struct d3d12_root_signature *root_signature,
|
||||
const D3D12_COMMAND_SIGNATURE_DESC *desc,
|
||||
struct d3d12_command_signature **signature);
|
||||
|
||||
static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface)
|
||||
|
@ -2449,6 +2525,7 @@ enum vkd3d_breadcrumb_command_type
|
|||
VKD3D_BREADCRUMB_COMMAND_DRAW_INDEXED,
|
||||
VKD3D_BREADCRUMB_COMMAND_DISPATCH,
|
||||
VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT,
|
||||
VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT_TEMPLATE,
|
||||
VKD3D_BREADCRUMB_COMMAND_COPY,
|
||||
VKD3D_BREADCRUMB_COMMAND_RESOLVE,
|
||||
VKD3D_BREADCRUMB_COMMAND_WBI,
|
||||
|
@ -2945,6 +3022,41 @@ HRESULT vkd3d_predicate_ops_init(struct vkd3d_predicate_ops *meta_predicate_ops,
|
|||
void vkd3d_predicate_ops_cleanup(struct vkd3d_predicate_ops *meta_predicate_ops,
|
||||
struct d3d12_device *device);
|
||||
|
||||
struct vkd3d_execute_indirect_args
|
||||
{
|
||||
VkDeviceAddress template_va;
|
||||
VkDeviceAddress api_buffer_va;
|
||||
VkDeviceAddress device_generated_commands_va;
|
||||
VkDeviceAddress indirect_count_va;
|
||||
VkDeviceAddress dst_indirect_count_va;
|
||||
uint32_t api_buffer_word_stride;
|
||||
uint32_t device_generated_commands_word_stride;
|
||||
|
||||
/* Arbitrary tag used for debug version of state patcher. Debug messages from tag 0 are ignored. */
|
||||
uint32_t debug_tag;
|
||||
uint32_t implicit_instance;
|
||||
};
|
||||
|
||||
struct vkd3d_execute_indirect_pipeline
|
||||
{
|
||||
VkPipeline vk_pipeline;
|
||||
uint32_t workgroup_size_x;
|
||||
};
|
||||
|
||||
struct vkd3d_execute_indirect_ops
|
||||
{
|
||||
VkPipelineLayout vk_pipeline_layout;
|
||||
struct vkd3d_execute_indirect_pipeline *pipelines;
|
||||
size_t pipelines_count;
|
||||
size_t pipelines_size;
|
||||
pthread_mutex_t mutex;
|
||||
};
|
||||
|
||||
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device);
|
||||
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device);
|
||||
|
||||
struct vkd3d_meta_ops_common
|
||||
{
|
||||
VkShaderModule vk_module_fullscreen_vs;
|
||||
|
@ -2960,6 +3072,7 @@ struct vkd3d_meta_ops
|
|||
struct vkd3d_swapchain_ops swapchain;
|
||||
struct vkd3d_query_ops query;
|
||||
struct vkd3d_predicate_ops predicate;
|
||||
struct vkd3d_execute_indirect_ops execute_indirect;
|
||||
};
|
||||
|
||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device);
|
||||
|
@ -2992,6 +3105,9 @@ bool vkd3d_meta_get_query_gather_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
enum vkd3d_predicate_command_type command_type, struct vkd3d_predicate_command_info *info);
|
||||
|
||||
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info);
|
||||
|
||||
enum vkd3d_time_domain_flag
|
||||
{
|
||||
VKD3D_TIME_DOMAIN_DEVICE = 0x00000001u,
|
||||
|
@ -3026,6 +3142,7 @@ struct vkd3d_physical_device_info
|
|||
VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR shader_integer_dot_product_properties;
|
||||
VkPhysicalDeviceDriverPropertiesKHR driver_properties;
|
||||
VkPhysicalDeviceMaintenance4PropertiesKHR maintenance4_properties;
|
||||
VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV device_generated_commands_properties_nv;
|
||||
|
||||
VkPhysicalDeviceProperties2KHR properties2;
|
||||
|
||||
|
@ -3070,6 +3187,7 @@ struct vkd3d_physical_device_info
|
|||
VkPhysicalDeviceCoherentMemoryFeaturesAMD device_coherent_memory_features_amd;
|
||||
VkPhysicalDeviceMaintenance4FeaturesKHR maintenance4_features;
|
||||
VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR ray_tracing_maintenance1_features;
|
||||
VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV device_generated_commands_features_nv;
|
||||
|
||||
VkPhysicalDeviceFeatures2 features2;
|
||||
|
||||
|
@ -3135,6 +3253,12 @@ struct vkd3d_descriptor_qa_heap_buffer_data;
|
|||
/* ID3D12DeviceExt */
|
||||
typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface;
|
||||
|
||||
struct d3d12_device_scratch_pool
|
||||
{
|
||||
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT];
|
||||
size_t scratch_buffer_count;
|
||||
};
|
||||
|
||||
struct d3d12_device
|
||||
{
|
||||
d3d12_device_iface ID3D12Device_iface;
|
||||
|
@ -3169,8 +3293,7 @@ struct d3d12_device
|
|||
|
||||
struct vkd3d_memory_allocator memory_allocator;
|
||||
|
||||
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_SCRATCH_BUFFER_COUNT];
|
||||
size_t scratch_buffer_count;
|
||||
struct d3d12_device_scratch_pool scratch_pools[VKD3D_SCRATCH_POOL_KIND_COUNT];
|
||||
|
||||
struct vkd3d_query_pool query_pools[VKD3D_VIRTUAL_QUERY_POOL_COUNT];
|
||||
size_t query_pool_count;
|
||||
|
@ -3244,8 +3367,10 @@ static inline struct d3d12_device *impl_from_ID3D12Device(d3d12_device_iface *if
|
|||
|
||||
bool d3d12_device_validate_shader_meta(struct d3d12_device *device, const struct vkd3d_shader_meta *meta);
|
||||
|
||||
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, VkDeviceSize min_size, struct vkd3d_scratch_buffer *scratch);
|
||||
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, const struct vkd3d_scratch_buffer *scratch);
|
||||
HRESULT d3d12_device_get_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
|
||||
VkDeviceSize min_size, uint32_t memory_types, struct vkd3d_scratch_buffer *scratch);
|
||||
void d3d12_device_return_scratch_buffer(struct d3d12_device *device, enum vkd3d_scratch_pool_kind kind,
|
||||
const struct vkd3d_scratch_buffer *scratch);
|
||||
|
||||
HRESULT d3d12_device_get_query_pool(struct d3d12_device *device, uint32_t type_index, struct vkd3d_query_pool *pool);
|
||||
void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vkd3d_query_pool *pool);
|
||||
|
|
|
@ -45,6 +45,8 @@ enum vkd3d_meta_copy_mode
|
|||
#include <cs_resolve_binary_queries.h>
|
||||
#include <cs_resolve_predicate.h>
|
||||
#include <cs_resolve_query.h>
|
||||
#include <cs_execute_indirect_patch.h>
|
||||
#include <cs_execute_indirect_patch_debug_ring.h>
|
||||
#include <vs_fullscreen_layer.h>
|
||||
#include <vs_fullscreen.h>
|
||||
#include <gs_fullscreen.h>
|
||||
|
|
|
@ -305,6 +305,12 @@ VK_DEVICE_EXT_PFN(vkGetImageViewAddressNVX)
|
|||
VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutHostMappingInfoVALVE)
|
||||
VK_DEVICE_EXT_PFN(vkGetDescriptorSetHostMappingVALVE)
|
||||
|
||||
/* VK_NV_device_generated_commands */
|
||||
VK_DEVICE_EXT_PFN(vkCreateIndirectCommandsLayoutNV)
|
||||
VK_DEVICE_EXT_PFN(vkDestroyIndirectCommandsLayoutNV)
|
||||
VK_DEVICE_EXT_PFN(vkGetGeneratedCommandsMemoryRequirementsNV)
|
||||
VK_DEVICE_EXT_PFN(vkCmdExecuteGeneratedCommandsNV)
|
||||
|
||||
#undef VK_INSTANCE_PFN
|
||||
#undef VK_INSTANCE_EXT_PFN
|
||||
#undef VK_DEVICE_PFN
|
||||
|
|
|
@ -83,7 +83,7 @@ idl_generator = generator(idl_compiler,
|
|||
arguments : [ '-h', '-o', '@OUTPUT@', '@INPUT@' ])
|
||||
|
||||
glsl_compiler = find_program('glslangValidator')
|
||||
glsl_args = [ '-V', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
|
||||
glsl_args = [ '-V', '--target-env', 'vulkan1.1', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
|
||||
if run_command(glsl_compiler, [ '--quiet', '--version' ], check : false).returncode() == 0
|
||||
glsl_args += [ '--quiet' ]
|
||||
endif
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit e08570fd2aa2bbe5bb374bee6be6bb7b7c5a1748
|
||||
Subproject commit b1afbf5fa8e6f10f6c226cea222a8a5d5518870f
|
|
@ -1449,6 +1449,696 @@ void test_vbv_stride_edge_cases(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_execute_indirect_state(void)
|
||||
{
|
||||
static const struct vec4 values = { 1000.0f, 2000.0f, 3000.0f, 4000.0f };
|
||||
D3D12_INDIRECT_ARGUMENT_DESC indirect_argument_descs[2];
|
||||
D3D12_COMMAND_SIGNATURE_DESC command_signature_desc;
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12CommandSignature *command_signature;
|
||||
D3D12_SO_DECLARATION_ENTRY so_entries[1];
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_ROOT_PARAMETER root_parameters[4];
|
||||
ID3D12RootSignature *root_signatures[2];
|
||||
ID3D12Resource *argument_buffer_late;
|
||||
D3D12_STREAM_OUTPUT_BUFFER_VIEW sov;
|
||||
ID3D12Resource *streamout_buffer;
|
||||
D3D12_VERTEX_BUFFER_VIEW vbvs[2];
|
||||
ID3D12Resource *argument_buffer;
|
||||
struct test_context_desc desc;
|
||||
ID3D12Resource *count_buffer;
|
||||
ID3D12PipelineState *psos[2];
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
D3D12_INDEX_BUFFER_VIEW ibv;
|
||||
ID3D12CommandQueue *queue;
|
||||
const UINT so_stride = 16;
|
||||
ID3D12Resource *vbo[3];
|
||||
ID3D12Resource *ibo[2];
|
||||
unsigned int i, j, k;
|
||||
ID3D12Resource *cbv;
|
||||
ID3D12Resource *srv;
|
||||
ID3D12Resource *uav;
|
||||
HRESULT hr;
|
||||
|
||||
static const D3D12_INPUT_ELEMENT_DESC layout_desc[] =
|
||||
{
|
||||
{"COLOR", 0, DXGI_FORMAT_R32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 1, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
|
||||
struct test
|
||||
{
|
||||
const D3D12_INDIRECT_ARGUMENT_DESC *indirect_arguments;
|
||||
uint32_t indirect_argument_count;
|
||||
const void *argument_buffer_data;
|
||||
size_t argument_buffer_size;
|
||||
uint32_t api_max_count;
|
||||
const struct vec4 *expected_output;
|
||||
uint32_t expected_output_count;
|
||||
uint32_t stride;
|
||||
uint32_t pso_index;
|
||||
bool needs_root_sig;
|
||||
};
|
||||
|
||||
/* Modify root parameters. */
|
||||
struct root_constant_data
|
||||
{
|
||||
float constants[2];
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC root_constant_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 1, .Num32BitValuesToSet = 2 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }
|
||||
};
|
||||
|
||||
static const struct root_constant_data root_constant_data[] =
|
||||
{
|
||||
{
|
||||
.constants = { 100.0f, 500.0f },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.constants = { 200.0f, 800.0f },
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 2,
|
||||
.StartIndexLocation = 1, .StartInstanceLocation = 100, }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 root_constant_expected[] =
|
||||
{
|
||||
{ 1000.0f, 64.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4001.0f },
|
||||
};
|
||||
|
||||
/* Modify root parameters, but very large root signature to test boundary conditions. */
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC root_constant_spill_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 44 + 1, .Num32BitValuesToSet = 2 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }
|
||||
};
|
||||
|
||||
static const struct root_constant_data root_constant_spill_data[] =
|
||||
{
|
||||
{
|
||||
.constants = { 100.0f, 500.0f },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.constants = { 200.0f, 800.0f },
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 2,
|
||||
.StartIndexLocation = 1, .StartInstanceLocation = 100, }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 root_constant_spill_expected[] =
|
||||
{
|
||||
{ 1000.0f, 64.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4001.0f },
|
||||
};
|
||||
|
||||
/* Modify VBOs. */
|
||||
struct indirect_vbo_data
|
||||
{
|
||||
D3D12_VERTEX_BUFFER_VIEW view[2];
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_vbo_sig[3] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 0 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
/* Fill buffer locations later. */
|
||||
struct indirect_vbo_data indirect_vbo_data[] =
|
||||
{
|
||||
{
|
||||
.view = { { 0, 64, 8 }, { 0, 64, 16 } },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 2 }
|
||||
},
|
||||
{
|
||||
/* Test indirectly binding NULL descriptor and 0 stride. */
|
||||
.view = { { 0, 0, 0 }, { 0, 64, 0 } },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
}
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_vbo_expected[] =
|
||||
{
|
||||
{ 1064.0f, 2128.0f, 3000.0f, 4000.0f },
|
||||
{ 1066.0f, 2132.0f, 3000.0f, 4000.0f },
|
||||
{ 1064.0f, 2128.0f, 3000.0f, 4001.0f },
|
||||
{ 1066.0f, 2132.0f, 3000.0f, 4001.0f },
|
||||
{ 1000.0f, 2016.0f, 3000.0f, 4000.0f }, /* This is buggy on WARP and AMD. We seem to get null descriptor instead. */
|
||||
{ 1000.0f, 2016.0f, 3000.0f, 4000.0f }, /* This is buggy on WARP and AMD. */
|
||||
};
|
||||
|
||||
/* Modify just one VBO. */
|
||||
struct indirect_vbo_one_data
|
||||
{
|
||||
D3D12_VERTEX_BUFFER_VIEW view;
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_vbo_one_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 0 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
/* Fill buffer locations later. */
|
||||
struct indirect_vbo_one_data indirect_vbo_one_data[] =
|
||||
{
|
||||
{
|
||||
.view = { 0, 64, 8 },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
}
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_vbo_one_expected[] =
|
||||
{
|
||||
{ 1128.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1130.0f, 2065.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
};
|
||||
|
||||
/* Indirect IBO */
|
||||
struct indirect_ibo_data
|
||||
{
|
||||
D3D12_INDEX_BUFFER_VIEW view;
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_ibo_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
struct indirect_ibo_data indirect_ibo_data[] =
|
||||
{
|
||||
{
|
||||
.view = { 0, 0, DXGI_FORMAT_R32_UINT },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.view = { 0, 64, DXGI_FORMAT_R16_UINT },
|
||||
.indexed = { .IndexCountPerInstance = 4, .InstanceCount = 1 }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_ibo_expected[] =
|
||||
{
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1016.0f, 2080.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1017.0f, 2081.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
};
|
||||
|
||||
/* Indirect root arguments */
|
||||
struct indirect_root_descriptor_data
|
||||
{
|
||||
D3D12_GPU_VIRTUAL_ADDRESS cbv;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS srv;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS uav;
|
||||
D3D12_DRAW_ARGUMENTS array;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_root_descriptor_sig[4] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, .ConstantBufferView = { .RootParameterIndex = 1 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW, .ShaderResourceView = { .RootParameterIndex = 2 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW, .UnorderedAccessView = { .RootParameterIndex = 3 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW },
|
||||
};
|
||||
|
||||
struct indirect_root_descriptor_data indirect_root_descriptor_data[] =
|
||||
{
|
||||
{ .array = { .VertexCountPerInstance = 1, .InstanceCount = 1 } },
|
||||
{ .array = { .VertexCountPerInstance = 1, .InstanceCount = 1 } },
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_root_descriptor_expected[] =
|
||||
{
|
||||
{ 1000.0f, 2064.0f, 3000.0f + 64.0f, 4000.0f + 2.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f + 128.0f, 4000.0f + 3.0f },
|
||||
};
|
||||
|
||||
/* Test packing rules.
|
||||
* 64-bit aligned values are tightly packed with 32-bit alignment when they are in indirect command buffers. */
|
||||
struct indirect_alignment_data
|
||||
{
|
||||
float value;
|
||||
uint32_t cbv_va[2];
|
||||
D3D12_DRAW_ARGUMENTS arrays;
|
||||
};
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_alignment_sig[3] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 1, .Num32BitValuesToSet = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, .ConstantBufferView = { .RootParameterIndex = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW },
|
||||
};
|
||||
|
||||
struct indirect_alignment_data indirect_alignment_data[] =
|
||||
{
|
||||
{
|
||||
.value = 5.0f,
|
||||
.arrays = { .VertexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.value = 6.0f,
|
||||
.arrays = { .VertexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_alignment_expected[] =
|
||||
{
|
||||
{ 1000.0f, 69.0f, 3064.0f, 4000.0f },
|
||||
{ 1000.0f, 70.0f, 3128.0f, 4000.0f },
|
||||
};
|
||||
|
||||
#define DECL_TEST(t, pso_index, needs_root_sig) { t##_sig, ARRAY_SIZE(t##_sig), t##_data, sizeof(t##_data), ARRAY_SIZE(t##_data), \
|
||||
t##_expected, ARRAY_SIZE(t##_expected), sizeof(*(t##_data)), pso_index, needs_root_sig }
|
||||
const struct test tests[] =
|
||||
{
|
||||
DECL_TEST(root_constant, 0, true),
|
||||
DECL_TEST(indirect_vbo, 0, false),
|
||||
DECL_TEST(indirect_vbo_one, 0, false),
|
||||
DECL_TEST(indirect_ibo, 0, false),
|
||||
DECL_TEST(indirect_root_descriptor, 0, true),
|
||||
DECL_TEST(indirect_alignment, 0, true),
|
||||
DECL_TEST(root_constant_spill, 1, true),
|
||||
DECL_TEST(indirect_root_descriptor, 1, true),
|
||||
};
|
||||
#undef DECL_TEST
|
||||
|
||||
uint32_t ibo_data[ARRAY_SIZE(ibo)][64];
|
||||
float vbo_data[ARRAY_SIZE(vbo)][64];
|
||||
float generic_data[4096];
|
||||
|
||||
static const DWORD vs_code_small_cbv[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer RootCBV : register(b0)
|
||||
{
|
||||
float a;
|
||||
};
|
||||
|
||||
StructuredBuffer<float> RootSRV : register(t0);
|
||||
|
||||
cbuffer RootConstants : register(b0, space1)
|
||||
{
|
||||
float4 root;
|
||||
};
|
||||
|
||||
float4 main(float c0 : COLOR0, float c1 : COLOR1, uint iid : SV_InstanceID) : SV_Position
|
||||
{
|
||||
return float4(c0, c1, a, RootSRV[0] + float(iid)) + root;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x33b7b302, 0x34259b9b, 0x3e8568d9, 0x5a5e0c3e, 0x00000001, 0x00000268, 0x00000003,
|
||||
0x0000002c, 0x00000098, 0x000000cc, 0x4e475349, 0x00000064, 0x00000003, 0x00000008, 0x00000050,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000101, 0x00000050, 0x00000001, 0x00000000,
|
||||
0x00000003, 0x00000001, 0x00000101, 0x00000056, 0x00000000, 0x00000008, 0x00000001, 0x00000002,
|
||||
0x00000101, 0x4f4c4f43, 0x56530052, 0x736e495f, 0x636e6174, 0x00444965, 0x4e47534f, 0x0000002c,
|
||||
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000194, 0x00010051, 0x00000065, 0x0100086a,
|
||||
0x07000059, 0x00308e46, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x07000059,
|
||||
0x00308e46, 0x00000001, 0x00000000, 0x00000000, 0x00000001, 0x00000001, 0x070000a2, 0x00307e46,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x0300005f, 0x00101012, 0x00000000,
|
||||
0x0300005f, 0x00101012, 0x00000001, 0x04000060, 0x00101012, 0x00000002, 0x00000008, 0x04000067,
|
||||
0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x0a0000a7, 0x00100012, 0x00000000,
|
||||
0x00004001, 0x00000000, 0x00004001, 0x00000000, 0x00207006, 0x00000000, 0x00000000, 0x05000056,
|
||||
0x00100022, 0x00000000, 0x0010100a, 0x00000002, 0x07000000, 0x00100012, 0x00000000, 0x0010001a,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x09000000, 0x00102012, 0x00000000, 0x0010100a, 0x00000000,
|
||||
0x0030800a, 0x00000001, 0x00000000, 0x00000000, 0x09000000, 0x00102022, 0x00000000, 0x0010100a,
|
||||
0x00000001, 0x0030801a, 0x00000001, 0x00000000, 0x00000000, 0x0b000000, 0x00102042, 0x00000000,
|
||||
0x0030800a, 0x00000000, 0x00000000, 0x00000000, 0x0030802a, 0x00000001, 0x00000000, 0x00000000,
|
||||
0x09000000, 0x00102082, 0x00000000, 0x0010000a, 0x00000000, 0x0030803a, 0x00000001, 0x00000000,
|
||||
0x00000000, 0x0100003e,
|
||||
};
|
||||
|
||||
static const DWORD vs_code_large_cbv[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer RootCBV : register(b0)
|
||||
{
|
||||
float a;
|
||||
};
|
||||
|
||||
StructuredBuffer<float> RootSRV : register(t0);
|
||||
|
||||
cbuffer RootConstants : register(b0, space1)
|
||||
{
|
||||
// Cannot use arrays for root constants in D3D12.
|
||||
float4 pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10;
|
||||
float4 root;
|
||||
};
|
||||
|
||||
float4 main(float c0 : COLOR0, float c1 : COLOR1, uint iid : SV_InstanceID) : SV_Position
|
||||
{
|
||||
return float4(c0, c1, a, RootSRV[0] + float(iid)) + root;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x99a057e8, 0x20344569, 0x434f8a7a, 0xf9171e08, 0x00000001, 0x00000268, 0x00000003,
|
||||
0x0000002c, 0x00000098, 0x000000cc, 0x4e475349, 0x00000064, 0x00000003, 0x00000008, 0x00000050,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000101, 0x00000050, 0x00000001, 0x00000000,
|
||||
0x00000003, 0x00000001, 0x00000101, 0x00000056, 0x00000000, 0x00000008, 0x00000001, 0x00000002,
|
||||
0x00000101, 0x4f4c4f43, 0x56530052, 0x736e495f, 0x636e6174, 0x00444965, 0x4e47534f, 0x0000002c,
|
||||
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000194, 0x00010051, 0x00000065, 0x0100086a,
|
||||
0x07000059, 0x00308e46, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x07000059,
|
||||
0x00308e46, 0x00000001, 0x00000000, 0x00000000, 0x0000000c, 0x00000001, 0x070000a2, 0x00307e46,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x0300005f, 0x00101012, 0x00000000,
|
||||
0x0300005f, 0x00101012, 0x00000001, 0x04000060, 0x00101012, 0x00000002, 0x00000008, 0x04000067,
|
||||
0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x0a0000a7, 0x00100012, 0x00000000,
|
||||
0x00004001, 0x00000000, 0x00004001, 0x00000000, 0x00207006, 0x00000000, 0x00000000, 0x05000056,
|
||||
0x00100022, 0x00000000, 0x0010100a, 0x00000002, 0x07000000, 0x00100012, 0x00000000, 0x0010001a,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x09000000, 0x00102012, 0x00000000, 0x0010100a, 0x00000000,
|
||||
0x0030800a, 0x00000001, 0x00000000, 0x0000000b, 0x09000000, 0x00102022, 0x00000000, 0x0010100a,
|
||||
0x00000001, 0x0030801a, 0x00000001, 0x00000000, 0x0000000b, 0x0b000000, 0x00102042, 0x00000000,
|
||||
0x0030800a, 0x00000000, 0x00000000, 0x00000000, 0x0030802a, 0x00000001, 0x00000000, 0x0000000b,
|
||||
0x09000000, 0x00102082, 0x00000000, 0x0010000a, 0x00000000, 0x0030803a, 0x00000001, 0x00000000,
|
||||
0x0000000b, 0x0100003e,
|
||||
};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_root_signature = true;
|
||||
desc.no_pipeline = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(ibo); j++)
|
||||
for (i = 0; i < ARRAY_SIZE(ibo_data[j]); i++)
|
||||
ibo_data[j][i] = j * 16 + i;
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(vbo); j++)
|
||||
for (i = 0; i < ARRAY_SIZE(vbo_data[j]); i++)
|
||||
vbo_data[j][i] = (float)(j * ARRAY_SIZE(vbo_data[j]) + i);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(generic_data); i++)
|
||||
generic_data[i] = (float)i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ibo); i++)
|
||||
ibo[i] = create_upload_buffer(context.device, sizeof(ibo_data[i]), ibo_data[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(vbo); i++)
|
||||
vbo[i] = create_upload_buffer(context.device, sizeof(vbo_data[i]), vbo_data[i]);
|
||||
cbv = create_upload_buffer(context.device, sizeof(generic_data), generic_data);
|
||||
srv = create_upload_buffer(context.device, sizeof(generic_data), generic_data);
|
||||
uav = create_default_buffer(context.device, sizeof(generic_data),
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
indirect_vbo_data[0].view[0].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[1]);
|
||||
indirect_vbo_data[0].view[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[2]);
|
||||
indirect_vbo_data[1].view[0].BufferLocation = 0;
|
||||
indirect_vbo_data[1].view[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[0]) + 64;
|
||||
|
||||
indirect_vbo_one_data[0].view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[2]);
|
||||
indirect_vbo_one_data[1].view.BufferLocation = 0;
|
||||
|
||||
indirect_ibo_data[1].view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(ibo[1]);
|
||||
|
||||
indirect_root_descriptor_data[0].cbv = ID3D12Resource_GetGPUVirtualAddress(cbv) + 256;
|
||||
indirect_root_descriptor_data[0].srv = ID3D12Resource_GetGPUVirtualAddress(srv) + 8;
|
||||
indirect_root_descriptor_data[0].uav = ID3D12Resource_GetGPUVirtualAddress(uav) + 4;
|
||||
indirect_root_descriptor_data[1].cbv = ID3D12Resource_GetGPUVirtualAddress(cbv) + 512;
|
||||
indirect_root_descriptor_data[1].srv = ID3D12Resource_GetGPUVirtualAddress(srv) + 12;
|
||||
indirect_root_descriptor_data[1].uav = ID3D12Resource_GetGPUVirtualAddress(uav) + 8;
|
||||
|
||||
memcpy(indirect_alignment_data[0].cbv_va, &indirect_root_descriptor_data[0].cbv, sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
|
||||
memcpy(indirect_alignment_data[1].cbv_va, &indirect_root_descriptor_data[1].cbv, sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
|
||||
|
||||
memset(&root_signature_desc, 0, sizeof(root_signature_desc));
|
||||
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
|
||||
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
|
||||
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
root_signature_desc.pParameters = root_parameters;
|
||||
root_signature_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||
root_parameters[0].Constants.RegisterSpace = 1;
|
||||
root_parameters[0].Constants.Num32BitValues = 4;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
|
||||
root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
|
||||
root_parameters[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
|
||||
hr = create_root_signature(context.device, &root_signature_desc, &root_signatures[0]);
|
||||
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
|
||||
root_parameters[0].Constants.Num32BitValues = 48;
|
||||
hr = create_root_signature(context.device, &root_signature_desc, &root_signatures[1]);
|
||||
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
|
||||
|
||||
memset(so_entries, 0, sizeof(so_entries));
|
||||
so_entries[0].ComponentCount = 4;
|
||||
so_entries[0].SemanticName = "SV_Position";
|
||||
|
||||
memset(&pso_desc, 0, sizeof(pso_desc));
|
||||
pso_desc.VS.pShaderBytecode = vs_code_small_cbv;
|
||||
pso_desc.VS.BytecodeLength = sizeof(vs_code_small_cbv);
|
||||
pso_desc.StreamOutput.NumStrides = 1;
|
||||
pso_desc.StreamOutput.pBufferStrides = &so_stride;
|
||||
pso_desc.StreamOutput.pSODeclaration = so_entries;
|
||||
pso_desc.StreamOutput.NumEntries = ARRAY_SIZE(so_entries);
|
||||
pso_desc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;
|
||||
pso_desc.pRootSignature = root_signatures[0];
|
||||
pso_desc.SampleDesc.Count = 1;
|
||||
pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
|
||||
pso_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
|
||||
pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pso_desc.InputLayout.NumElements = ARRAY_SIZE(layout_desc);
|
||||
pso_desc.InputLayout.pInputElementDescs = layout_desc;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, &IID_ID3D12PipelineState, (void**)&psos[0]);
|
||||
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
|
||||
pso_desc.VS.pShaderBytecode = vs_code_large_cbv;
|
||||
pso_desc.VS.BytecodeLength = sizeof(vs_code_large_cbv);
|
||||
pso_desc.pRootSignature = root_signatures[1];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, &IID_ID3D12PipelineState, (void**)&psos[1]);
|
||||
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
|
||||
|
||||
/* Verify sanity checks.
|
||||
* As per validation layers, there must be exactly one command in the signature.
|
||||
* It must come last. Verify that we check for this. */
|
||||
memset(&command_signature_desc, 0, sizeof(command_signature_desc));
|
||||
command_signature_desc.NumArgumentDescs = 1;
|
||||
command_signature_desc.pArgumentDescs = indirect_argument_descs;
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_VERTEX_BUFFER_VIEW);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
command_signature_desc.NumArgumentDescs = 2;
|
||||
command_signature_desc.pArgumentDescs = indirect_argument_descs;
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) + sizeof(D3D12_VERTEX_BUFFER_VIEW);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
indirect_argument_descs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) + sizeof(D3D12_DRAW_INDEXED_ARGUMENTS);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
indirect_argument_descs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
struct vec4 expect_reset_state[2];
|
||||
const struct vec4 *expect, *v;
|
||||
uint32_t expected_output_size;
|
||||
uint32_t clear_vbo_mask;
|
||||
uint32_t size;
|
||||
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
|
||||
command_signature_desc.ByteStride = tests[i].stride;
|
||||
command_signature_desc.pArgumentDescs = tests[i].indirect_arguments;
|
||||
command_signature_desc.NumArgumentDescs = tests[i].indirect_argument_count;
|
||||
command_signature_desc.NodeMask = 0;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc,
|
||||
tests[i].needs_root_sig ? root_signatures[tests[i].pso_index] : NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(SUCCEEDED(hr), "Failed to create command signature, hr #%x.\n", hr);
|
||||
|
||||
argument_buffer = create_upload_buffer(context.device, 256 * 1024, NULL);
|
||||
argument_buffer_late = create_default_buffer(context.device, 256 * 1024,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
{
|
||||
void *ptr;
|
||||
ID3D12Resource_Map(argument_buffer, 0, NULL, &ptr);
|
||||
memcpy(ptr, tests[i].argument_buffer_data, tests[i].argument_buffer_size);
|
||||
ID3D12Resource_Unmap(argument_buffer, 0, NULL);
|
||||
}
|
||||
|
||||
count_buffer = create_upload_buffer(context.device, sizeof(tests[i].api_max_count), &tests[i].api_max_count);
|
||||
streamout_buffer = create_default_buffer(context.device, 64 * 1024,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_STREAM_OUT);
|
||||
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, root_signatures[tests[i].pso_index]);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, psos[tests[i].pso_index]);
|
||||
sov.SizeInBytes = 64 * 1024 - sizeof(struct vec4);
|
||||
sov.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(streamout_buffer) + sizeof(struct vec4);
|
||||
sov.BufferFilledSizeLocation = ID3D12Resource_GetGPUVirtualAddress(streamout_buffer);
|
||||
ID3D12GraphicsCommandList_SOSetTargets(command_list, 0, 1, &sov);
|
||||
|
||||
/* Set up default rendering state. */
|
||||
ibv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(ibo[0]);
|
||||
ibv.SizeInBytes = sizeof(ibo_data[0]);
|
||||
ibv.Format = DXGI_FORMAT_R32_UINT;
|
||||
vbvs[0].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[0]);
|
||||
vbvs[0].SizeInBytes = sizeof(vbo_data[0]);
|
||||
vbvs[0].StrideInBytes = 4;
|
||||
vbvs[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[1]);
|
||||
vbvs[1].SizeInBytes = sizeof(vbo_data[1]);
|
||||
vbvs[1].StrideInBytes = 4;
|
||||
|
||||
ID3D12GraphicsCommandList_IASetIndexBuffer(command_list, &ibv);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_POINTLIST);
|
||||
ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, 2, vbvs);
|
||||
|
||||
for (j = 0; j < (tests[i].pso_index ? 12 : 1); j++)
|
||||
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &values, 4 * j);
|
||||
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(cbv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(command_list, 2,
|
||||
ID3D12Resource_GetGPUVirtualAddress(srv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootUnorderedAccessView(command_list, 3,
|
||||
ID3D12Resource_GetGPUVirtualAddress(uav));
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, tests[i].api_max_count,
|
||||
argument_buffer, 0, NULL, 0);
|
||||
/* Test equivalent call with indirect count. */
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, 1024,
|
||||
argument_buffer, 0, count_buffer, 0);
|
||||
/* Test equivalent, but now with late transition to INDIRECT. */
|
||||
ID3D12GraphicsCommandList_CopyResource(command_list, argument_buffer_late, argument_buffer);
|
||||
transition_resource_state(command_list, argument_buffer_late, D3D12_RESOURCE_STATE_COPY_DEST,
|
||||
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, 1024,
|
||||
argument_buffer_late, 0, count_buffer, 0);
|
||||
|
||||
/* Root descriptors which are part of the state block are cleared to NULL. Recover them here
|
||||
* since attempting to draw next test will crash GPU. */
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(cbv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(command_list, 2,
|
||||
ID3D12Resource_GetGPUVirtualAddress(srv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootUnorderedAccessView(command_list, 3,
|
||||
ID3D12Resource_GetGPUVirtualAddress(uav));
|
||||
|
||||
/* Other state is cleared to 0. */
|
||||
|
||||
ID3D12GraphicsCommandList_DrawInstanced(command_list, 2, 1, 0, 0);
|
||||
transition_resource_state(command_list, streamout_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(streamout_buffer, DXGI_FORMAT_R32G32B32A32_FLOAT, &rb, queue, command_list);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
||||
expected_output_size = (tests[i].expected_output_count * 3 + 2) * sizeof(struct vec4);
|
||||
size = get_readback_uint(&rb, 0, 0, 0);
|
||||
ok(size == expected_output_size, "Expected size %u, got %u.\n", expected_output_size, size);
|
||||
|
||||
for (j = 0; j < tests[i].expected_output_count; j++)
|
||||
{
|
||||
expect = &tests[i].expected_output[j];
|
||||
v = get_readback_vec4(&rb, j + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (direct count) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
|
||||
v = get_readback_vec4(&rb, j + tests[i].expected_output_count + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (indirect count) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
|
||||
v = get_readback_vec4(&rb, j + 2 * tests[i].expected_output_count + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (late latch) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
}
|
||||
|
||||
clear_vbo_mask = 0;
|
||||
expect_reset_state[0] = values;
|
||||
|
||||
/* Root constant state is cleared to zero if it's part of the signature. */
|
||||
for (j = 0; j < tests[i].indirect_argument_count; j++)
|
||||
{
|
||||
if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT)
|
||||
{
|
||||
for (k = 0; k < tests[i].indirect_arguments[j].Constant.Num32BitValuesToSet; k++)
|
||||
(&expect_reset_state[0].x)[(tests[i].indirect_arguments[j].Constant.DestOffsetIn32BitValues + k) % 4] = 0.0f;
|
||||
}
|
||||
else if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW)
|
||||
clear_vbo_mask |= 1u << tests[i].indirect_arguments[j].VertexBuffer.Slot;
|
||||
}
|
||||
|
||||
expect_reset_state[1] = expect_reset_state[0];
|
||||
|
||||
/* VBO/IBO state is cleared to zero if it's part of the signature.
|
||||
* A NULL IBO should be seen as a IBO which only reads 0 index. */
|
||||
if (!(clear_vbo_mask & (1u << 0)))
|
||||
expect_reset_state[1].x += 1.0f;
|
||||
|
||||
if (!(clear_vbo_mask & (1u << 1)))
|
||||
{
|
||||
expect_reset_state[0].y += 64.0f;
|
||||
expect_reset_state[1].y += 65.0f;
|
||||
}
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
v = get_readback_vec4(&rb, j + 1 + 3 * tests[i].expected_output_count, 0);
|
||||
expect = &expect_reset_state[j];
|
||||
ok(compare_vec4(v, expect, 0), "Post-reset element %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
}
|
||||
|
||||
ID3D12CommandSignature_Release(command_signature);
|
||||
ID3D12Resource_Release(argument_buffer);
|
||||
ID3D12Resource_Release(argument_buffer_late);
|
||||
ID3D12Resource_Release(count_buffer);
|
||||
ID3D12Resource_Release(streamout_buffer);
|
||||
release_resource_readback(&rb);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(psos); i++)
|
||||
ID3D12PipelineState_Release(psos[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(root_signatures); i++)
|
||||
ID3D12RootSignature_Release(root_signatures[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(vbo); i++)
|
||||
ID3D12Resource_Release(vbo[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(ibo); i++)
|
||||
ID3D12Resource_Release(ibo[i]);
|
||||
ID3D12Resource_Release(cbv);
|
||||
ID3D12Resource_Release(srv);
|
||||
ID3D12Resource_Release(uav);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_execute_indirect(void)
|
||||
{
|
||||
ID3D12Resource *argument_buffer, *count_buffer, *uav;
|
||||
|
|
|
@ -135,6 +135,7 @@ decl_test(test_resolve_non_issued_query_data);
|
|||
decl_test(test_resolve_query_data_in_different_command_list);
|
||||
decl_test(test_resolve_query_data_in_reordered_command_list);
|
||||
decl_test(test_execute_indirect);
|
||||
decl_test(test_execute_indirect_state);
|
||||
decl_test(test_dispatch_zero_thread_groups);
|
||||
decl_test(test_unaligned_vertex_stride);
|
||||
decl_test(test_zero_vertex_stride);
|
||||
|
|
Loading…
Reference in New Issue