vkd3d: Sanitize VBO strides and VBO offsets.

Realign VBO strides and offsets if we have to, for sake of
robustness. Violating these rules is against D3D12 spec, but it does not
cause crashes on native drivers. On RDNA we can hit hangs with unaligned
vertex attributes. It appears that native drivers apply some kind of
fixup here to avoid the crash, even if the result is not what we expect.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2020-11-23 23:16:43 +01:00
parent 10b503c893
commit 19193bf932
3 changed files with 122 additions and 1 deletions

View File

@ -3123,6 +3123,25 @@ static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list
return true;
}
static void d3d12_command_list_check_vbo_alignment(struct d3d12_command_list *list)
{
const uint32_t *stride_masks;
VkDeviceSize *offsets;
uint32_t update_vbos;
unsigned int index;
stride_masks = list->state->graphics.vertex_buffer_stride_align_mask;
update_vbos = list->state->graphics.vertex_buffer_mask;
offsets = list->dynamic_state.vertex_offsets;
while (update_vbos)
{
index = vkd3d_bitmask_iter32(&update_vbos);
if (stride_masks[index] & offsets[index])
list->dynamic_state.dirty_vbos |= 1u << index;
}
}
static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_list *list)
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
@ -3170,6 +3189,12 @@ static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_lis
{
VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, vk_pipeline));
/* If we bind a new pipeline, make sure that we end up binding VBOs that are aligned.
* It is fine to do it here, since we are binding a pipeline right before we perform
* a draw call. If we trip any dirty check here, VBO offsets will be fixed up when emitting
* dynamic state after this. */
d3d12_command_list_check_vbo_alignment(list);
/* The application did set vertex buffers that we didn't bind because of the pipeline vbo mask.
* The new pipeline could use those so we need to rebind vertex buffers. */
if ((new_active_flags & (VKD3D_DYNAMIC_STATE_VERTEX_BUFFER | VKD3D_DYNAMIC_STATE_VERTEX_BUFFER_STRIDE))
@ -3540,8 +3565,10 @@ static void d3d12_command_list_update_dynamic_state(struct d3d12_command_list *l
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state;
const uint32_t *stride_align_masks;
struct vkd3d_bitmask_range range;
uint32_t update_vbos;
unsigned int i;
/* Make sure we only update states that are dynamic in the pipeline */
dyn_state->dirty_flags &= list->dynamic_state.active_flags;
@ -3597,10 +3624,38 @@ static void d3d12_command_list_update_dynamic_state(struct d3d12_command_list *l
update_vbos = (dyn_state->dirty_vbos | dyn_state->dirty_vbo_strides) & list->state->graphics.vertex_buffer_mask;
dyn_state->dirty_vbos &= ~update_vbos;
dyn_state->dirty_vbo_strides &= ~update_vbos;
stride_align_masks = list->state->graphics.vertex_buffer_stride_align_mask;
while (update_vbos)
{
range = vkd3d_bitmask_iter32_range(&update_vbos);
for (i = 0; i < range.count; i++)
{
if (dyn_state->vertex_offsets[i + range.offset] & stride_align_masks[i + range.offset])
{
FIXME("Binding VBO at offset %"PRIu64", but required alignment is %u.\n",
dyn_state->vertex_offsets[i + range.offset],
stride_align_masks[i + range.offset] + 1);
/* This modifies global state, but if app hits this, it's already buggy. */
dyn_state->vertex_offsets[i + range.offset] &= ~(VkDeviceSize)stride_align_masks[i + range.offset];
}
if (dyn_state->vertex_strides[i + range.offset] & stride_align_masks[i + range.offset])
{
FIXME("Binding VBO with stride %"PRIu64", but required alignment is %u.\n",
dyn_state->vertex_strides[i + range.offset],
stride_align_masks[i + range.offset] + 1);
/* This modifies global state, but if app hits this, it's already buggy.
* Round up, so that we don't hit offset > size case with dynamic strides. */
dyn_state->vertex_strides[i + range.offset] =
(dyn_state->vertex_strides[i + range.offset] + stride_align_masks[i + range.offset]) &
~(VkDeviceSize)stride_align_masks[i + range.offset];
}
}
VK_CALL(vkCmdBindVertexBuffers2EXT(list->vk_command_buffer,
range.offset, range.count,
dyn_state->vertex_buffers + range.offset,
@ -3614,10 +3669,39 @@ static void d3d12_command_list_update_dynamic_state(struct d3d12_command_list *l
update_vbos = dyn_state->dirty_vbos & list->state->graphics.vertex_buffer_mask;
dyn_state->dirty_vbos &= ~update_vbos;
dyn_state->dirty_vbo_strides &= ~update_vbos;
stride_align_masks = list->state->graphics.vertex_buffer_stride_align_mask;
while (update_vbos)
{
range = vkd3d_bitmask_iter32_range(&update_vbos);
for (i = 0; i < range.count; i++)
{
if (dyn_state->vertex_offsets[i + range.offset] & stride_align_masks[i + range.offset])
{
FIXME("Binding VBO at offset %"PRIu64", but required alignment is %u.\n",
dyn_state->vertex_offsets[i + range.offset],
stride_align_masks[i + range.offset] + 1);
dyn_state->vertex_offsets[i + range.offset] &= ~(VkDeviceSize)stride_align_masks[i + range.offset];
}
if (list->device->device_info.extended_dynamic_state_features.extendedDynamicState)
{
if (dyn_state->vertex_strides[i + range.offset] & stride_align_masks[i + range.offset])
{
FIXME("Binding VBO with stride %"PRIu64", but required alignment is %u.\n",
dyn_state->vertex_strides[i + range.offset],
stride_align_masks[i + range.offset] + 1);
/* This modifies global state, but if app hits this, it's already buggy.
* Round up, so that we don't hit offset > size case with dynamic strides. */
dyn_state->vertex_strides[i + range.offset] =
(dyn_state->vertex_strides[i + range.offset] + stride_align_masks[i + range.offset]) &
~(VkDeviceSize)stride_align_masks[i + range.offset];
}
}
}
if (list->device->device_info.extended_dynamic_state_features.extendedDynamicState)
{
VK_CALL(vkCmdBindVertexBuffers2EXT(list->vk_command_buffer,

View File

@ -2703,6 +2703,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
graphics->instance_divisor_count = 0;
graphics->attribute_binding_count = 0;
memset(graphics->minimum_vertex_buffer_dynamic_stride, 0, sizeof(graphics->minimum_vertex_buffer_dynamic_stride));
memset(graphics->vertex_buffer_stride_align_mask, 0, sizeof(graphics->vertex_buffer_stride_align_mask));
for (i = 0, j = 0, mask = 0; i < graphics->attribute_count; ++i)
{
@ -2743,6 +2744,9 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
max(graphics->minimum_vertex_buffer_dynamic_stride[e->InputSlot],
graphics->attributes[j].offset + format->byte_count);
graphics->vertex_buffer_stride_align_mask[e->InputSlot] =
max(graphics->vertex_buffer_stride_align_mask[e->InputSlot], format->byte_count);
++j;
switch (e->InputSlotClass)
@ -2802,6 +2806,27 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
graphics->vertex_buffer_mask = mask;
vkd3d_shader_free_shader_signature(&input_signature);
for (i = 0; i < D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; i++)
{
if (mask & (1u << i))
{
/* In D3D12, VBO strides must be aligned to min(4, max-for-all(format->byte_size)) for a given input slot.
* Native drivers and Windows Vulkan drivers happen to be robust against buggy applications
* which use unaligned formats, but Vulkan drivers are not always robust here (they don't have to be).
* AMD Windows D3D12/Vulkan drivers return the wrong result for unaligned VBO strides,
* but it doesn't crash at least!
* RDNA will hang if we emit tbuffer_loads which don't conform to D3D12 rules. */
/* Essentially all VBO types in D3D12 are POT sized, so this works.
* The exception to this is RGB32, which will clamp to 4 anyways.
* Two potential formats that could mess this analysis up is RGB8 and RGB16,
* but neither of these formats exist. */
graphics->vertex_buffer_stride_align_mask[i] = min(graphics->vertex_buffer_stride_align_mask[i], 4u);
/* POT - 1 for an alignment mask. */
graphics->vertex_buffer_stride_align_mask[i] -= 1;
}
}
switch (desc->strip_cut_value)
{
case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED:
@ -3342,6 +3367,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta
struct d3d12_graphics_pipeline_state *graphics = &state->graphics;
struct d3d12_device *device = state->device;
struct vkd3d_pipeline_key pipeline_key;
uint32_t stride, stride_align_mask;
bool extended_dynamic_state;
VkPipeline vk_pipeline;
unsigned int i;
@ -3372,7 +3398,17 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta
else
{
for (i = 0; i < graphics->attribute_binding_count; ++i)
pipeline_key.strides[i] = dyn_state->vertex_strides[graphics->attribute_bindings[i].binding];
{
stride = dyn_state->vertex_strides[graphics->attribute_bindings[i].binding];
stride_align_mask = state->graphics.vertex_buffer_stride_align_mask[graphics->attribute_bindings[i].binding];
if (stride & stride_align_mask)
{
FIXME("Attempting to use VBO stride of %u bytes, but D3D12 requires alignment of %u bytes. VBO stride will be adjusted.\n",
stride, stride_align_mask + 1);
stride &= ~stride_align_mask;
}
pipeline_key.strides[i] = stride;
}
}
pipeline_key.dsv_format = dsv_format;

View File

@ -914,6 +914,7 @@ struct d3d12_graphics_pipeline_state
VkVertexInputBindingDivisorDescriptionEXT instance_divisors[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
VkVertexInputBindingDescription attribute_bindings[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
uint32_t minimum_vertex_buffer_dynamic_stride[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
uint32_t vertex_buffer_stride_align_mask[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
size_t instance_divisor_count;
size_t attribute_binding_count;
size_t attribute_count;