From 19193bf932494a4eb4317f6285dbc66fb470586e Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 23 Nov 2020 23:16:43 +0100 Subject: [PATCH] vkd3d: Sanitize VBO strides and VBO offsets. Realign VBO strides and offsets if we have to, for sake of robustness. Violating these rules is against D3D12 spec, but it does not cause crashes on native drivers. On RDNA we can hit hangs with unaligned vertex attributes. It appears that native drivers apply some kind of fixup here to avoid the crash, even if the result is not what we expect. Signed-off-by: Hans-Kristian Arntzen --- libs/vkd3d/command.c | 84 ++++++++++++++++++++++++++++++++++++++ libs/vkd3d/state.c | 38 ++++++++++++++++- libs/vkd3d/vkd3d_private.h | 1 + 3 files changed, 122 insertions(+), 1 deletion(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index ea074a55..b158e990 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -3123,6 +3123,25 @@ static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list return true; } +static void d3d12_command_list_check_vbo_alignment(struct d3d12_command_list *list) +{ + const uint32_t *stride_masks; + VkDeviceSize *offsets; + uint32_t update_vbos; + unsigned int index; + + stride_masks = list->state->graphics.vertex_buffer_stride_align_mask; + update_vbos = list->state->graphics.vertex_buffer_mask; + offsets = list->dynamic_state.vertex_offsets; + + while (update_vbos) + { + index = vkd3d_bitmask_iter32(&update_vbos); + if (stride_masks[index] & offsets[index]) + list->dynamic_state.dirty_vbos |= 1u << index; + } +} + static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_list *list) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; @@ -3170,6 +3189,12 @@ static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_lis { VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, vk_pipeline)); + /* If we bind a new pipeline, make sure that we end up binding VBOs that are aligned. + * It is fine to do it here, since we are binding a pipeline right before we perform + * a draw call. If we trip any dirty check here, VBO offsets will be fixed up when emitting + * dynamic state after this. */ + d3d12_command_list_check_vbo_alignment(list); + /* The application did set vertex buffers that we didn't bind because of the pipeline vbo mask. * The new pipeline could use those so we need to rebind vertex buffers. */ if ((new_active_flags & (VKD3D_DYNAMIC_STATE_VERTEX_BUFFER | VKD3D_DYNAMIC_STATE_VERTEX_BUFFER_STRIDE)) @@ -3540,8 +3565,10 @@ static void d3d12_command_list_update_dynamic_state(struct d3d12_command_list *l { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; struct vkd3d_dynamic_state *dyn_state = &list->dynamic_state; + const uint32_t *stride_align_masks; struct vkd3d_bitmask_range range; uint32_t update_vbos; + unsigned int i; /* Make sure we only update states that are dynamic in the pipeline */ dyn_state->dirty_flags &= list->dynamic_state.active_flags; @@ -3597,10 +3624,38 @@ static void d3d12_command_list_update_dynamic_state(struct d3d12_command_list *l update_vbos = (dyn_state->dirty_vbos | dyn_state->dirty_vbo_strides) & list->state->graphics.vertex_buffer_mask; dyn_state->dirty_vbos &= ~update_vbos; dyn_state->dirty_vbo_strides &= ~update_vbos; + stride_align_masks = list->state->graphics.vertex_buffer_stride_align_mask; while (update_vbos) { range = vkd3d_bitmask_iter32_range(&update_vbos); + + for (i = 0; i < range.count; i++) + { + if (dyn_state->vertex_offsets[i + range.offset] & stride_align_masks[i + range.offset]) + { + FIXME("Binding VBO at offset %"PRIu64", but required alignment is %u.\n", + dyn_state->vertex_offsets[i + range.offset], + stride_align_masks[i + range.offset] + 1); + + /* This modifies global state, but if app hits this, it's already buggy. */ + dyn_state->vertex_offsets[i + range.offset] &= ~(VkDeviceSize)stride_align_masks[i + range.offset]; + } + + if (dyn_state->vertex_strides[i + range.offset] & stride_align_masks[i + range.offset]) + { + FIXME("Binding VBO with stride %"PRIu64", but required alignment is %u.\n", + dyn_state->vertex_strides[i + range.offset], + stride_align_masks[i + range.offset] + 1); + + /* This modifies global state, but if app hits this, it's already buggy. + * Round up, so that we don't hit offset > size case with dynamic strides. */ + dyn_state->vertex_strides[i + range.offset] = + (dyn_state->vertex_strides[i + range.offset] + stride_align_masks[i + range.offset]) & + ~(VkDeviceSize)stride_align_masks[i + range.offset]; + } + } + VK_CALL(vkCmdBindVertexBuffers2EXT(list->vk_command_buffer, range.offset, range.count, dyn_state->vertex_buffers + range.offset, @@ -3614,10 +3669,39 @@ static void d3d12_command_list_update_dynamic_state(struct d3d12_command_list *l update_vbos = dyn_state->dirty_vbos & list->state->graphics.vertex_buffer_mask; dyn_state->dirty_vbos &= ~update_vbos; dyn_state->dirty_vbo_strides &= ~update_vbos; + stride_align_masks = list->state->graphics.vertex_buffer_stride_align_mask; while (update_vbos) { range = vkd3d_bitmask_iter32_range(&update_vbos); + + for (i = 0; i < range.count; i++) + { + if (dyn_state->vertex_offsets[i + range.offset] & stride_align_masks[i + range.offset]) + { + FIXME("Binding VBO at offset %"PRIu64", but required alignment is %u.\n", + dyn_state->vertex_offsets[i + range.offset], + stride_align_masks[i + range.offset] + 1); + dyn_state->vertex_offsets[i + range.offset] &= ~(VkDeviceSize)stride_align_masks[i + range.offset]; + } + + if (list->device->device_info.extended_dynamic_state_features.extendedDynamicState) + { + if (dyn_state->vertex_strides[i + range.offset] & stride_align_masks[i + range.offset]) + { + FIXME("Binding VBO with stride %"PRIu64", but required alignment is %u.\n", + dyn_state->vertex_strides[i + range.offset], + stride_align_masks[i + range.offset] + 1); + + /* This modifies global state, but if app hits this, it's already buggy. + * Round up, so that we don't hit offset > size case with dynamic strides. */ + dyn_state->vertex_strides[i + range.offset] = + (dyn_state->vertex_strides[i + range.offset] + stride_align_masks[i + range.offset]) & + ~(VkDeviceSize)stride_align_masks[i + range.offset]; + } + } + } + if (list->device->device_info.extended_dynamic_state_features.extendedDynamicState) { VK_CALL(vkCmdBindVertexBuffers2EXT(list->vk_command_buffer, diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 8a1b177d..a840ae4d 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -2703,6 +2703,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s graphics->instance_divisor_count = 0; graphics->attribute_binding_count = 0; memset(graphics->minimum_vertex_buffer_dynamic_stride, 0, sizeof(graphics->minimum_vertex_buffer_dynamic_stride)); + memset(graphics->vertex_buffer_stride_align_mask, 0, sizeof(graphics->vertex_buffer_stride_align_mask)); for (i = 0, j = 0, mask = 0; i < graphics->attribute_count; ++i) { @@ -2743,6 +2744,9 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s max(graphics->minimum_vertex_buffer_dynamic_stride[e->InputSlot], graphics->attributes[j].offset + format->byte_count); + graphics->vertex_buffer_stride_align_mask[e->InputSlot] = + max(graphics->vertex_buffer_stride_align_mask[e->InputSlot], format->byte_count); + ++j; switch (e->InputSlotClass) @@ -2802,6 +2806,27 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s graphics->vertex_buffer_mask = mask; vkd3d_shader_free_shader_signature(&input_signature); + for (i = 0; i < D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; i++) + { + if (mask & (1u << i)) + { + /* In D3D12, VBO strides must be aligned to min(4, max-for-all(format->byte_size)) for a given input slot. + * Native drivers and Windows Vulkan drivers happen to be robust against buggy applications + * which use unaligned formats, but Vulkan drivers are not always robust here (they don't have to be). + * AMD Windows D3D12/Vulkan drivers return the wrong result for unaligned VBO strides, + * but it doesn't crash at least! + * RDNA will hang if we emit tbuffer_loads which don't conform to D3D12 rules. */ + + /* Essentially all VBO types in D3D12 are POT sized, so this works. + * The exception to this is RGB32, which will clamp to 4 anyways. + * Two potential formats that could mess this analysis up is RGB8 and RGB16, + * but neither of these formats exist. */ + graphics->vertex_buffer_stride_align_mask[i] = min(graphics->vertex_buffer_stride_align_mask[i], 4u); + /* POT - 1 for an alignment mask. */ + graphics->vertex_buffer_stride_align_mask[i] -= 1; + } + } + switch (desc->strip_cut_value) { case D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED: @@ -3342,6 +3367,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta struct d3d12_graphics_pipeline_state *graphics = &state->graphics; struct d3d12_device *device = state->device; struct vkd3d_pipeline_key pipeline_key; + uint32_t stride, stride_align_mask; bool extended_dynamic_state; VkPipeline vk_pipeline; unsigned int i; @@ -3372,7 +3398,17 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta else { for (i = 0; i < graphics->attribute_binding_count; ++i) - pipeline_key.strides[i] = dyn_state->vertex_strides[graphics->attribute_bindings[i].binding]; + { + stride = dyn_state->vertex_strides[graphics->attribute_bindings[i].binding]; + stride_align_mask = state->graphics.vertex_buffer_stride_align_mask[graphics->attribute_bindings[i].binding]; + if (stride & stride_align_mask) + { + FIXME("Attempting to use VBO stride of %u bytes, but D3D12 requires alignment of %u bytes. VBO stride will be adjusted.\n", + stride, stride_align_mask + 1); + stride &= ~stride_align_mask; + } + pipeline_key.strides[i] = stride; + } } pipeline_key.dsv_format = dsv_format; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 935db852..8a97f0bc 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -914,6 +914,7 @@ struct d3d12_graphics_pipeline_state VkVertexInputBindingDivisorDescriptionEXT instance_divisors[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; VkVertexInputBindingDescription attribute_bindings[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; uint32_t minimum_vertex_buffer_dynamic_stride[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + uint32_t vertex_buffer_stride_align_mask[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; size_t instance_divisor_count; size_t attribute_binding_count; size_t attribute_count;