diff --git a/meson.build b/meson.build index 6724d2cd23e..507c9730095 100644 --- a/meson.build +++ b/meson.build @@ -276,6 +276,7 @@ with_freedreno_virtio = get_option('freedreno-virtio') with_broadcom_vk = _vulkan_drivers.contains('broadcom') with_imagination_vk = _vulkan_drivers.contains('imagination-experimental') with_imagination_srv = get_option('imagination-srv') +with_microsoft_vk = _vulkan_drivers.contains('microsoft-experimental') with_any_vk = _vulkan_drivers.length() != 0 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk @@ -667,11 +668,11 @@ if with_gallium_zink endif dep_dxheaders = null_dep -if with_gallium_d3d12 or with_microsoft_clc +if with_gallium_d3d12 or with_microsoft_clc or with_microsoft_vk dep_dxheaders = dependency('directx-headers', required : false) if not dep_dxheaders.found() dep_dxheaders = dependency('DirectX-Headers', fallback : ['DirectX-Headers', 'dep_dxheaders'], - required : with_gallium_d3d12 + required : with_gallium_d3d12 or with_microsoft_vk ) endif endif diff --git a/meson_options.txt b/meson_options.txt index 7eaf69e28d8..9e51ccd3ac7 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -192,7 +192,7 @@ option( 'vulkan-drivers', type : 'array', value : ['auto'], - choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'panfrost', 'swrast', 'virtio-experimental'], + choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'microsoft-experimental', 'panfrost', 'swrast', 'virtio-experimental'], description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built' ) option( diff --git a/src/meson.build b/src/meson.build index 2ef90f2efba..1d42e08df30 100644 --- a/src/meson.build +++ b/src/meson.build @@ -104,7 +104,7 @@ endif if with_gallium_virgl or with_virtio_vk subdir('virtio') endif -if with_microsoft_clc or with_gallium_d3d12 or with_spirv_to_dxil +if with_microsoft_clc or with_gallium_d3d12 or with_spirv_to_dxil or with_microsoft_vk subdir('microsoft') endif if with_gallium_nouveau diff --git a/src/microsoft/meson.build b/src/microsoft/meson.build index a243d9e0f70..ea961bcefef 100644 --- a/src/microsoft/meson.build +++ b/src/microsoft/meson.build @@ -26,6 +26,9 @@ endif if with_gallium_d3d12 subdir('resource_state_manager') endif -if with_spirv_to_dxil +if with_spirv_to_dxil or with_microsoft_vk subdir('spirv_to_dxil') endif +if with_microsoft_vk + subdir('vulkan') +endif diff --git a/src/microsoft/vulkan/dzn_cmd_buffer.cpp b/src/microsoft/vulkan/dzn_cmd_buffer.cpp new file mode 100644 index 00000000000..bb9cf426c92 --- /dev/null +++ b/src/microsoft/vulkan/dzn_cmd_buffer.cpp @@ -0,0 +1,4119 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_debug_report.h" +#include "vk_format.h" +#include "vk_util.h" + +static void +dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf) +{ + if (!cbuf) + return; + + dzn_cmd_buffer *cmdbuf = container_of(cbuf, dzn_cmd_buffer, vk); + dzn_device *device = container_of(cbuf->base.device, dzn_device, vk); + + if (cmdbuf->cmdlist) + cmdbuf->cmdlist->Release(); + + if (cmdbuf->cmdalloc) + cmdbuf->cmdalloc->Release(); + + list_for_each_entry_safe(dzn_internal_resource, res, &cmdbuf->internal_bufs, link) { + list_del(&res->link); + res->res->Release(); + vk_free(&cbuf->pool->alloc, res); + } + + dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool); + dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool); + dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool); + dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool); + util_dynarray_fini(&cmdbuf->events.wait); + util_dynarray_fini(&cmdbuf->events.signal); + util_dynarray_fini(&cmdbuf->queries.reset); + util_dynarray_fini(&cmdbuf->queries.wait); + util_dynarray_fini(&cmdbuf->queries.signal); + + if (cmdbuf->rtvs.ht) { + hash_table_foreach(cmdbuf->rtvs.ht, he) + vk_free(&cbuf->pool->alloc, he->data); + _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL); + } + + if (cmdbuf->dsvs.ht) { + hash_table_foreach(cmdbuf->dsvs.ht, he) + vk_free(&cbuf->pool->alloc, he->data); + _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL); + } + + if (cmdbuf->events.ht) + _mesa_hash_table_destroy(cmdbuf->events.ht, NULL); + + if (cmdbuf->queries.ht) { + hash_table_foreach(cmdbuf->queries.ht, he) { + dzn_cmd_buffer_query_pool_state *qpstate = + (dzn_cmd_buffer_query_pool_state *)he->data; + util_dynarray_fini(&qpstate->reset); + util_dynarray_fini(&qpstate->collect); + util_dynarray_fini(&qpstate->wait); + util_dynarray_fini(&qpstate->signal); + vk_free(&cbuf->pool->alloc, he->data); + } + _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL); + } + + vk_command_buffer_finish(&cmdbuf->vk); + vk_free(&cbuf->pool->alloc, cmdbuf); +} + +static uint32_t +dzn_cmd_buffer_rtv_key_hash_function(const void *key) +{ + return _mesa_hash_data(key, sizeof(dzn_cmd_buffer_rtv_key)); +} + +static bool +dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(dzn_cmd_buffer_rtv_key)) == 0; +} + +static uint32_t +dzn_cmd_buffer_dsv_key_hash_function(const void *key) +{ + return _mesa_hash_data(key, sizeof(dzn_cmd_buffer_dsv_key)); +} + +static bool +dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(dzn_cmd_buffer_dsv_key)) == 0; +} + +static VkResult +dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info, + VkCommandBuffer *out) +{ + VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool); + dzn_device *device = container_of(pool->base.device, dzn_device, vk); + dzn_physical_device *pdev = + container_of(device->vk.physical, dzn_physical_device, vk); + + assert(pool->queue_family_index < pdev->queue_family_count); + + D3D12_COMMAND_LIST_TYPE 
type = + pdev->queue_families[pool->queue_family_index].desc.Type; + + dzn_cmd_buffer *cmdbuf = (dzn_cmd_buffer *) + vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!cmdbuf) + return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = + vk_command_buffer_init(&cmdbuf->vk, pool, info->level); + if (result != VK_SUCCESS) { + vk_free(&pool->alloc, cmdbuf); + return result; + } + + memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); + list_inithead(&cmdbuf->internal_bufs); + util_dynarray_init(&cmdbuf->events.wait, NULL); + util_dynarray_init(&cmdbuf->events.signal, NULL); + util_dynarray_init(&cmdbuf->queries.reset, NULL); + util_dynarray_init(&cmdbuf->queries.wait, NULL); + util_dynarray_init(&cmdbuf->queries.signal, NULL); + dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device, + D3D12_DESCRIPTOR_HEAP_TYPE_RTV, + false, &pool->alloc); + dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device, + D3D12_DESCRIPTOR_HEAP_TYPE_DSV, + false, &pool->alloc); + dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + true, &pool->alloc); + dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + true, &pool->alloc); + + cmdbuf->events.ht = + _mesa_pointer_hash_table_create(NULL); + cmdbuf->queries.ht = + _mesa_pointer_hash_table_create(NULL); + cmdbuf->rtvs.ht = + _mesa_hash_table_create(NULL, + dzn_cmd_buffer_rtv_key_hash_function, + dzn_cmd_buffer_rtv_key_equals_function); + cmdbuf->dsvs.ht = + _mesa_hash_table_create(NULL, + dzn_cmd_buffer_dsv_key_hash_function, + dzn_cmd_buffer_dsv_key_equals_function); + if (!cmdbuf->events.ht || !cmdbuf->queries.ht || + !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + + cmdbuf->vk.destroy = dzn_cmd_buffer_destroy; + + if (FAILED(device->dev->CreateCommandAllocator(type, + IID_PPV_ARGS(&cmdbuf->cmdalloc)))) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + + if (FAILED(device->dev->CreateCommandList(0, type, + cmdbuf->cmdalloc, NULL, + IID_PPV_ARGS(&cmdbuf->cmdlist)))) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + +out: + if (result != VK_SUCCESS) + dzn_cmd_buffer_destroy(&cmdbuf->vk); + else + *out = dzn_cmd_buffer_to_handle(cmdbuf); + + return result; +} + +VkResult +dzn_cmd_buffer_reset(dzn_cmd_buffer *cmdbuf) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + + /* Reset the state */ + memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); + + /* TODO: Return resources to the pool */ + list_for_each_entry_safe(dzn_internal_resource, res, &cmdbuf->internal_bufs, link) { + list_del(&res->link); + res->res->Release(); + vk_free(&cmdbuf->vk.pool->alloc, res); + } + + cmdbuf->error = VK_SUCCESS; + util_dynarray_clear(&cmdbuf->events.wait); + util_dynarray_clear(&cmdbuf->events.signal); + util_dynarray_clear(&cmdbuf->queries.reset); + util_dynarray_clear(&cmdbuf->queries.wait); + util_dynarray_clear(&cmdbuf->queries.signal); + hash_table_foreach(cmdbuf->rtvs.ht, he) + vk_free(&cmdbuf->vk.pool->alloc, he->data); + _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL); + dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool); + hash_table_foreach(cmdbuf->dsvs.ht, he) + vk_free(&cmdbuf->vk.pool->alloc, he->data); + _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL); + hash_table_foreach(cmdbuf->queries.ht, he) { + dzn_cmd_buffer_query_pool_state *qpstate = + 
(dzn_cmd_buffer_query_pool_state *)he->data;
+      util_dynarray_fini(&qpstate->reset);
+      util_dynarray_fini(&qpstate->collect);
+      util_dynarray_fini(&qpstate->wait);
+      util_dynarray_fini(&qpstate->signal);
+      vk_free(&cmdbuf->vk.pool->alloc, he->data);
+   }
+   _mesa_hash_table_clear(cmdbuf->queries.ht, NULL);
+   _mesa_hash_table_clear(cmdbuf->events.ht, NULL);
+   dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool);
+   dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool);
+   dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool);
+   vk_command_buffer_reset(&cmdbuf->vk);
+
+   /* cmdlist->Reset() doesn't return the memory back to the command list
+    * allocator, and cmdalloc->Reset() can only be called if there's no live
+    * cmdlist allocated from the allocator, so we need to release and create
+    * a new command list.
+    */
+   cmdbuf->cmdlist->Release();
+   cmdbuf->cmdlist = NULL;
+   cmdbuf->cmdalloc->Reset();
+   if (FAILED(device->dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT,
+                                             cmdbuf->cmdalloc, NULL,
+                                             IID_PPV_ARGS(&cmdbuf->cmdlist)))) {
+      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   return cmdbuf->error;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_AllocateCommandBuffers(VkDevice device,
+                           const VkCommandBufferAllocateInfo *pAllocateInfo,
+                           VkCommandBuffer *pCommandBuffers)
+{
+   VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool);
+   VK_FROM_HANDLE(dzn_device, dev, device);
+   VkResult result = VK_SUCCESS;
+   uint32_t i;
+
+   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+      result = dzn_cmd_buffer_create(pAllocateInfo,
+                                     &pCommandBuffers[i]);
+      if (result != VK_SUCCESS)
+         break;
+   }
+
+   if (result != VK_SUCCESS) {
+      dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool,
+                                                i, pCommandBuffers);
+      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
+         pCommandBuffers[i] = VK_NULL_HANDLE;
+   }
+
+   return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_ResetCommandBuffer(VkCommandBuffer commandBuffer,
+                       VkCommandBufferResetFlags flags)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   return dzn_cmd_buffer_reset(cmdbuf);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer,
+                       const VkCommandBufferBeginInfo *info)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   /* If this is the first vkBeginCommandBuffer, we must *initialize* the
+    * command buffer's state. Otherwise, we must *reset* its state. In both
+    * cases we reset it.
+    *
+    * From the Vulkan 1.0 spec:
+    *
+    *    If a command buffer is in the executable state and the command buffer
+    *    was allocated from a command pool with the
+    *    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
+    *    vkBeginCommandBuffer implicitly resets the command buffer, behaving
+    *    as if vkResetCommandBuffer had been called with
+    *    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts
+    *    the command buffer in the recording state.
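+    *
+    * Resetting unconditionally is also fine on the very first call, since a
+    * freshly created command buffer has nothing recorded yet.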
+ */ + return dzn_cmd_buffer_reset(cmdbuf); +} + +static void +dzn_cmd_buffer_gather_events(dzn_cmd_buffer *cmdbuf) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + + if (cmdbuf->error != VK_SUCCESS) + goto out; + + hash_table_foreach(cmdbuf->events.ht, he) { + enum dzn_event_state state = (enum dzn_event_state)(uintptr_t)he->data; + + if (state != DZN_EVENT_STATE_EXTERNAL_WAIT) { + dzn_cmd_event_signal signal = { (dzn_event *)he->key, state == DZN_EVENT_STATE_SET }; + dzn_cmd_event_signal *entry = (dzn_cmd_event_signal *) + util_dynarray_grow(&cmdbuf->events.signal, dzn_cmd_event_signal, 1); + + if (!entry) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + break; + } + + *entry = signal; + } + } + +out: + _mesa_hash_table_clear(cmdbuf->events.ht, NULL); +} + +static VkResult +dzn_cmd_buffer_dynbitset_reserve(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + + if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS) + return VK_SUCCESS; + + unsigned old_sz = array->size; + void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS); + if (!ptr) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return cmdbuf->error; + } + + memset(ptr, 0, array->size - old_sz); + return VK_SUCCESS; +} + +static bool +dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit) +{ + uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS; + + if (bit < nbits) + return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit); + + return false; +} + +static VkResult +dzn_cmd_buffer_dynbitset_set(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + + VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit); + if (result != VK_SUCCESS) + return result; + + BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit); + return VK_SUCCESS; +} + +static void +dzn_cmd_buffer_dynbitset_clear(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + + if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS) + return; + + BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit); +} + +static VkResult +dzn_cmd_buffer_dynbitset_set_range(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, + uint32_t bit, uint32_t count) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + + VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1); + if (result != VK_SUCCESS) + return result; + + BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1); + return VK_SUCCESS; +} + +static void +dzn_cmd_buffer_dynbitset_clear_range(dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, + uint32_t bit, uint32_t count) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS; + + if (!nbits) + return; + + uint32_t end = MIN2(bit + count, nbits) - 1; + + while (bit <= end) { + uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32)); + BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1); + bit += subcount; + } +} + +static 
dzn_cmd_buffer_query_pool_state * +dzn_cmd_buffer_create_query_pool_state(dzn_cmd_buffer *cmdbuf) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + dzn_cmd_buffer_query_pool_state *state = (dzn_cmd_buffer_query_pool_state *) + vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!state) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + + util_dynarray_init(&state->reset, NULL); + util_dynarray_init(&state->collect, NULL); + util_dynarray_init(&state->wait, NULL); + util_dynarray_init(&state->signal, NULL); + return state; +} + +static void +dzn_cmd_buffer_destroy_query_pool_state(dzn_cmd_buffer *cmdbuf, + dzn_cmd_buffer_query_pool_state *state) +{ + util_dynarray_fini(&state->reset); + util_dynarray_fini(&state->collect); + util_dynarray_fini(&state->wait); + util_dynarray_fini(&state->signal); + vk_free(&cmdbuf->vk.pool->alloc, state); +} + +static dzn_cmd_buffer_query_pool_state * +dzn_cmd_buffer_get_query_pool_state(dzn_cmd_buffer *cmdbuf, + dzn_query_pool *qpool) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + dzn_cmd_buffer_query_pool_state *state = NULL; + struct hash_entry *he = + _mesa_hash_table_search(cmdbuf->queries.ht, qpool); + + if (!he) { + state = dzn_cmd_buffer_create_query_pool_state(cmdbuf); + if (!state) + return NULL; + + he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state); + if (!he) { + dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state); + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + } else { + state = (dzn_cmd_buffer_query_pool_state *)he->data; + } + + return state; +} + +static VkResult +dzn_cmd_buffer_collect_queries(dzn_cmd_buffer *cmdbuf, + const dzn_query_pool *qpool, + dzn_cmd_buffer_query_pool_state *state, + uint32_t first_query, + uint32_t query_count) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + uint32_t nbits = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS; + uint32_t start, end; + + query_count = MIN2(query_count, nbits - first_query); + nbits = MIN2(first_query + query_count, nbits); + + VkResult result = + dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1); + if (result != VK_SUCCESS) + return result; + + BITSET_WORD *collect = + util_dynarray_element(&state->collect, BITSET_WORD, 0); + for (start = first_query, end = first_query, + __bitset_next_range(&start, &end, collect, nbits); + start < nbits; + __bitset_next_range(&start, &end, collect, nbits)) { + cmdbuf->cmdlist->ResolveQueryData(qpool->heap, qpool->queries[start].type, + start, end - start, + qpool->resolve_buffer, + qpool->query_size * start); + } + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = qpool->resolve_buffer, + .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + }, + }; + uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query); + uint32_t size = dzn_query_pool_get_result_size(qpool, query_count); + + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + + cmdbuf->cmdlist->CopyBufferRegion(qpool->collect_buffer, offset, + qpool->resolve_buffer, offset, + size); + + for (start = first_query, end = first_query, + __bitset_next_range(&start, &end, collect, nbits); + start < nbits; + __bitset_next_range(&start, 
&end, collect, nbits)) { + uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t); + uint32_t count = end - start; + + for (unsigned i = 0; i < count; i+= step) { + uint32_t sub_count = MIN2(step, count - i); + + cmdbuf->cmdlist->CopyBufferRegion(qpool->collect_buffer, + dzn_query_pool_get_availability_offset(qpool, start + i), + device->queries.refs, + DZN_QUERY_REFS_ALL_ONES_OFFSET, + sizeof(uint64_t) * sub_count); + } + + dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count); + dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, start, count); + } + + DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter); + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + return VK_SUCCESS; +} + +static VkResult +dzn_cmd_buffer_collect_query_ops(dzn_cmd_buffer *cmdbuf, + dzn_query_pool *qpool, + struct util_dynarray *bitset_array, + struct util_dynarray *ops_array) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0); + uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS; + uint32_t start, end; + + BITSET_FOREACH_RANGE(start, end, bitset, nbits) { + dzn_cmd_buffer_query_range range { qpool, start, end - start }; + dzn_cmd_buffer_query_range *entry = (dzn_cmd_buffer_query_range *) + util_dynarray_grow(ops_array, dzn_cmd_buffer_query_range, 1); + + if (!entry) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return cmdbuf->error; + } + + *entry = range; + } + + return VK_SUCCESS; +} + +static VkResult +dzn_cmd_buffer_gather_queries(dzn_cmd_buffer *cmdbuf) +{ + hash_table_foreach(cmdbuf->queries.ht, he) { + dzn_query_pool *qpool = (dzn_query_pool *)he->key; + dzn_cmd_buffer_query_pool_state *state = + (dzn_cmd_buffer_query_pool_state *)he->data; + VkResult result = + dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count); + if (result != VK_SUCCESS) + return result; + + result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset); + if (result != VK_SUCCESS) + return result; + + result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->wait, &cmdbuf->queries.wait); + if (result != VK_SUCCESS) + return result; + + result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_EndCommandBuffer(VkCommandBuffer commandBuffer) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + dzn_cmd_buffer_gather_events(cmdbuf); + dzn_cmd_buffer_gather_queries(cmdbuf); + HRESULT hres = cmdbuf->cmdlist->Close(); + if (FAILED(hres)) + cmdbuf->error = vk_error(cmdbuf->vk.base.device, VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + cmdbuf->error = cmdbuf->vk.cmd_queue.error; + } + + assert(cmdbuf->error == VK_SUCCESS); + return cmdbuf->error; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, + const VkDependencyInfo *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + bool execution_barrier = + !info->memoryBarrierCount && + !info->bufferMemoryBarrierCount && + !info->imageMemoryBarrierCount; + + if (execution_barrier) { + /* Execution barrier can be emulated with a NULL UAV barrier (AKA + * pipeline flush). That's the best we can do with the standard D3D12 + * barrier API. 
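+       * (Note: a UAV barrier with pResource == NULL applies to all
+       * preceding UAV accesses, which is the closest D3D12 equivalent of a
+       * plain execution dependency.)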
+       */
+      D3D12_RESOURCE_BARRIER barrier = {
+         .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
+         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+         .UAV = { .pResource = NULL },
+      };
+
+      cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+   }
+
+   /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers.
+    * Scopes are not taken into account, but that's inherent to the current
+    * D3D12 barrier API.
+    */
+   if (info->memoryBarrierCount) {
+      D3D12_RESOURCE_BARRIER barriers[2] = {};
+
+      barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+      barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+      barriers[0].UAV.pResource = NULL;
+      barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
+      barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+      barriers[1].Aliasing.pResourceBefore = NULL;
+      barriers[1].Aliasing.pResourceAfter = NULL;
+      cmdbuf->cmdlist->ResourceBarrier(2, barriers);
+   }
+
+   for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) {
+      VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer);
+      D3D12_RESOURCE_BARRIER barrier = {};
+
+      /* UAVs are only used for storage buffers, so skip all other buffers. */
+      if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
+         continue;
+
+      barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+      barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+      barrier.UAV.pResource = buf->res;
+      cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+   }
+
+   for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) {
+      const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i];
+      const VkImageSubresourceRange *range = &ibarrier->subresourceRange;
+      VK_FROM_HANDLE(dzn_image, image, ibarrier->image);
+
+      /* We use placed resources' simple model, in which only one resource
+       * pointing to a given heap is active at a given time. To make the
+       * resource active we need to add an aliasing barrier.
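+       * (A NULL pResourceBefore tells D3D12 that any resource bound to the
+       * same heap might have been active before the barrier.)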
+ */ + D3D12_RESOURCE_BARRIER aliasing_barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Aliasing = { + .pResourceBefore = NULL, + .pResourceAfter = image->res, + }, + }; + + cmdbuf->cmdlist->ResourceBarrier(1, &aliasing_barrier); + + D3D12_RESOURCE_BARRIER transition_barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .StateAfter = dzn_image_layout_to_state(ibarrier->newLayout), + }, + }; + + if (ibarrier->oldLayout == VK_IMAGE_LAYOUT_UNDEFINED || + ibarrier->oldLayout == VK_IMAGE_LAYOUT_PREINITIALIZED) + transition_barrier.Transition.StateBefore = image->mem->initial_state; + else + transition_barrier.Transition.StateBefore = dzn_image_layout_to_state(ibarrier->oldLayout); + + if (transition_barrier.Transition.StateBefore == transition_barrier.Transition.StateAfter) + continue; + + /* some layouts map to the same states, and NOP-barriers are illegal */ + uint32_t layer_count = dzn_get_layer_count(image, range); + uint32_t level_count = dzn_get_level_count(image, range); + for (uint32_t layer = 0; layer < layer_count; layer++) { + for (uint32_t lvl = 0; lvl < level_count; lvl++) { + dzn_foreach_aspect(aspect, range->aspectMask) { + transition_barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, aspect, lvl, layer); + cmdbuf->cmdlist->ResourceBarrier(1, &transition_barrier); + } + } + } + } +} + +D3D12_CPU_DESCRIPTOR_HANDLE +dzn_cmd_buffer_get_dsv(dzn_cmd_buffer *cmdbuf, + const dzn_image *image, + const D3D12_DEPTH_STENCIL_VIEW_DESC *desc) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + dzn_cmd_buffer_dsv_key key { image, *desc }; + struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key); + struct dzn_cmd_buffer_dsv_entry *dsve; + + if (!he) { + dzn_descriptor_heap *heap; + uint32_t slot; + + // TODO: error handling + dsve = (dzn_cmd_buffer_dsv_entry *) + vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + dsve->key = key; + dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot); + dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot); + device->dev->CreateDepthStencilView(image->res, desc, dsve->handle); + _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve); + } else { + dsve = (dzn_cmd_buffer_dsv_entry *)he->data; + } + + return dsve->handle; +} + +D3D12_CPU_DESCRIPTOR_HANDLE +dzn_cmd_buffer_get_rtv(dzn_cmd_buffer *cmdbuf, + const dzn_image *image, + const D3D12_RENDER_TARGET_VIEW_DESC *desc) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + dzn_cmd_buffer_rtv_key key { image, *desc }; + struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key); + struct dzn_cmd_buffer_rtv_entry *rtve; + + if (!he) { + struct dzn_descriptor_heap *heap; + uint32_t slot; + + // TODO: error handling + rtve = (dzn_cmd_buffer_rtv_entry *) + vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + rtve->key = key; + dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot); + rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot); + device->dev->CreateRenderTargetView(image->res, desc, rtve->handle); + he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve); + } else { + rtve = (dzn_cmd_buffer_rtv_entry *)he->data; + } + + return rtve->handle; +} + +static VkResult 
+dzn_cmd_buffer_alloc_internal_buf(dzn_cmd_buffer *cmdbuf,
+                                  uint32_t size,
+                                  D3D12_HEAP_TYPE heap_type,
+                                  D3D12_RESOURCE_STATES init_state,
+                                  ID3D12Resource **out)
+{
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   ComPtr<ID3D12Resource> res;
+   *out = NULL;
+
+   /* Align size on 64k (the default alignment) */
+   size = ALIGN_POT(size, 64 * 1024);
+
+   D3D12_HEAP_PROPERTIES hprops =
+      device->dev->GetCustomHeapProperties(0, heap_type);
+   D3D12_RESOURCE_DESC rdesc = {
+      .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
+      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+      .Width = size,
+      .Height = 1,
+      .DepthOrArraySize = 1,
+      .MipLevels = 1,
+      .Format = DXGI_FORMAT_UNKNOWN,
+      .SampleDesc = { .Count = 1, .Quality = 0 },
+      .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+      .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
+   };
+
+   HRESULT hres =
+      device->dev->CreateCommittedResource(&hprops, D3D12_HEAP_FLAG_NONE, &rdesc,
+                                           init_state,
+                                           NULL, IID_PPV_ARGS(&res));
+   if (FAILED(hres)) {
+      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      return cmdbuf->error;
+   }
+
+   dzn_internal_resource *entry = (dzn_internal_resource *)
+      vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8,
+               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!entry) {
+      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return cmdbuf->error;
+   }
+
+   entry->res = res.Detach();
+   list_addtail(&entry->link, &cmdbuf->internal_bufs);
+   *out = entry->res;
+   return VK_SUCCESS;
+}
+
+static void
+dzn_cmd_buffer_clear_rects_with_copy(dzn_cmd_buffer *cmdbuf,
+                                     const dzn_image *image,
+                                     VkImageLayout layout,
+                                     const VkClearColorValue *color,
+                                     const VkImageSubresourceRange *range,
+                                     uint32_t rect_count, D3D12_RECT *rects)
+{
+   enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
+   uint32_t blksize = util_format_get_blocksize(pfmt);
+   uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = {};
+   uint32_t raw[4] = {};
+
+   assert(blksize <= sizeof(raw));
+   assert(!(sizeof(buf) % blksize));
+
+   util_format_write_4(pfmt, (void *)color, 0, (void *)raw, 0, 0, 0, 1, 1);
+
+   uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
+   while (fill_step % blksize)
+      fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
+
+   uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel);
+   uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel);
+   uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step);
+   uint32_t res_size = max_h * row_pitch;
+
+   assert(fill_step <= sizeof(buf));
+
+   for (uint32_t i = 0; i < fill_step; i += blksize)
+      memcpy(&buf[i], raw, blksize);
+
+   ID3D12Resource *src_res;
+
+   VkResult result =
+      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size,
+                                        D3D12_HEAP_TYPE_UPLOAD,
+                                        D3D12_RESOURCE_STATE_GENERIC_READ,
+                                        &src_res);
+   if (result != VK_SUCCESS)
+      return;
+
+   assert(!(res_size % fill_step));
+
+   uint8_t *cpu_ptr;
+   src_res->Map(0, NULL, (void **)&cpu_ptr);
+   for (uint32_t i = 0; i < res_size; i += fill_step)
+      memcpy(&cpu_ptr[i], buf, fill_step);
+
+   src_res->Unmap(0, NULL);
+
+   D3D12_TEXTURE_COPY_LOCATION src_loc = {
+      .pResource = src_res,
+      .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
+      .PlacedFootprint = {
+         .Offset = 0,
+         .Footprint = {
+            .Width = max_w,
+            .Height = max_h,
+            .Depth = 1,
+            .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step),
+         },
+      },
+   };
+
+   D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(layout);
+   D3D12_RESOURCE_BARRIER barrier = {
+      .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+      .Flags =
D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = src_res, + .StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + }, + }; + + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + + barrier.Transition.pResource = image->res; + + assert(dzn_get_level_count(image, range) == 1); + uint32_t layer_count = dzn_get_layer_count(image, range); + + dzn_foreach_aspect(aspect, range->aspectMask) { + VkImageSubresourceLayers subres = { + .aspectMask = (VkImageAspectFlags)aspect, + .mipLevel = range->baseMipLevel, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = layer_count, + }; + + for (uint32_t layer = 0; layer < layer_count; layer++) { + if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, aspect, 0, layer); + barrier.Transition.StateBefore = dst_state; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + } + + D3D12_TEXTURE_COPY_LOCATION dst_loc = + dzn_image_get_copy_loc(image, &subres, aspect, layer); + + src_loc.PlacedFootprint.Footprint.Format = + dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ? + dst_loc.PlacedFootprint.Footprint.Format : + image->desc.Format; + + for (uint32_t r = 0; r < rect_count; r++) { + D3D12_BOX src_box = { + .left = 0, + .top = 0, + .front = 0, + .right = (UINT)(rects[r].right - rects[r].left), + .bottom = (UINT)(rects[r].bottom - rects[r].top), + .back = 1, + }; + + cmdbuf->cmdlist->CopyTextureRegion(&dst_loc, + rects[r].left, rects[r].top, 0, + &src_loc, &src_box); + } + + if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { + barrier.Transition.StateAfter = dst_state; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + } + } + } +} + +static VkClearColorValue +adjust_clear_color(VkFormat format, const VkClearColorValue &col) +{ + VkClearColorValue out = col; + + // D3D12 doesn't support bgra4, so we map it to rgba4 and swizzle things + // manually where it matters, like here, in the clear path. 
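+   // E.g. a clear color specified against the bgra4 view must reach D3D12
+   // in the component order of the rgba4 alias, which is what the swaps
+   // below produce.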
+ if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { + DZN_SWAP(out.float32[0], out.float32[1]); + DZN_SWAP(out.float32[2], out.float32[3]); + } + + return out; +} + +static void +dzn_cmd_buffer_clear_ranges_with_copy(dzn_cmd_buffer *cmdbuf, + const dzn_image *image, + VkImageLayout layout, + const VkClearColorValue *color, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); + uint32_t blksize = util_format_get_blocksize(pfmt); + uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = {}; + uint32_t raw[4] = {}; + + assert(blksize <= sizeof(raw)); + assert(!(sizeof(buf) % blksize)); + + util_format_write_4(pfmt, (void *)color, 0, (void *)raw, 0, 0, 0, 1, 1); + + uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + while (fill_step % blksize) + fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + + uint32_t res_size = 0; + for (uint32_t r = 0; r < range_count; r++) { + uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel); + uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel); + uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel); + uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step); + + res_size = MAX2(res_size, h * d * row_pitch); + } + + assert(fill_step <= sizeof(buf)); + + for (uint32_t i = 0; i < fill_step; i += blksize) + memcpy(&buf[i], raw, blksize); + + ID3D12Resource *src_res; + + VkResult result = + dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size, + D3D12_HEAP_TYPE_UPLOAD, + D3D12_RESOURCE_STATE_GENERIC_READ, + &src_res); + if (result != VK_SUCCESS) + return; + + assert(!(res_size % fill_step)); + + uint8_t *cpu_ptr; + src_res->Map(0, NULL, (void **)&cpu_ptr); + for (uint32_t i = 0; i < res_size; i += fill_step) + memcpy(&cpu_ptr[i], buf, fill_step); + + src_res->Unmap(0, NULL); + + D3D12_TEXTURE_COPY_LOCATION src_loc = { + .pResource = src_res, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = { + .Offset = 0, + }, + }; + + D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(layout); + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = src_res, + .StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + }, + }; + + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + + barrier.Transition.pResource = image->res; + for (uint32_t r = 0; r < range_count; r++) { + uint32_t level_count = dzn_get_level_count(image, &ranges[r]); + uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]); + + dzn_foreach_aspect(aspect, ranges[r].aspectMask) { + for (uint32_t lvl = 0; lvl < level_count; lvl++) { + uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl); + uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl); + uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl); + VkImageSubresourceLayers subres = { + .aspectMask = (VkImageAspectFlags)aspect, + .mipLevel = ranges[r].baseMipLevel + lvl, + .baseArrayLayer = ranges[r].baseArrayLayer, + .layerCount = layer_count, + }; + + for (uint32_t layer = 0; layer < layer_count; layer++) { + if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, &ranges[r], aspect, lvl, layer); + barrier.Transition.StateBefore = dst_state; + barrier.Transition.StateAfter = 
D3D12_RESOURCE_STATE_COPY_DEST; + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + } + + D3D12_TEXTURE_COPY_LOCATION dst_loc = + dzn_image_get_copy_loc(image, &subres, aspect, layer); + + src_loc.PlacedFootprint.Footprint.Format = + dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ? + dst_loc.PlacedFootprint.Footprint.Format : + image->desc.Format; + src_loc.PlacedFootprint.Footprint.Width = w; + src_loc.PlacedFootprint.Footprint.Height = h; + src_loc.PlacedFootprint.Footprint.Depth = d; + src_loc.PlacedFootprint.Footprint.RowPitch = + ALIGN_NPOT(w * blksize, fill_step); + D3D12_BOX src_box = { + .left = 0, + .top = 0, + .front = 0, + .right = w, + .bottom = h, + .back = d, + }; + + cmdbuf->cmdlist->CopyTextureRegion(&dst_loc, 0, 0, 0, + &src_loc, &src_box); + + if (dst_state != D3D12_RESOURCE_STATE_COPY_DEST) { + barrier.Transition.StateAfter = dst_state; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + } + } + } + } + } +} + +static void +dzn_cmd_buffer_clear_attachment(dzn_cmd_buffer *cmdbuf, + uint32_t idx, + const VkClearValue *value, + VkImageAspectFlags aspects, + uint32_t base_layer, + uint32_t layer_count, + uint32_t rect_count, + D3D12_RECT *rects) +{ + if (idx == VK_ATTACHMENT_UNUSED) + return; + + dzn_image_view *view = cmdbuf->state.framebuffer->attachments[idx]; + dzn_image *image = container_of(view->vk.image, dzn_image, vk); + + VkImageSubresourceRange range = { + .aspectMask = aspects, + .baseMipLevel = view->vk.base_mip_level, + .levelCount = 1, + .baseArrayLayer = view->vk.base_array_layer + base_layer, + .layerCount = layer_count, + }; + bool all_layers = + base_layer == 0 && + (layer_count == view->vk.layer_count || + layer_count == VK_REMAINING_ARRAY_LAYERS); + + if (vk_format_is_depth_or_stencil(view->vk.format)) { + D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0; + + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + flags |= D3D12_CLEAR_FLAG_DEPTH; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + flags |= D3D12_CLEAR_FLAG_STENCIL; + + if (flags != 0) { + auto desc = dzn_image_get_dsv_desc(image, &range, 0); + auto handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc); + cmdbuf->cmdlist->ClearDepthStencilView(handle, flags, + value->depthStencil.depth, + value->depthStencil.stencil, + rect_count, rects); + } + } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + VkClearColorValue color = adjust_clear_color(view->vk.format, value->color); + bool clear_with_cpy = false; + float vals[4]; + + if (vk_format_is_sint(view->vk.format)) { + for (uint32_t i = 0; i < 4; i++) { + vals[i] = color.int32[i]; + if (color.int32[i] != (int32_t)vals[i]) { + clear_with_cpy = true; + break; + } + } + } else if (vk_format_is_uint(view->vk.format)) { + for (uint32_t i = 0; i < 4; i++) { + vals[i] = color.uint32[i]; + if (color.uint32[i] != (uint32_t)vals[i]) { + clear_with_cpy = true; + break; + } + } + } else { + for (uint32_t i = 0; i < 4; i++) + vals[i] = color.float32[i]; + } + + if (clear_with_cpy) { + dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &value->color, + &range, rect_count, rects); + } else { + auto desc = dzn_image_get_rtv_desc(image, &range, 0); + auto handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc); + cmdbuf->cmdlist->ClearRenderTargetView(handle, vals, rect_count, rects); + } + } +} + +static void +dzn_cmd_buffer_clear_color(dzn_cmd_buffer *cmdbuf, + const dzn_image *image, + VkImageLayout layout, + const VkClearColorValue *col, + uint32_t 
range_count, + const VkImageSubresourceRange *ranges) +{ + if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); + return; + } + + VkClearColorValue color = adjust_clear_color(image->vk.format, *col); + float clear_vals[4]; + + enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); + + if (util_format_is_pure_sint(pfmt)) { + for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) { + clear_vals[c] = color.int32[c]; + if (color.int32[c] != (int32_t)clear_vals[c]) { + dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); + return; + } + } + } else if (util_format_is_pure_uint(pfmt)) { + for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) { + clear_vals[c] = color.uint32[c]; + if (color.uint32[c] != (uint32_t)clear_vals[c]) { + dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); + return; + } + } + } else { + memcpy(clear_vals, color.float32, sizeof(clear_vals)); + } + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + uint32_t layer_count = dzn_get_layer_count(image, range); + uint32_t level_count = dzn_get_level_count(image, range); + + for (uint32_t lvl = 0; lvl < level_count; lvl++) { + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .StateBefore = dzn_image_layout_to_state(layout), + .StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET, + }, + }; + + if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) { + for (uint32_t layer = 0; layer < layer_count; layer++) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, + VK_IMAGE_ASPECT_COLOR_BIT, + lvl, layer); + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + } + } + + VkImageSubresourceRange view_range = *range; + + if (image->vk.image_type == VK_IMAGE_TYPE_3D) { + view_range.baseArrayLayer = 0; + view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl); + } + + auto desc = dzn_image_get_rtv_desc(image, &view_range, lvl); + auto handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc); + cmdbuf->cmdlist->ClearRenderTargetView(handle, clear_vals, 0, NULL); + + if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) { + DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter); + + for (uint32_t layer = 0; layer < layer_count; layer++) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, VK_IMAGE_ASPECT_COLOR_BIT, lvl, layer); + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + } + } + } + } +} + +static void +dzn_cmd_buffer_clear_zs(dzn_cmd_buffer *cmdbuf, + const dzn_image *image, + VkImageLayout layout, + const VkClearDepthStencilValue *zs, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + uint32_t layer_count = dzn_get_layer_count(image, range); + uint32_t level_count = dzn_get_level_count(image, range); + + D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0; + + if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) + flags |= D3D12_CLEAR_FLAG_DEPTH; + if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) + flags |= D3D12_CLEAR_FLAG_STENCIL; + + for (uint32_t 
lvl = 0; lvl < level_count; lvl++) { + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .StateBefore = dzn_image_layout_to_state(layout), + .StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE, + }, + }; + + if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) { + for (uint32_t layer = 0; layer < layer_count; layer++) { + dzn_foreach_aspect(aspect, range->aspectMask) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, aspect, lvl, layer); + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + } + } + } + + auto desc = dzn_image_get_dsv_desc(image, range, lvl); + auto handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc); + cmdbuf->cmdlist->ClearDepthStencilView(handle, flags, + zs->depth, zs->stencil, + 0, NULL); + + if (barrier.Transition.StateBefore != barrier.Transition.StateAfter) { + DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter); + + for (uint32_t layer = 0; layer < layer_count; layer++) { + dzn_foreach_aspect(aspect, range->aspectMask) { + barrier.Transition.Subresource = + dzn_image_range_get_subresource_index(image, range, aspect, lvl, layer); + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + } + } + } + } + } +} + +static void +dzn_cmd_buffer_copy_buf2img_region(dzn_cmd_buffer *cmdbuf, + const VkCopyBufferToImageInfo2 *info, + uint32_t r, + VkImageAspectFlagBits aspect, + uint32_t l) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer); + VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage); + + ID3D12Device *dev = device->dev; + ID3D12GraphicsCommandList *cmdlist = cmdbuf->cmdlist; + + const VkBufferImageCopy2 *region = &info->pRegions[r]; + enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format); + uint32_t blkh = util_format_get_blockheight(pfmt); + uint32_t blkd = util_format_get_blockdepth(pfmt); + + D3D12_TEXTURE_COPY_LOCATION dst_img_loc = + dzn_image_get_copy_loc(dst_image, ®ion->imageSubresource, aspect, l); + D3D12_TEXTURE_COPY_LOCATION src_buf_loc = + dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, region, aspect, l); + + if (dzn_buffer_supports_region_copy(&src_buf_loc)) { + /* RowPitch and Offset are properly aligned, we can copy + * the whole thing in one call. + */ + D3D12_BOX src_box = { + .left = 0, + .top = 0, + .front = 0, + .right = region->imageExtent.width, + .bottom = region->imageExtent.height, + .back = region->imageExtent.depth, + }; + + cmdlist->CopyTextureRegion(&dst_img_loc, region->imageOffset.x, + region->imageOffset.y, region->imageOffset.z, + &src_buf_loc, &src_box); + return; + } + + /* Copy line-by-line if things are not properly aligned. 
*/ + D3D12_BOX src_box = { + .top = 0, + .front = 0, + .bottom = blkh, + .back = blkd, + }; + + for (uint32_t z = 0; z < region->imageExtent.depth; z += blkd) { + for (uint32_t y = 0; y < region->imageExtent.height; y += blkh) { + uint32_t src_x; + + D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc = + dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format, + region, &src_buf_loc, + y, z, &src_x); + + src_box.left = src_x; + src_box.right = src_x + region->imageExtent.width; + cmdlist->CopyTextureRegion(&dst_img_loc, + region->imageOffset.x, + region->imageOffset.y + y, + region->imageOffset.z + z, + &src_buf_line_loc, &src_box); + } + } +} + +static void +dzn_cmd_buffer_copy_img2buf_region(dzn_cmd_buffer *cmdbuf, + const VkCopyImageToBufferInfo2 *info, + uint32_t r, + VkImageAspectFlagBits aspect, + uint32_t l) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + VK_FROM_HANDLE(dzn_image, src_image, info->srcImage); + VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); + + ID3D12Device *dev = device->dev; + ID3D12GraphicsCommandList *cmdlist = cmdbuf->cmdlist; + + const VkBufferImageCopy2 *region = &info->pRegions[r]; + enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format); + uint32_t blkh = util_format_get_blockheight(pfmt); + uint32_t blkd = util_format_get_blockdepth(pfmt); + + D3D12_TEXTURE_COPY_LOCATION src_img_loc = + dzn_image_get_copy_loc(src_image, ®ion->imageSubresource, aspect, l); + D3D12_TEXTURE_COPY_LOCATION dst_buf_loc = + dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, region, aspect, l); + + if (dzn_buffer_supports_region_copy(&dst_buf_loc)) { + /* RowPitch and Offset are properly aligned on 256 bytes, we can copy + * the whole thing in one call. + */ + D3D12_BOX src_box = { + .left = (UINT)region->imageOffset.x, + .top = (UINT)region->imageOffset.y, + .front = (UINT)region->imageOffset.z, + .right = (UINT)(region->imageOffset.x + region->imageExtent.width), + .bottom = (UINT)(region->imageOffset.y + region->imageExtent.height), + .back = (UINT)(region->imageOffset.z + region->imageExtent.depth), + }; + + cmdlist->CopyTextureRegion(&dst_buf_loc, 0, 0, 0, + &src_img_loc, &src_box); + return; + } + + D3D12_BOX src_box = { + .left = (UINT)region->imageOffset.x, + .right = (UINT)(region->imageOffset.x + region->imageExtent.width), + }; + + /* Copy line-by-line if things are not properly aligned. 
*/ + for (uint32_t z = 0; z < region->imageExtent.depth; z += blkd) { + src_box.front = region->imageOffset.z + z; + src_box.back = src_box.front + blkd; + + for (uint32_t y = 0; y < region->imageExtent.height; y += blkh) { + uint32_t dst_x; + + D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc = + dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format, + region, &dst_buf_loc, + y, z, &dst_x); + + src_box.top = region->imageOffset.y + y; + src_box.bottom = src_box.top + blkh; + + cmdlist->CopyTextureRegion(&dst_buf_line_loc, dst_x, 0, 0, + &src_img_loc, &src_box); + } + } +} + +static void +dzn_cmd_buffer_copy_img_chunk(dzn_cmd_buffer *cmdbuf, + const VkCopyImageInfo2 *info, + D3D12_RESOURCE_DESC &tmp_desc, + D3D12_TEXTURE_COPY_LOCATION &tmp_loc, + uint32_t r, + VkImageAspectFlagBits aspect, + uint32_t l) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + VK_FROM_HANDLE(dzn_image, src, info->srcImage); + VK_FROM_HANDLE(dzn_image, dst, info->dstImage); + + ID3D12Device *dev = device->dev; + ID3D12GraphicsCommandList *cmdlist = cmdbuf->cmdlist; + + const VkImageCopy2 *region = &info->pRegions[r]; + const VkImageSubresourceLayers *src_subres = ®ion->srcSubresource; + const VkImageSubresourceLayers *dst_subres = ®ion->dstSubresource; + VkFormat src_format = + dzn_image_get_plane_format(src->vk.format, aspect); + VkFormat dst_format = + dzn_image_get_plane_format(dst->vk.format, aspect); + + enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format); + uint32_t src_blkw = util_format_get_blockwidth(src_pfmt); + uint32_t src_blkh = util_format_get_blockheight(src_pfmt); + uint32_t src_blkd = util_format_get_blockdepth(src_pfmt); + enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format); + uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt); + uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt); + uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt); + + assert(src_subres->layerCount == dst_subres->layerCount); + assert(src_subres->aspectMask == dst_subres->aspectMask); + + auto dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, l); + auto src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, l); + + D3D12_BOX src_box = { + .left = (UINT)MAX2(region->srcOffset.x, 0), + .top = (UINT)MAX2(region->srcOffset.y, 0), + .front = (UINT)MAX2(region->srcOffset.z, 0), + .right = (UINT)region->srcOffset.x + region->extent.width, + .bottom = (UINT)region->srcOffset.y + region->extent.height, + .back = (UINT)region->srcOffset.z + region->extent.depth, + }; + + if (!tmp_loc.pResource) { + cmdlist->CopyTextureRegion(&dst_loc, region->dstOffset.x, + region->dstOffset.y, region->dstOffset.z, + &src_loc, &src_box); + return; + } + + tmp_desc.Format = + dzn_image_get_placed_footprint_format(src->vk.format, aspect); + tmp_desc.Width = region->extent.width; + tmp_desc.Height = region->extent.height; + + dev->GetCopyableFootprints(&tmp_desc, + 0, 1, 0, + &tmp_loc.PlacedFootprint, + NULL, NULL, NULL); + + tmp_loc.PlacedFootprint.Footprint.Depth = region->extent.depth; + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = tmp_loc.pResource, + .Subresource = 0, + .StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE, + .StateAfter = D3D12_RESOURCE_STATE_COPY_DEST, + }, + }; + + if (r > 0 || l > 0) + cmdlist->ResourceBarrier(1, &barrier); + + cmdlist->CopyTextureRegion(&tmp_loc, 0, 0, 0, &src_loc, &src_box); + + 
DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter);
+   cmdlist->ResourceBarrier(1, &barrier);
+
+   tmp_desc.Format =
+      dzn_image_get_placed_footprint_format(dst->vk.format, aspect);
+   if (src_blkw != dst_blkw)
+      tmp_desc.Width = DIV_ROUND_UP(region->extent.width, src_blkw) * dst_blkw;
+   if (src_blkh != dst_blkh)
+      tmp_desc.Height = DIV_ROUND_UP(region->extent.height, src_blkh) * dst_blkh;
+
+   device->dev->GetCopyableFootprints(&tmp_desc,
+                                      0, 1, 0,
+                                      &tmp_loc.PlacedFootprint,
+                                      NULL, NULL, NULL);
+
+   if (src_blkd != dst_blkd) {
+      tmp_loc.PlacedFootprint.Footprint.Depth =
+         DIV_ROUND_UP(region->extent.depth, src_blkd) * dst_blkd;
+   } else {
+      tmp_loc.PlacedFootprint.Footprint.Depth = region->extent.depth;
+   }
+
+   D3D12_BOX tmp_box = {
+      .left = 0,
+      .top = 0,
+      .front = 0,
+      .right = tmp_loc.PlacedFootprint.Footprint.Width,
+      .bottom = tmp_loc.PlacedFootprint.Footprint.Height,
+      .back = tmp_loc.PlacedFootprint.Footprint.Depth,
+   };
+
+   cmdlist->CopyTextureRegion(&dst_loc,
+                              region->dstOffset.x,
+                              region->dstOffset.y,
+                              region->dstOffset.z,
+                              &tmp_loc, &tmp_box);
+}
+
+static void
+dzn_cmd_buffer_blit_prepare_src_view(dzn_cmd_buffer *cmdbuf,
+                                     VkImage image,
+                                     VkImageAspectFlagBits aspect,
+                                     const VkImageSubresourceLayers *subres,
+                                     dzn_descriptor_heap *heap,
+                                     uint32_t heap_slot)
+{
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   VK_FROM_HANDLE(dzn_image, img, image);
+   VkImageViewCreateInfo iview_info = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+      .image = image,
+      .format = img->vk.format,
+      .subresourceRange = {
+         .aspectMask = (VkImageAspectFlags)aspect,
+         .baseMipLevel = subres->mipLevel,
+         .levelCount = 1,
+         .baseArrayLayer = subres->baseArrayLayer,
+         .layerCount = subres->layerCount,
+      },
+   };
+
+   /* Stencil is sampled through the green channel, depth through the red
+    * one.
+    */
+   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
+      iview_info.components.r = VK_COMPONENT_SWIZZLE_G;
+      iview_info.components.g = VK_COMPONENT_SWIZZLE_G;
+      iview_info.components.b = VK_COMPONENT_SWIZZLE_G;
+      iview_info.components.a = VK_COMPONENT_SWIZZLE_G;
+   } else if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
+      iview_info.components.r = VK_COMPONENT_SWIZZLE_R;
+      iview_info.components.g = VK_COMPONENT_SWIZZLE_R;
+      iview_info.components.b = VK_COMPONENT_SWIZZLE_R;
+      iview_info.components.a = VK_COMPONENT_SWIZZLE_R;
+   }
+
+   switch (img->vk.image_type) {
+   case VK_IMAGE_TYPE_1D:
+      iview_info.viewType = img->vk.array_layers > 1 ?
+         VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D;
+      break;
+   case VK_IMAGE_TYPE_2D:
+      iview_info.viewType = img->vk.array_layers > 1 ?
+ VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; + break; + case VK_IMAGE_TYPE_3D: + iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + default: + unreachable("Invalid type"); + } + + dzn_image_view iview; + dzn_image_view_init(device, &iview, &iview_info); + dzn_descriptor_heap_write_image_view_desc(heap, heap_slot, false, false, &iview); + dzn_image_view_finish(&iview); + + D3D12_GPU_DESCRIPTOR_HANDLE handle = + dzn_descriptor_heap_get_gpu_handle(heap, heap_slot); + cmdbuf->cmdlist->SetGraphicsRootDescriptorTable(0, handle); +} + +static void +dzn_cmd_buffer_blit_prepare_dst_view(dzn_cmd_buffer *cmdbuf, + dzn_image *img, + VkImageAspectFlagBits aspect, + uint32_t level, uint32_t layer) +{ + bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + VkImageSubresourceRange range = { + .aspectMask = (VkImageAspectFlags)aspect, + .baseMipLevel = level, + .levelCount = 1, + .baseArrayLayer = layer, + .layerCount = 1, + }; + + if (ds) { + auto desc = dzn_image_get_dsv_desc(img, &range, 0); + auto handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc); + cmdbuf->cmdlist->OMSetRenderTargets(0, NULL, TRUE, &handle); + } else { + auto desc = dzn_image_get_rtv_desc(img, &range, 0); + auto handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc); + cmdbuf->cmdlist->OMSetRenderTargets(1, &handle, FALSE, NULL); + } +} + +static void +dzn_cmd_buffer_blit_set_pipeline(dzn_cmd_buffer *cmdbuf, + const dzn_image *src, + const dzn_image *dst, + VkImageAspectFlagBits aspect, + VkFilter filter, bool resolve) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format); + VkImageUsageFlags usage = + vk_format_is_depth_or_stencil(dst->vk.format) ? + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + struct dzn_meta_blit_key ctx_key = { + .out_format = dzn_image_get_dxgi_format(dst->vk.format, usage, aspect), + .samples = (uint32_t)src->vk.samples, + .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? + FRAG_RESULT_DEPTH : + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? + FRAG_RESULT_STENCIL : + FRAG_RESULT_DATA0), + .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT : + util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT : + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT : + GLSL_TYPE_FLOAT), + .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D : + src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D : + src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? 
GLSL_SAMPLER_DIM_MS : + GLSL_SAMPLER_DIM_3D), + .src_is_array = src->vk.array_layers > 1, + .resolve = resolve, + .linear_filter = filter == VK_FILTER_LINEAR, + .padding = 0, + }; + + const dzn_meta_blit *ctx = + dzn_meta_blits_get_context(device, &ctx_key); + assert(ctx); + + cmdbuf->cmdlist->SetGraphicsRootSignature(ctx->root_sig); + cmdbuf->cmdlist->SetPipelineState(ctx->pipeline_state); +} + +static void +dzn_cmd_buffer_blit_set_2d_region(dzn_cmd_buffer *cmdbuf, + const dzn_image *src, + const VkImageSubresourceLayers *src_subres, + const VkOffset3D *src_offsets, + const dzn_image *dst, + const VkImageSubresourceLayers *dst_subres, + const VkOffset3D *dst_offsets, + bool normalize_src_coords) +{ + uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel); + uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel); + uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel); + uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel); + + float dst_pos[4] = { + (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f), + (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f), + }; + + float src_pos[4] = { + (float)src_offsets[0].x, (float)src_offsets[0].y, + (float)src_offsets[1].x, (float)src_offsets[1].y, + }; + + if (normalize_src_coords) { + src_pos[0] /= src_w; + src_pos[1] /= src_h; + src_pos[2] /= src_w; + src_pos[3] /= src_h; + } + + float coords[] = { + dst_pos[0], dst_pos[1], src_pos[0], src_pos[1], + dst_pos[2], dst_pos[1], src_pos[2], src_pos[1], + dst_pos[0], dst_pos[3], src_pos[0], src_pos[3], + dst_pos[2], dst_pos[3], src_pos[2], src_pos[3], + }; + + cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(1, ARRAY_SIZE(coords), coords, 0); + + D3D12_VIEWPORT vp = { + .TopLeftX = 0, + .TopLeftY = 0, + .Width = (float)dst_w, + .Height = (float)dst_h, + .MinDepth = 0, + .MaxDepth = 1, + }; + cmdbuf->cmdlist->RSSetViewports(1, &vp); + + D3D12_RECT scissor = { + .left = MIN2(dst_offsets[0].x, dst_offsets[1].x), + .top = MIN2(dst_offsets[0].y, dst_offsets[1].y), + .right = MAX2(dst_offsets[0].x, dst_offsets[1].x), + .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y), + }; + cmdbuf->cmdlist->RSSetScissorRects(1, &scissor); +} + +static void +dzn_cmd_buffer_blit_issue_barriers(dzn_cmd_buffer *cmdbuf, + dzn_image *src, VkImageLayout src_layout, + const VkImageSubresourceLayers *src_subres, + dzn_image *dst, VkImageLayout dst_layout, + const VkImageSubresourceLayers *dst_subres, + VkImageAspectFlagBits aspect, + bool post) +{ + bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + D3D12_RESOURCE_BARRIER barriers[2] = { + { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = src->res, + .StateBefore = dzn_image_layout_to_state(src_layout), + .StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + }, + }, + { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = dst->res, + .StateBefore = dzn_image_layout_to_state(dst_layout), + .StateAfter = ds ? 
+ D3D12_RESOURCE_STATE_DEPTH_WRITE : + D3D12_RESOURCE_STATE_RENDER_TARGET, + }, + }, + }; + + if (post) { + DZN_SWAP(barriers[0].Transition.StateBefore, barriers[0].Transition.StateAfter); + DZN_SWAP(barriers[1].Transition.StateBefore, barriers[1].Transition.StateAfter); + } + + uint32_t layer_count = dzn_get_layer_count(src, src_subres); + uint32_t src_level = src_subres->mipLevel; + uint32_t dst_level = dst_subres->mipLevel; + + assert(dzn_get_layer_count(dst, dst_subres) == layer_count); + assert(src_level < src->vk.mip_levels); + assert(dst_level < dst->vk.mip_levels); + + for (uint32_t layer = 0; layer < layer_count; layer++) { + barriers[0].Transition.Subresource = + dzn_image_layers_get_subresource_index(src, src_subres, aspect, layer); + barriers[1].Transition.Subresource = + dzn_image_layers_get_subresource_index(dst, dst_subres, aspect, layer); + cmdbuf->cmdlist->ResourceBarrier(ARRAY_SIZE(barriers), barriers); + } +} + +static void +dzn_cmd_buffer_blit_region(dzn_cmd_buffer *cmdbuf, + const VkBlitImageInfo2 *info, + dzn_descriptor_heap *heap, + uint32_t *heap_slot, + uint32_t r) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + VK_FROM_HANDLE(dzn_image, src, info->srcImage); + VK_FROM_HANDLE(dzn_image, dst, info->dstImage); + + ID3D12Device *dev = device->dev; + const VkImageBlit2 *region = &info->pRegions[r]; + + dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { + dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, false); + dzn_cmd_buffer_blit_issue_barriers(cmdbuf, + src, info->srcImageLayout, ®ion->srcSubresource, + dst, info->dstImageLayout, ®ion->dstSubresource, + aspect, false); + dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, + aspect, ®ion->srcSubresource, + heap, (*heap_slot)++); + dzn_cmd_buffer_blit_set_2d_region(cmdbuf, + src, ®ion->srcSubresource, region->srcOffsets, + dst, ®ion->dstSubresource, region->dstOffsets, + src->vk.samples == 1); + + uint32_t dst_depth = + region->dstOffsets[1].z > region->dstOffsets[0].z ? + region->dstOffsets[1].z - region->dstOffsets[0].z : + region->dstOffsets[0].z - region->dstOffsets[1].z; + uint32_t src_depth = + region->srcOffsets[1].z > region->srcOffsets[0].z ? + region->srcOffsets[1].z - region->srcOffsets[0].z : + region->srcOffsets[0].z - region->srcOffsets[1].z; + + uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource); + uint32_t dst_level = region->dstSubresource.mipLevel; + + float src_slice_step = layer_count > 1 ? 1 : (float)src_depth / dst_depth; + if (region->srcOffsets[0].z > region->srcOffsets[1].z) + src_slice_step = -src_slice_step; + float src_z_coord = layer_count > 1 ? + 0 : (float)region->srcOffsets[0].z + (src_slice_step * 0.5f); + uint32_t slice_count = layer_count > 1 ? layer_count : dst_depth; + uint32_t dst_z_coord = layer_count > 1 ? + region->dstSubresource.baseArrayLayer : + region->dstOffsets[0].z; + if (region->dstOffsets[0].z > region->dstOffsets[1].z) + dst_z_coord--; + + uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ? 
+ 1 : -1; + + /* Normalize the src coordinates/step */ + if (layer_count == 1 && src->vk.samples == 1) { + src_z_coord /= src->vk.extent.depth; + src_slice_step /= src->vk.extent.depth; + } + + for (uint32_t slice = 0; slice < slice_count; slice++) { + dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord); + cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(1, 1, &src_z_coord, 16); + cmdbuf->cmdlist->DrawInstanced(4, 1, 0, 0); + src_z_coord += src_slice_step; + dst_z_coord += dst_slice_step; + } + + dzn_cmd_buffer_blit_issue_barriers(cmdbuf, + src, info->srcImageLayout, ®ion->srcSubresource, + dst, info->dstImageLayout, ®ion->dstSubresource, + aspect, true); + } +} + +static void +dzn_cmd_buffer_resolve_region(dzn_cmd_buffer *cmdbuf, + const VkResolveImageInfo2 *info, + dzn_descriptor_heap *heap, + uint32_t *heap_slot, + uint32_t r) +{ + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + VK_FROM_HANDLE(dzn_image, src, info->srcImage); + VK_FROM_HANDLE(dzn_image, dst, info->dstImage); + + ID3D12Device *dev = device->dev; + const VkImageResolve2 *region = &info->pRegions[r]; + + dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { + dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, true); + dzn_cmd_buffer_blit_issue_barriers(cmdbuf, + src, info->srcImageLayout, ®ion->srcSubresource, + dst, info->dstImageLayout, ®ion->dstSubresource, + aspect, false); + dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect, + ®ion->srcSubresource, + heap, (*heap_slot)++); + + VkOffset3D src_offset[2] = { + { + .x = region->srcOffset.x, + .y = region->srcOffset.y, + }, + { + .x = (int32_t)(region->srcOffset.x + region->extent.width), + .y = (int32_t)(region->srcOffset.y + region->extent.height), + }, + }; + VkOffset3D dst_offset[2] = { + { + .x = region->dstOffset.x, + .y = region->dstOffset.y, + }, + { + .x = (int32_t)(region->dstOffset.x + region->extent.width), + .y = (int32_t)(region->dstOffset.y + region->extent.height), + }, + }; + + dzn_cmd_buffer_blit_set_2d_region(cmdbuf, + src, ®ion->srcSubresource, src_offset, + dst, ®ion->dstSubresource, dst_offset, + false); + + uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource); + for (uint32_t layer = 0; layer < layer_count; layer++) { + float src_z_coord = layer; + + dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, + dst, aspect, region->dstSubresource.mipLevel, + region->dstSubresource.baseArrayLayer + layer); + cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(1, 1, &src_z_coord, 16); + cmdbuf->cmdlist->DrawInstanced(4, 1, 0, 0); + } + + dzn_cmd_buffer_blit_issue_barriers(cmdbuf, + src, info->srcImageLayout, ®ion->srcSubresource, + dst, info->dstImageLayout, ®ion->dstSubresource, + aspect, true); + } +} + +static void +dzn_cmd_buffer_clear_attachments(dzn_cmd_buffer *cmdbuf, + uint32_t attachment_count, + const VkClearAttachment *attachments, + uint32_t rect_count, + const VkClearRect *rects) +{ + struct dzn_render_pass *pass = cmdbuf->state.pass; + const struct dzn_subpass *subpass = &pass->subpasses[cmdbuf->state.subpass]; + + for (unsigned i = 0; i < attachment_count; i++) { + uint32_t idx; + if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) + idx = subpass->colors[attachments[i].colorAttachment].idx; + else + idx = subpass->zs.idx; + + for (uint32_t j = 0; j < rect_count; j++) { + D3D12_RECT rect; + + dzn_translate_rect(&rect, &rects[j].rect); + dzn_cmd_buffer_clear_attachment(cmdbuf, + idx, &attachments[i].clearValue, + 
attachments[i].aspectMask, + rects[j].baseArrayLayer, + rects[j].layerCount, + 1, &rect); + } + } +} + +static void +dzn_cmd_buffer_attachment_ref_transition(dzn_cmd_buffer *cmdbuf, + const dzn_attachment_ref *att) +{ + const dzn_image_view *iview = cmdbuf->state.framebuffer->attachments[att->idx]; + const dzn_image *image = container_of(iview->vk.image, dzn_image, vk); + + if (att->before == att->during) + return; + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .Subresource = 0, // YOLO + .StateBefore = att->before, + .StateAfter = att->during, + }, + }; + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); +} + +void +dzn_cmd_buffer_attachment_transition(dzn_cmd_buffer *cmdbuf, + const dzn_attachment *att) +{ + const dzn_image_view *iview = cmdbuf->state.framebuffer->attachments[att->idx]; + const dzn_image *image = container_of(iview->vk.image, dzn_image, vk); + + if (att->last == att->after) + return; + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = image->res, + .Subresource = 0, // YOLO + .StateBefore = att->last, + .StateAfter = att->after, + }, + }; + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); +} + +static void +dzn_cmd_buffer_resolve_attachment(dzn_cmd_buffer *cmdbuf, uint32_t i) +{ + const struct dzn_subpass *subpass = + &cmdbuf->state.pass->subpasses[cmdbuf->state.subpass]; + + if (subpass->resolve[i].idx == VK_ATTACHMENT_UNUSED) + return; + + const dzn_framebuffer *framebuffer = cmdbuf->state.framebuffer; + struct dzn_image_view *src = framebuffer->attachments[subpass->colors[i].idx]; + struct dzn_image *src_img = container_of(src->vk.image, dzn_image, vk); + struct dzn_image_view *dst = framebuffer->attachments[subpass->resolve[i].idx]; + struct dzn_image *dst_img = container_of(dst->vk.image, dzn_image, vk); + D3D12_RESOURCE_BARRIER barriers[2]; + uint32_t barrier_count = 0; + + /* TODO: 2DArrays/3D */ + if (subpass->colors[i].during != D3D12_RESOURCE_STATE_RESOLVE_SOURCE) { + barriers[barrier_count++] = D3D12_RESOURCE_BARRIER { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = src_img->res, + .Subresource = 0, + .StateBefore = subpass->colors[i].during, + .StateAfter = D3D12_RESOURCE_STATE_RESOLVE_SOURCE, + }, + }; + } + + if (subpass->resolve[i].during != D3D12_RESOURCE_STATE_RESOLVE_DEST) { + barriers[barrier_count++] = D3D12_RESOURCE_BARRIER { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = dst_img->res, + .Subresource = 0, + .StateBefore = subpass->resolve[i].during, + .StateAfter = D3D12_RESOURCE_STATE_RESOLVE_DEST, + }, + }; + } + + if (barrier_count) + cmdbuf->cmdlist->ResourceBarrier(barrier_count, barriers); + + cmdbuf->cmdlist->ResolveSubresource(dst_img->res, 0, + src_img->res, 0, + dst->srv_desc.Format); + + for (uint32_t b = 0; b < barrier_count; b++) + DZN_SWAP(barriers[b].Transition.StateBefore, barriers[b].Transition.StateAfter); + + if (barrier_count) + cmdbuf->cmdlist->ResourceBarrier(barrier_count, barriers); +} + +static void +dzn_cmd_buffer_begin_subpass(dzn_cmd_buffer *cmdbuf) +{ + struct dzn_framebuffer *framebuffer = cmdbuf->state.framebuffer; + struct dzn_render_pass *pass = cmdbuf->state.pass; + const struct dzn_subpass *subpass = 
&pass->subpasses[cmdbuf->state.subpass];
+
+   D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { };
+   D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 };
+
+   for (uint32_t i = 0; i < subpass->color_count; i++) {
+      if (subpass->colors[i].idx == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      dzn_image_view *iview = framebuffer->attachments[subpass->colors[i].idx];
+      dzn_image *img = container_of(iview->vk.image, dzn_image, vk);
+
+      rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc);
+   }
+
+   if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) {
+      dzn_image_view *iview = framebuffer->attachments[subpass->zs.idx];
+      dzn_image *img = container_of(iview->vk.image, dzn_image, vk);
+
+      zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc);
+   }
+
+   cmdbuf->cmdlist->OMSetRenderTargets(subpass->color_count,
+                                       subpass->color_count ? rt_handles : NULL,
+                                       FALSE, zs_handle.ptr ? &zs_handle : NULL);
+
+   for (uint32_t i = 0; i < subpass->color_count; i++)
+      dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->colors[i]);
+   for (uint32_t i = 0; i < subpass->input_count; i++)
+      dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->inputs[i]);
+
+   if (subpass->zs.idx != VK_ATTACHMENT_UNUSED)
+      dzn_cmd_buffer_attachment_ref_transition(cmdbuf, &subpass->zs);
+}
+
+static void
+dzn_cmd_buffer_end_subpass(dzn_cmd_buffer *cmdbuf)
+{
+   const dzn_subpass *subpass = &cmdbuf->state.pass->subpasses[cmdbuf->state.subpass];
+
+   for (uint32_t i = 0; i < subpass->color_count; i++)
+      dzn_cmd_buffer_resolve_attachment(cmdbuf, i);
+}
+
+static void
+dzn_cmd_buffer_update_pipeline(dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
+{
+   const dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline;
+
+   if (!pipeline)
+      return;
+
+   if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) {
+      if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+         const dzn_graphics_pipeline *gfx =
+            reinterpret_cast<const dzn_graphics_pipeline *>(pipeline);
+         cmdbuf->cmdlist->SetGraphicsRootSignature(pipeline->root.sig);
+         cmdbuf->cmdlist->IASetPrimitiveTopology(gfx->ia.topology);
+      } else {
+         cmdbuf->cmdlist->SetComputeRootSignature(pipeline->root.sig);
+      }
+   }
+
+   if (cmdbuf->state.pipeline != pipeline) {
+      cmdbuf->cmdlist->SetPipelineState(pipeline->state);
+      cmdbuf->state.pipeline = pipeline;
+   }
+}
+
+static void
+dzn_cmd_buffer_update_heaps(dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
+{
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   struct dzn_descriptor_state *desc_state =
+      &cmdbuf->state.bindpoint[bindpoint].desc_state;
+   dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = {
+      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV],
+      desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]
+   };
+   uint32_t new_heap_offsets[NUM_POOL_TYPES] = {};
+   bool update_root_desc_table[NUM_POOL_TYPES] = {};
+   const struct dzn_pipeline *pipeline =
+      cmdbuf->state.bindpoint[bindpoint].pipeline;
+
+   if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_HEAPS))
+      goto set_heaps;
+
+   dzn_foreach_pool_type (type) {
+      uint32_t desc_count = pipeline->desc_count[type];
+      if (!desc_count)
+         continue;
+
+      dzn_descriptor_heap_pool *pool =
+         type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ?
+ &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool; + uint32_t dst_offset = 0; + dzn_descriptor_heap *dst_heap = NULL; + uint32_t dst_heap_offset = 0; + + dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count, + &dst_heap, &dst_heap_offset); + new_heap_offsets[type] = dst_heap_offset; + update_root_desc_table[type] = true; + + for (uint32_t s = 0; s < MAX_SETS; s++) { + const struct dzn_descriptor_set *set = desc_state->sets[s].set; + if (!set) continue; + + uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type]; + uint32_t set_desc_count = pipeline->sets[s].range_desc_count[type]; + if (set_desc_count) { + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_copy(dst_heap, dst_heap_offset + set_heap_offset, + &set->pool->heaps[type], set->heap_offsets[type], + set_desc_count); + mtx_unlock(&set->pool->defragment_lock); + } + + if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { + uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count; + for (uint32_t o = 0; o < dynamic_buffer_count; o++) { + uint32_t desc_heap_offset = + pipeline->sets[s].dynamic_buffer_heap_offsets[o].srv; + dzn_buffer_desc bdesc = set->dynamic_buffers[o]; + bdesc.offset += desc_state->sets[s].dynamic_offsets[o]; + + dzn_descriptor_heap_write_buffer_desc(dst_heap, + dst_heap_offset + set_heap_offset + desc_heap_offset, + false, &bdesc); + + if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav != ~0) { + desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].uav; + dzn_descriptor_heap_write_buffer_desc(dst_heap, + dst_heap_offset + set_heap_offset + desc_heap_offset, + true, &bdesc); + } + } + } + } + + new_heaps[type] = dst_heap; + } + +set_heaps: + if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] || + new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) { + ID3D12DescriptorHeap *desc_heaps[2]; + uint32_t num_desc_heaps = 0; + if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) + desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap; + if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) + desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap; + cmdbuf->cmdlist->SetDescriptorHeaps(num_desc_heaps, desc_heaps); + + for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++) + cmdbuf->state.heaps[h] = new_heaps[h]; + } + + for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) { + D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r]; + + if (!update_root_desc_table[type]) + continue; + + D3D12_GPU_DESCRIPTOR_HANDLE handle = + dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]); + + if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) + cmdbuf->cmdlist->SetGraphicsRootDescriptorTable(r, handle); + else + cmdbuf->cmdlist->SetComputeRootDescriptorTable(r, handle); + } +} + +static void +dzn_cmd_buffer_update_sysvals(dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) +{ + if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS)) + return; + + const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; + uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx; + + if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { + cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(sysval_cbv_param_idx, + sizeof(cmdbuf->state.sysvals.gfx) / 4, + &cmdbuf->state.sysvals.gfx, 0); + } else { + 
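/* The compute path mirrors the graphics one above: sysvals are pushed
+       * as root constants at the pipeline-provided slot.
+       */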
cmdbuf->cmdlist->SetComputeRoot32BitConstants(sysval_cbv_param_idx,
+                                                    sizeof(cmdbuf->state.sysvals.compute) / 4,
+                                                    &cmdbuf->state.sysvals.compute, 0);
+   }
+}
+
+static void
+dzn_cmd_buffer_update_viewports(dzn_cmd_buffer *cmdbuf)
+{
+   const dzn_graphics_pipeline *pipeline =
+      reinterpret_cast<const dzn_graphics_pipeline *>(cmdbuf->state.pipeline);
+
+   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) ||
+       !pipeline->vp.count)
+      return;
+
+   cmdbuf->cmdlist->RSSetViewports(pipeline->vp.count, cmdbuf->state.viewports);
+}
+
+static void
+dzn_cmd_buffer_update_scissors(dzn_cmd_buffer *cmdbuf)
+{
+   const dzn_graphics_pipeline *pipeline =
+      reinterpret_cast<const dzn_graphics_pipeline *>(cmdbuf->state.pipeline);
+
+   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS))
+      return;
+
+   if (!pipeline->scissor.count) {
+      /* Apply a scissor delimiting the render area. */
+      cmdbuf->cmdlist->RSSetScissorRects(1, &cmdbuf->state.render_area);
+      return;
+   }
+
+   D3D12_RECT scissors[MAX_SCISSOR];
+   uint32_t scissor_count = pipeline->scissor.count;
+
+   /* Clamp the user scissors to the render area. */
+   memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * scissor_count);
+   for (uint32_t i = 0; i < scissor_count; i++) {
+      scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render_area.left);
+      scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render_area.top);
+      scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render_area.right);
+      scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render_area.bottom);
+   }
+
+   cmdbuf->cmdlist->RSSetScissorRects(scissor_count, scissors);
+}
+
+static void
+dzn_cmd_buffer_update_vbviews(dzn_cmd_buffer *cmdbuf)
+{
+   unsigned start, end;
+
+   /* pViews must point at the view of the first slot in the range. */
+   BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS)
+      cmdbuf->cmdlist->IASetVertexBuffers(start, end - start, &cmdbuf->state.vb.views[start]);
+
+   BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS);
+}
+
+static void
+dzn_cmd_buffer_update_ibview(dzn_cmd_buffer *cmdbuf)
+{
+   if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB))
+      return;
+
+   cmdbuf->cmdlist->IASetIndexBuffer(&cmdbuf->state.ib.view);
+}
+
+static void
+dzn_cmd_buffer_update_push_constants(dzn_cmd_buffer *cmdbuf, uint32_t bindpoint)
+{
+   struct dzn_cmd_buffer_push_constant_state *state =
+      bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ?
+      &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute;
+
+   uint32_t offset = state->offset / 4;
+   uint32_t end = ALIGN(state->end, 4) / 4;
+   uint32_t count = end - offset;
+
+   if (!count)
+      return;
+
+   uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx;
+   uint32_t *vals = state->values + offset;
+
+   if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
+      cmdbuf->cmdlist->SetGraphicsRoot32BitConstants(slot, count, vals, offset);
+   else
+      cmdbuf->cmdlist->SetComputeRoot32BitConstants(slot, count, vals, offset);
+
+   state->offset = 0;
+   state->end = 0;
+}
+
+void
+dzn_cmd_buffer_update_zsa(dzn_cmd_buffer *cmdbuf)
+{
+   if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) {
+      const dzn_graphics_pipeline *gfx = (const dzn_graphics_pipeline *)
+         cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+      uint32_t ref =
+         gfx->zsa.stencil_test.front.uses_ref ?
+         cmdbuf->state.zsa.stencil_test.front.ref :
+         cmdbuf->state.zsa.stencil_test.back.ref;
+      cmdbuf->cmdlist->OMSetStencilRef(ref);
+   }
+}
+
+static VkResult
+dzn_cmd_buffer_triangle_fan_create_index(dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count)
+{
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4;
+   uint32_t triangle_count = MAX2(*vertex_count, 2) - 2;
+
+   *vertex_count = triangle_count * 3;
+   if (!*vertex_count)
+      return VK_SUCCESS;
+
+   ID3D12Resource *index_buf;
+   VkResult result =
+      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size,
+                                        D3D12_HEAP_TYPE_UPLOAD,
+                                        D3D12_RESOURCE_STATE_GENERIC_READ,
+                                        &index_buf);
+   if (result != VK_SUCCESS)
+      return result;
+
+   void *cpu_ptr;
+   index_buf->Map(0, NULL, &cpu_ptr);
+
+   /* Triangle t of the fan becomes (t+1, t+2, 0): every triangle shares
+    * vertex 0.
+    */
+   /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
+   if (index_size == 2) {
+      uint16_t *indices = (uint16_t *)cpu_ptr;
+      for (uint32_t t = 0; t < triangle_count; t++) {
+         indices[t * 3] = t + 1;
+         indices[(t * 3) + 1] = t + 2;
+         indices[(t * 3) + 2] = 0;
+      }
+      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT;
+   } else {
+      uint32_t *indices = (uint32_t *)cpu_ptr;
+      for (uint32_t t = 0; t < triangle_count; t++) {
+         indices[t * 3] = t + 1;
+         indices[(t * 3) + 1] = t + 2;
+         indices[(t * 3) + 2] = 0;
+      }
+      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+   }
+
+   cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size;
+   cmdbuf->state.ib.view.BufferLocation = index_buf->GetGPUVirtualAddress();
+   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+   return VK_SUCCESS;
+}
+
+static VkResult
+dzn_cmd_buffer_triangle_fan_rewrite_index(dzn_cmd_buffer *cmdbuf,
+                                          uint32_t *index_count,
+                                          uint32_t *first_index)
+{
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   uint32_t triangle_count = MAX2(*index_count, 2) - 2;
+
+   *index_count = triangle_count * 3;
+   if (!*index_count)
+      return VK_SUCCESS;
+
+   /* New index is always 32bit to make the compute shader rewriting the
+    * index simpler */
+   ID3D12Resource *new_index_buf;
+   VkResult result =
+      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4,
+                                        D3D12_HEAP_TYPE_DEFAULT,
+                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+                                        &new_index_buf);
+   if (result != VK_SUCCESS)
+      return result;
+
+   D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu =
+      cmdbuf->state.ib.view.BufferLocation;
+
+   enum dzn_index_type index_type =
+      dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format);
+   const dzn_meta_triangle_fan_rewrite_index *rewrite_index =
+      &device->triangle_fan[index_type];
+
+   const dzn_pipeline *compute_pipeline =
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
+
+   struct dzn_triangle_fan_rewrite_index_params params = {
+      .first_index = *first_index,
+   };
+
+   cmdbuf->cmdlist->SetComputeRootSignature(rewrite_index->root_sig);
+   cmdbuf->cmdlist->SetPipelineState(rewrite_index->pipeline_state);
+   cmdbuf->cmdlist->SetComputeRootUnorderedAccessView(0, new_index_buf->GetGPUVirtualAddress());
+   cmdbuf->cmdlist->SetComputeRoot32BitConstants(1, sizeof(params) / 4,
+                                                 &params, 0);
+   cmdbuf->cmdlist->SetComputeRootShaderResourceView(2, old_index_buf_gpu);
+   cmdbuf->cmdlist->Dispatch(triangle_count, 1, 1);
+
+   D3D12_RESOURCE_BARRIER post_barriers[] = {
+      {
+         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+         /* Transition the new index buffer to the INDEX_BUFFER state so it
+          * can be bound for the indexed draw that follows.
+          */
+         .Transition = {
+            .pResource = new_index_buf,
+            .Subresource = 0,
+            .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            .StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER,
+         },
+      },
+   };
+
+   cmdbuf->cmdlist->ResourceBarrier(ARRAY_SIZE(post_barriers), post_barriers);
+
+   /* We don't mess with the driver state when executing our internal
+    * compute shader, but we still change the D3D12 state, so let's mark
+    * things dirty if needed.
+    */
+   cmdbuf->state.pipeline = NULL;
+   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) {
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |=
+         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+   }
+
+   cmdbuf->state.ib.view.SizeInBytes = *index_count * 4;
+   cmdbuf->state.ib.view.BufferLocation = new_index_buf->GetGPUVirtualAddress();
+   cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+   *first_index = 0;
+   return VK_SUCCESS;
+}
+
+static void
+dzn_cmd_buffer_prepare_draw(dzn_cmd_buffer *cmdbuf, bool indexed)
+{
+   dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
+   dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
+   dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
+   dzn_cmd_buffer_update_viewports(cmdbuf);
+   dzn_cmd_buffer_update_scissors(cmdbuf);
+   dzn_cmd_buffer_update_vbviews(cmdbuf);
+   dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS);
+   dzn_cmd_buffer_update_zsa(cmdbuf);
+
+   if (indexed)
+      dzn_cmd_buffer_update_ibview(cmdbuf);
+
+   /* Reset the dirty states */
+   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty = 0;
+   cmdbuf->state.dirty = 0;
+}
+
+static uint32_t
+dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(dzn_cmd_buffer *cmdbuf, bool indexed)
+{
+   dzn_graphics_pipeline *pipeline = (dzn_graphics_pipeline *)
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+
+   if (!pipeline->ia.triangle_fan)
+      return 0;
+
+   uint32_t max_triangles;
+
+   if (indexed) {
+      uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2;
+      uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size;
+
+      max_triangles = MAX2(max_indices, 2) - 2;
+   } else {
+      uint32_t max_vertex = 0;
+      for (uint32_t i = 0; i < pipeline->vb.count; i++) {
+         max_vertex =
+            MAX2(max_vertex,
+                 cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes);
+      }
+
+      max_triangles = MAX2(max_vertex, 2) - 2;
+   }
+
+   return max_triangles * 3;
+}
+
+static void
+dzn_cmd_buffer_indirect_draw(dzn_cmd_buffer *cmdbuf,
+                             dzn_buffer *draw_buf,
+                             size_t draw_buf_offset,
+                             uint32_t draw_count,
+                             uint32_t draw_buf_stride,
+                             bool indexed)
+{
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   dzn_graphics_pipeline *pipeline = (dzn_graphics_pipeline *)
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+   bool triangle_fan = pipeline->ia.triangle_fan;
+   uint32_t min_draw_buf_stride =
+      indexed ?
+      sizeof(struct dzn_indirect_indexed_draw_params) :
+      sizeof(struct dzn_indirect_draw_params);
+
+   draw_buf_stride = draw_buf_stride ?
+      draw_buf_stride : min_draw_buf_stride;
+   assert(draw_buf_stride >= min_draw_buf_stride);
+   assert((draw_buf_stride & 3) == 0);
+
+   uint32_t sysvals_stride = ALIGN_POT(sizeof(cmdbuf->state.sysvals.gfx), 256);
+   uint32_t exec_buf_stride = 32;
+   uint32_t triangle_fan_index_buf_stride =
+      dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, indexed) *
+      sizeof(uint32_t);
+   uint32_t triangle_fan_exec_buf_stride =
+      sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
+   ID3D12Resource *exec_buf;
+   VkResult result =
+      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, draw_count * exec_buf_stride,
+                                        D3D12_HEAP_TYPE_DEFAULT,
+                                        D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+                                        &exec_buf);
+   if (result != VK_SUCCESS)
+      return;
+
+   D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu =
+      draw_buf->res->GetGPUVirtualAddress() + draw_buf_offset;
+   ID3D12Resource *triangle_fan_index_buf = NULL;
+   ID3D12Resource *triangle_fan_exec_buf = NULL;
+
+   if (triangle_fan_index_buf_stride) {
+      result =
+         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
+                                           draw_count * triangle_fan_index_buf_stride,
+                                           D3D12_HEAP_TYPE_DEFAULT,
+                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+                                           &triangle_fan_index_buf);
+      if (result != VK_SUCCESS)
+         return;
+
+      result =
+         dzn_cmd_buffer_alloc_internal_buf(cmdbuf,
+                                           draw_count * triangle_fan_exec_buf_stride,
+                                           D3D12_HEAP_TYPE_DEFAULT,
+                                           D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+                                           &triangle_fan_exec_buf);
+      if (result != VK_SUCCESS)
+         return;
+   }
+
+   struct dzn_indirect_draw_triangle_fan_rewrite_params params = {
+      .draw_buf_stride = draw_buf_stride,
+      .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride,
+      .triangle_fan_index_buf_start =
+         triangle_fan_index_buf ?
+         triangle_fan_index_buf->GetGPUVirtualAddress() : 0,
+   };
+   uint32_t params_size =
+      triangle_fan_index_buf_stride > 0 ?
+      sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
+      sizeof(struct dzn_indirect_draw_rewrite_params);
+
+   enum dzn_indirect_draw_type draw_type;
+
+   if (indexed && triangle_fan_index_buf_stride > 0)
+      draw_type = DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN;
+   else if (!indexed && triangle_fan_index_buf_stride > 0)
+      draw_type = DZN_INDIRECT_DRAW_TRIANGLE_FAN;
+   else if (indexed)
+      draw_type = DZN_INDIRECT_INDEXED_DRAW;
+   else
+      draw_type = DZN_INDIRECT_DRAW;
+
+   dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type];
+
+   const dzn_pipeline *compute_pipeline =
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline;
+
+   cmdbuf->cmdlist->SetComputeRootSignature(indirect_draw->root_sig);
+   cmdbuf->cmdlist->SetPipelineState(indirect_draw->pipeline_state);
+   cmdbuf->cmdlist->SetComputeRoot32BitConstants(0, params_size / 4, (const void *)&params, 0);
+   cmdbuf->cmdlist->SetComputeRootShaderResourceView(1, draw_buf_gpu);
+   cmdbuf->cmdlist->SetComputeRootUnorderedAccessView(2, exec_buf->GetGPUVirtualAddress());
+   if (triangle_fan_exec_buf)
+      cmdbuf->cmdlist->SetComputeRootUnorderedAccessView(3, triangle_fan_exec_buf->GetGPUVirtualAddress());
+
+   cmdbuf->cmdlist->Dispatch(draw_count, 1, 1);
+
+   D3D12_RESOURCE_BARRIER post_barriers[] = {
+      {
+         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+         /* Transition the exec buffer to indirect arg so it can be
+          * passed to ExecuteIndirect() as an argument buffer.
+          */
+         .Transition = {
+            .pResource = exec_buf,
+            .Subresource = 0,
+            .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+         },
+      },
+      {
+         .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+         /* Transition the triangle-fan exec buffer to indirect arg so it
+          * can be passed to ExecuteIndirect() as an argument buffer.
+          */
+         .Transition = {
+            .pResource = triangle_fan_exec_buf,
+            .Subresource = 0,
+            .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+            .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
+         },
+      },
+   };
+
+   uint32_t post_barrier_count = triangle_fan_exec_buf ? 2 : 1;
+
+   cmdbuf->cmdlist->ResourceBarrier(post_barrier_count, post_barriers);
+
+   D3D12_INDEX_BUFFER_VIEW ib_view = {};
+
+   if (triangle_fan_exec_buf) {
+      auto index_type =
+         indexed ?
+         dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format) :
+         DZN_NO_INDEX;
+      dzn_meta_triangle_fan_rewrite_index *rewrite_index =
+         &device->triangle_fan[index_type];
+
+      struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = {};
+
+      assert(rewrite_index->root_sig);
+      assert(rewrite_index->pipeline_state);
+      assert(rewrite_index->cmd_sig);
+
+      cmdbuf->cmdlist->SetComputeRootSignature(rewrite_index->root_sig);
+      cmdbuf->cmdlist->SetPipelineState(rewrite_index->pipeline_state);
+      cmdbuf->cmdlist->SetComputeRootUnorderedAccessView(0, triangle_fan_index_buf->GetGPUVirtualAddress());
+      cmdbuf->cmdlist->SetComputeRoot32BitConstants(1, sizeof(rewrite_index_params) / 4,
+                                                    (const void *)&rewrite_index_params, 0);
+
+      if (indexed)
+         cmdbuf->cmdlist->SetComputeRootShaderResourceView(2, cmdbuf->state.ib.view.BufferLocation);
+
+      cmdbuf->cmdlist->ExecuteIndirect(rewrite_index->cmd_sig,
+                                       draw_count, triangle_fan_exec_buf,
+                                       0, NULL, 0);
+
+      D3D12_RESOURCE_BARRIER index_buf_barriers[] = {
+         {
+            .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+            .Transition = {
+               .pResource = triangle_fan_index_buf,
+               .Subresource = 0,
+               .StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+               .StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER,
+            },
+         },
+      };
+
+      cmdbuf->cmdlist->ResourceBarrier(ARRAY_SIZE(index_buf_barriers), index_buf_barriers);
+
+      /* After our triangle-fan lowering the draw is indexed */
+      indexed = true;
+      ib_view = cmdbuf->state.ib.view;
+      cmdbuf->state.ib.view.BufferLocation = triangle_fan_index_buf->GetGPUVirtualAddress();
+      cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride;
+      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+      cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+   }
+
+   /* We don't mess with the driver state when executing our internal
+    * compute shader, but we still change the D3D12 state, so let's mark
+    * things dirty if needed.
+ */ + cmdbuf->state.pipeline = NULL; + if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= + DZN_CMD_BINDPOINT_DIRTY_PIPELINE; + } + + cmdbuf->state.sysvals.gfx.first_vertex = 0; + cmdbuf->state.sysvals.gfx.base_instance = 0; + cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed; + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + + dzn_cmd_buffer_prepare_draw(cmdbuf, indexed); + + /* Restore the old IB view if we modified it during the triangle fan lowering */ + if (ib_view.SizeInBytes) { + cmdbuf->state.ib.view = ib_view; + cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; + } + + enum dzn_indirect_draw_cmd_sig_type cmd_sig_type = + triangle_fan_index_buf_stride > 0 ? + DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG : + indexed ? + DZN_INDIRECT_INDEXED_DRAW_CMD_SIG : + DZN_INDIRECT_DRAW_CMD_SIG; + ID3D12CommandSignature *cmdsig = + dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_type); + + if (!cmdsig) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + return; + } + + cmdbuf->cmdlist->ExecuteIndirect(cmdsig, + draw_count, exec_buf, 0, NULL, 0); +} + +static void +dzn_cmd_buffer_prepare_dispatch(dzn_cmd_buffer *cmdbuf) +{ + dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); + dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); + dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); + dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); + + /* Reset the dirty states */ + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty = 0; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2 *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer); + VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); + + for (int i = 0; i < info->regionCount; i++) { + auto ®ion = info->pRegions[i]; + + cmdbuf->cmdlist->CopyBufferRegion(dst_buffer->res, region.dstOffset, + src_buffer->res, region.srcOffset, + region.size); + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2 *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + for (int i = 0; i < info->regionCount; i++) { + const VkBufferImageCopy2 ®ion = info->pRegions[i]; + + dzn_foreach_aspect(aspect, region.imageSubresource.aspectMask) { + for (uint32_t l = 0; l < region.imageSubresource.layerCount; l++) + dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l); + } + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2 *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + for (int i = 0; i < info->regionCount; i++) { + const VkBufferImageCopy2 ®ion = info->pRegions[i]; + + dzn_foreach_aspect(aspect, region.imageSubresource.aspectMask) { + for (uint32_t l = 0; l < region.imageSubresource.layerCount; l++) + dzn_cmd_buffer_copy_img2buf_region(cmdbuf,info, i, aspect, l); + } + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdCopyImage2(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2 *info) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + VK_FROM_HANDLE(dzn_image, src, info->srcImage); + VK_FROM_HANDLE(dzn_image, dst, 
info->dstImage);
+
+   assert(src->vk.samples == dst->vk.samples);
+
+   bool requires_temp_res = src->vk.format != dst->vk.format &&
+                            src->vk.tiling != VK_IMAGE_TILING_LINEAR &&
+                            dst->vk.tiling != VK_IMAGE_TILING_LINEAR;
+
+   /* FIXME: multisample copies only work if we copy the entire subresource
+    * and if the copy doesn't require a temporary linear resource. When
+    * these conditions are not met we should use a blit shader.
+    */
+   if (src->vk.samples > 1) {
+      assert(requires_temp_res == false);
+
+      for (uint32_t i = 0; i < info->regionCount; i++) {
+         const VkImageCopy2 &region = info->pRegions[i];
+         uint32_t src_w = u_minify(src->vk.extent.width, region.srcSubresource.mipLevel);
+         uint32_t src_h = u_minify(src->vk.extent.height, region.srcSubresource.mipLevel);
+
+         assert(region.srcOffset.x == 0 && region.srcOffset.y == 0);
+         assert(region.extent.width == src_w);
+         assert(region.extent.height == src_h);
+         assert(region.dstOffset.x == 0 && region.dstOffset.y == 0);
+         assert(region.extent.width == u_minify(dst->vk.extent.width, region.dstSubresource.mipLevel));
+         assert(region.extent.height == u_minify(dst->vk.extent.height, region.dstSubresource.mipLevel));
+      }
+   }
+
+   D3D12_TEXTURE_COPY_LOCATION tmp_loc = {};
+   D3D12_RESOURCE_DESC tmp_desc = {
+      .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
+      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+      .DepthOrArraySize = 1,
+      .MipLevels = 1,
+      .Format = src->desc.Format,
+      .SampleDesc = { .Count = 1, .Quality = 0 },
+      .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+      .Flags = D3D12_RESOURCE_FLAG_NONE,
+   };
+
+   if (requires_temp_res) {
+      ID3D12Device *dev = device->dev;
+      VkImageAspectFlags aspect = 0;
+      uint64_t max_size = 0;
+
+      if (vk_format_has_depth(src->vk.format))
+         aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
+      else if (vk_format_has_stencil(src->vk.format))
+         aspect = VK_IMAGE_ASPECT_STENCIL_BIT;
+      else
+         aspect = VK_IMAGE_ASPECT_COLOR_BIT;
+
+      /* Size the temporary resource to the largest region so a single
+       * allocation can serve every copy in this command. */
+      for (uint32_t i = 0; i < info->regionCount; i++) {
+         const VkImageCopy2 &region = info->pRegions[i];
+         uint64_t region_size = 0;
+
+         tmp_desc.Format =
+            dzn_image_get_dxgi_format(src->vk.format,
+                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+                                      aspect);
+         tmp_desc.Width = region.extent.width;
+         tmp_desc.Height = region.extent.height;
+
+         dev->GetCopyableFootprints(&tmp_desc,
+                                    0, 1, 0,
+                                    NULL, NULL, NULL,
+                                    &region_size);
+         max_size = MAX2(max_size, region_size * region.extent.depth);
+      }
+
+      VkResult result =
+         dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size,
+                                           D3D12_HEAP_TYPE_DEFAULT,
+                                           D3D12_RESOURCE_STATE_COPY_DEST,
+                                           &tmp_loc.pResource);
+      if (result != VK_SUCCESS)
+         return;
+
+      tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+   }
+
+   for (uint32_t i = 0; i < info->regionCount; i++) {
+      const VkImageCopy2 &region = info->pRegions[i];
+
+      dzn_foreach_aspect(aspect, region.srcSubresource.aspectMask) {
+         for (uint32_t l = 0; l < region.srcSubresource.layerCount; l++)
+            dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, tmp_desc, tmp_loc, i, aspect, l);
+      }
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBlitImage2(VkCommandBuffer commandBuffer,
+                  const VkBlitImageInfo2 *info)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+   if (info->regionCount == 0)
+      return;
+
+   uint32_t desc_count = 0;
+   for (uint32_t r = 0; r < info->regionCount; r++)
+      desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
+
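+   /* One SRV is needed per aspect per region; allocating every slot up
+    * front lets all blit draws in this command source from a single
+    * shader-visible heap.
+    */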
+   dzn_descriptor_heap *heap;
+   uint32_t heap_slot;
+   VkResult result =
+      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
+                                           desc_count, &heap, &heap_slot);
+
+   if (result != VK_SUCCESS) {
+      cmdbuf->error = result;
+      return;
+   }
+
+   if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
+      ID3D12DescriptorHeap * const heaps[] = { heap->heap };
+      cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
+      cmdbuf->cmdlist->SetDescriptorHeaps(ARRAY_SIZE(heaps), heaps);
+   }
+
+   cmdbuf->cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+   for (uint32_t r = 0; r < info->regionCount; r++)
+      dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, r);
+
+   cmdbuf->state.pipeline = NULL;
+   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
+   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResolveImage2(VkCommandBuffer commandBuffer,
+                     const VkResolveImageInfo2 *info)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+   if (info->regionCount == 0)
+      return;
+
+   uint32_t desc_count = 0;
+   for (uint32_t r = 0; r < info->regionCount; r++)
+      desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask);
+
+   dzn_descriptor_heap *heap;
+   uint32_t heap_slot;
+   VkResult result =
+      dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device,
+                                           desc_count, &heap, &heap_slot);
+   if (result != VK_SUCCESS) {
+      cmdbuf->error = result;
+      return;
+   }
+
+   if (heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) {
+      ID3D12DescriptorHeap * const heaps[] = { heap->heap };
+      cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = heap;
+      cmdbuf->cmdlist->SetDescriptorHeaps(ARRAY_SIZE(heaps), heaps);
+   }
+
+   cmdbuf->cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+   for (uint32_t r = 0; r < info->regionCount; r++)
+      dzn_cmd_buffer_resolve_region(cmdbuf, info, heap, &heap_slot, r);
+
+   cmdbuf->state.pipeline = NULL;
+   cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS;
+   if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) {
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+         DZN_CMD_BINDPOINT_DIRTY_PIPELINE;
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearColorImage(VkCommandBuffer commandBuffer,
+                       VkImage image,
+                       VkImageLayout imageLayout,
+                       const VkClearColorValue *pColor,
+                       uint32_t rangeCount,
+                       const VkImageSubresourceRange *pRanges)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(dzn_image, img, image);
+
+   dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
+                              VkImage image,
+                              VkImageLayout imageLayout,
+                              const VkClearDepthStencilValue *pDepthStencil,
+                              uint32_t rangeCount,
+                              const VkImageSubresourceRange *pRanges)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(dzn_image, img, image);
+
+   dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDispatch(VkCommandBuffer commandBuffer,
+                uint32_t groupCountX,
+                uint32_t groupCountY,
uint32_t groupCountZ) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + cmdbuf->state.sysvals.compute.group_count_x = groupCountX; + cmdbuf->state.sysvals.compute.group_count_y = groupCountY; + cmdbuf->state.sysvals.compute.group_count_z = groupCountZ; + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + + dzn_cmd_buffer_prepare_dispatch(cmdbuf); + cmdbuf->cmdlist->Dispatch(groupCountX, groupCountY, groupCountZ); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdFillBuffer(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); + + if (size == VK_WHOLE_SIZE) + size = buf->size - dstOffset; + + size &= ~3ULL; + + ID3D12Resource *src_res; + VkResult result = + dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size, + D3D12_HEAP_TYPE_UPLOAD, + D3D12_RESOURCE_STATE_GENERIC_READ, + &src_res); + if (result != VK_SUCCESS) + return; + + uint32_t *cpu_ptr; + src_res->Map(0, NULL, (void **)&cpu_ptr); + for (uint32_t i = 0; i < size / 4; i++) + cpu_ptr[i] = data; + + src_res->Unmap(0, NULL); + + cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset, src_res, 0, size); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + const void *data) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); + + if (size == VK_WHOLE_SIZE) + size = buf->size - dstOffset; + + /* + * The spec says: + * "size is the number of bytes to fill, and must be either a multiple of + * 4, or VK_WHOLE_SIZE to fill the range from offset to the end of the + * buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer + * is not a multiple of 4, then the nearest smaller multiple is used." 
+    */
+   size &= ~3ULL;
+
+   ID3D12Resource *src_res;
+   VkResult result =
+      dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size,
+                                        D3D12_HEAP_TYPE_UPLOAD,
+                                        D3D12_RESOURCE_STATE_GENERIC_READ,
+                                        &src_res);
+   if (result != VK_SUCCESS)
+      return;
+
+   void *cpu_ptr;
+   src_res->Map(0, NULL, &cpu_ptr);
+   memcpy(cpu_ptr, data, size);
+   src_res->Unmap(0, NULL);
+
+   cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset, src_res, 0, size);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdClearAttachments(VkCommandBuffer commandBuffer,
+                        uint32_t attachmentCount,
+                        const VkClearAttachment *pAttachments,
+                        uint32_t rectCount,
+                        const VkClearRect *pRects)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   dzn_cmd_buffer_clear_attachments(cmdbuf, attachmentCount, pAttachments, rectCount, pRects);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+                        const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+                        const VkSubpassBeginInfo *pSubpassBeginInfo)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(dzn_render_pass, pass, pRenderPassBeginInfo->renderPass);
+   VK_FROM_HANDLE(dzn_framebuffer, framebuffer, pRenderPassBeginInfo->framebuffer);
+
+   assert(pass->attachment_count == framebuffer->attachment_count);
+
+   cmdbuf->state.framebuffer = framebuffer;
+   cmdbuf->state.render_area = D3D12_RECT {
+      .left = pRenderPassBeginInfo->renderArea.offset.x,
+      .top = pRenderPassBeginInfo->renderArea.offset.y,
+      .right = (LONG)(pRenderPassBeginInfo->renderArea.offset.x + pRenderPassBeginInfo->renderArea.extent.width),
+      .bottom = (LONG)(pRenderPassBeginInfo->renderArea.offset.y + pRenderPassBeginInfo->renderArea.extent.height),
+   };
+
+   // The render area has an impact on the scissor state.
+   cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
+   cmdbuf->state.pass = pass;
+   cmdbuf->state.subpass = 0;
+   dzn_cmd_buffer_begin_subpass(cmdbuf);
+
+   uint32_t clear_count =
+      MIN2(pRenderPassBeginInfo->clearValueCount, framebuffer->attachment_count);
+   for (uint32_t i = 0; i < clear_count; ++i) {
+      VkImageAspectFlags aspectMask = 0;
+
+      if (vk_format_is_depth_or_stencil(pass->attachments[i].format)) {
+         if (pass->attachments[i].clear.depth)
+            aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
+         if (pass->attachments[i].clear.stencil)
+            aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+      } else if (pass->attachments[i].clear.color) {
+         aspectMask |= VK_IMAGE_ASPECT_COLOR_BIT;
+      }
+
+      dzn_cmd_buffer_clear_attachment(cmdbuf, i, &pRenderPassBeginInfo->pClearValues[i],
+                                      aspectMask, 0, ~0, 1, &cmdbuf->state.render_area);
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
+                      const VkSubpassEndInfo *pSubpassEndInfo)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   dzn_cmd_buffer_end_subpass(cmdbuf);
+
+   for (uint32_t i = 0; i < cmdbuf->state.pass->attachment_count; i++)
+      dzn_cmd_buffer_attachment_transition(cmdbuf, &cmdbuf->state.pass->attachments[i]);
+
+   cmdbuf->state.framebuffer = NULL;
+   cmdbuf->state.pass = NULL;
+   cmdbuf->state.subpass = 0;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdNextSubpass2(VkCommandBuffer commandBuffer,
+                    const VkSubpassBeginInfo *pSubpassBeginInfo,
+                    const VkSubpassEndInfo *pSubpassEndInfo)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   dzn_cmd_buffer_end_subpass(cmdbuf);
+   assert(cmdbuf->state.subpass + 1 < cmdbuf->state.pass->subpass_count);
+   cmdbuf->state.subpass++;
+   dzn_cmd_buffer_begin_subpass(cmdbuf);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBindPipeline(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint, + VkPipeline pipe) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe); + + cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline; + cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; + if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { + const dzn_graphics_pipeline *gfx = (const dzn_graphics_pipeline *)pipeline; + + if (!gfx->vp.dynamic) { + memcpy(cmdbuf->state.viewports, gfx->vp.desc, + gfx->vp.count * sizeof(cmdbuf->state.viewports[0])); + cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS; + } + + if (!gfx->scissor.dynamic) { + memcpy(cmdbuf->state.scissors, gfx->scissor.desc, + gfx->scissor.count * sizeof(cmdbuf->state.scissors[0])); + cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; + } + + if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) { + cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref; + cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref; + cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; + } + + for (uint32_t vb = 0; vb < gfx->vb.count; vb++) + cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb]; + + if (gfx->vb.count > 0) + BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1); + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); + + struct dzn_descriptor_state *desc_state = + &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state; + uint32_t dirty = 0; + + for (uint32_t i = 0; i < descriptorSetCount; i++) { + uint32_t idx = firstSet + i; + VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]); + + if (desc_state->sets[idx].set != set) { + desc_state->sets[idx].set = set; + dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS; + } + + uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count; + if (dynamic_buffer_count) { + assert(dynamicOffsetCount >= dynamic_buffer_count); + + for (uint32_t j = 0; j < dynamic_buffer_count; j++) + desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j]; + + dynamicOffsetCount -= dynamic_buffer_count; + pDynamicOffsets += dynamic_buffer_count; + dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS; + } + } + + cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetViewport(VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport *pViewports) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT); + + for (uint32_t i = 0; i < viewportCount; i++) { + uint32_t vp = i + firstViewport; + + dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]); + + if (pViewports[i].minDepth > pViewports[i].maxDepth) + cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT); + else + cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT); + + if (pViewports[i].height > 0) + cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp); + else + cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp); + } + + if (viewportCount) { + 
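/* Viewport changes also feed the yz_flip_mask sysval, so flag both the
+       * viewport state and the graphics sysvals as dirty.
+       */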
+      cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS;
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+         DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetScissor(VkCommandBuffer commandBuffer,
+                  uint32_t firstScissor,
+                  uint32_t scissorCount,
+                  const VkRect2D *pScissors)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   for (uint32_t i = 0; i < scissorCount; i++)
+      dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]);
+
+   if (scissorCount)
+      cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
+                     VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
+                     const void *pValues)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   struct dzn_cmd_buffer_push_constant_state *states[2];
+   uint32_t num_states = 0;
+
+   if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
+      states[num_states++] = &cmdbuf->state.push_constant.gfx;
+
+   if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
+      states[num_states++] = &cmdbuf->state.push_constant.compute;
+
+   for (uint32_t i = 0; i < num_states; i++) {
+      memcpy(((char *)states[i]->values) + offset, pValues, size);
+
+      /* Track the dirty range in locals: offset is shared by all entries
+       * in states[], so it must not be modified inside the loop.
+       */
+      uint32_t new_offset = offset;
+      uint32_t new_end = offset + size;
+      if (states[i]->end != 0) {
+         new_offset = MIN2(states[i]->offset, new_offset);
+         new_end = MAX2(states[i]->end, new_end);
+      }
+      states[i]->offset = new_offset;
+      states[i]->end = new_end;
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdDraw(VkCommandBuffer commandBuffer,
+            uint32_t vertexCount,
+            uint32_t instanceCount,
+            uint32_t firstVertex,
+            uint32_t firstInstance)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+
+   const dzn_graphics_pipeline *pipeline = (const dzn_graphics_pipeline *)
+      cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline;
+
+   cmdbuf->state.sysvals.gfx.first_vertex = firstVertex;
+   cmdbuf->state.sysvals.gfx.base_instance = firstInstance;
+   cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |=
+      DZN_CMD_BINDPOINT_DIRTY_SYSVALS;
+
+   if (pipeline->ia.triangle_fan) {
+      D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view;
+
+      VkResult result =
+         dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount);
+      if (result != VK_SUCCESS || !vertexCount)
+         return;
+
+      cmdbuf->state.sysvals.gfx.is_indexed_draw = true;
+      dzn_cmd_buffer_prepare_draw(cmdbuf, true);
+      cmdbuf->cmdlist->DrawIndexedInstanced(vertexCount, instanceCount, 0,
+                                            firstVertex, firstInstance);
+
+      /* Restore the IB view if we modified it when lowering triangle fans.
*/ + if (ib_view.SizeInBytes > 0) { + cmdbuf->state.ib.view = ib_view; + cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; + } + } else { + cmdbuf->state.sysvals.gfx.is_indexed_draw = false; + dzn_cmd_buffer_prepare_draw(cmdbuf, false); + cmdbuf->cmdlist->DrawInstanced(vertexCount, instanceCount, + firstVertex, firstInstance); + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + const dzn_graphics_pipeline *pipeline = (const dzn_graphics_pipeline *) + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; + + cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset; + cmdbuf->state.sysvals.gfx.base_instance = firstInstance; + cmdbuf->state.sysvals.gfx.is_indexed_draw = true; + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + + D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; + + if (pipeline->ia.triangle_fan) { + VkResult result = + dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex); + if (result != VK_SUCCESS || !indexCount) + return; + } + + dzn_cmd_buffer_prepare_draw(cmdbuf, true); + cmdbuf->cmdlist->DrawIndexedInstanced(indexCount, instanceCount, firstIndex, + vertexOffset, firstInstance); + + /* Restore the IB view if we modified it when lowering triangle fans. */ + if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) { + cmdbuf->state.ib.view = ib_view; + cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; + } +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_buffer, buf, buffer); + + dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, drawCount, stride, false); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_buffer, buf, buffer); + + dzn_cmd_buffer_indirect_draw(cmdbuf, buf, offset, drawCount, stride, true); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (!bindingCount) + return; + + D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views; + + for (uint32_t i = 0; i < bindingCount; i++) { + VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]); + + vbviews[firstBinding + i].BufferLocation = buf->res->GetGPUVirtualAddress() + pOffsets[i]; + vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i]; + } + + BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding, + firstBinding + bindingCount - 1); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(dzn_buffer, buf, buffer); + + cmdbuf->state.ib.view.BufferLocation = buf->res->GetGPUVirtualAddress() + offset; + cmdbuf->state.ib.view.SizeInBytes = buf->size - offset; + switch (indexType) { + case VK_INDEX_TYPE_UINT16: + cmdbuf->state.ib.view.Format = 
DXGI_FORMAT_R16_UINT;
+      break;
+   case VK_INDEX_TYPE_UINT32:
+      cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT;
+      break;
+   default: unreachable("Invalid index type");
+   }
+
+   cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResetEvent(VkCommandBuffer commandBuffer,
+                  VkEvent event,
+                  VkPipelineStageFlags stageMask)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   VK_FROM_HANDLE(dzn_event, evt, event);
+
+   if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET))
+      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdSetEvent(VkCommandBuffer commandBuffer,
+                VkEvent event,
+                VkPipelineStageFlags stageMask)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   VK_FROM_HANDLE(dzn_event, evt, event);
+
+   if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET))
+      cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdWaitEvents(VkCommandBuffer commandBuffer,
+                  uint32_t eventCount,
+                  const VkEvent *pEvents,
+                  VkPipelineStageFlags srcStageMask,
+                  VkPipelineStageFlags dstStageMask,
+                  uint32_t memoryBarrierCount,
+                  const VkMemoryBarrier *pMemoryBarriers,
+                  uint32_t bufferMemoryBarrierCount,
+                  const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+                  uint32_t imageMemoryBarrierCount,
+                  const VkImageMemoryBarrier *pImageMemoryBarriers)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+
+   /* Intra-command list waits are handled by this pipeline flush, which is
+    * overkill, but that's the best we can do with the standard D3D12 barrier
+    * API.
+    *
+    * Inter-command list waits are taken care of by the serialization done at
+    * the ExecuteCommandLists() level:
+    * "Calling ExecuteCommandLists twice in succession (from the same thread,
+    * or different threads) guarantees that the first workload (A) finishes
+    * before the second workload (B)"
+    *
+    * HOST -> DEVICE signaling is ignored and we assume events are always
+    * signaled when we reach the vkCmdWaitEvents() point:
+    * "Command buffers in the submission can include vkCmdWaitEvents commands
+    * that wait on events that will not be signaled by earlier commands in the
+    * queue. Such events must be signaled by the application using vkSetEvent,
+    * and the vkCmdWaitEvents commands that wait upon them must not be inside
+    * a render pass instance.
+    * The event must be set before the vkCmdWaitEvents command is executed."
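+    *
+    * Events we have no local knowledge of are thus recorded in events.wait
+    * below, and their signaled state is checked on the CPU at submit time.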
+    */
+   bool flush_pipeline = false;
+
+   for (uint32_t i = 0; i < eventCount; i++) {
+      VK_FROM_HANDLE(dzn_event, event, pEvents[i]);
+
+      struct hash_entry *he =
+         _mesa_hash_table_search(cmdbuf->events.ht, event);
+      if (he) {
+         enum dzn_event_state state = (enum dzn_event_state)(uintptr_t)he->data;
+         assert(state != DZN_EVENT_STATE_RESET);
+         /* Accumulate with |= so an unsignaled external-wait event can't
+          * cancel the flush requested by a previously seen signaled event.
+          */
+         flush_pipeline |= state == DZN_EVENT_STATE_SET;
+      } else {
+         if (!_mesa_hash_table_insert(cmdbuf->events.ht, event,
+                                      (void *)(uintptr_t)DZN_EVENT_STATE_EXTERNAL_WAIT)) {
+            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+            return;
+         }
+
+         dzn_event **entry = (dzn_event **)
+            util_dynarray_grow(&cmdbuf->events.wait, dzn_event *, 1);
+
+         if (!entry) {
+            cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+            return;
+         }
+
+         *entry = event;
+      }
+   }
+
+   if (flush_pipeline) {
+      D3D12_RESOURCE_BARRIER barrier = {
+         .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
+         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+         .UAV = { .pResource = NULL },
+      };
+
+      cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdBeginQuery(VkCommandBuffer commandBuffer,
+                  VkQueryPool queryPool,
+                  uint32_t query,
+                  VkQueryControlFlags flags)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+   dzn_cmd_buffer_query_pool_state *state =
+      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+   if (!state)
+      return;
+
+   qpool->queries[query].type = dzn_query_pool_get_query_type(qpool, flags);
+   dzn_cmd_buffer_dynbitset_clear(cmdbuf, &state->collect, query);
+   cmdbuf->cmdlist->BeginQuery(qpool->heap, qpool->queries[query].type, query);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdEndQuery(VkCommandBuffer commandBuffer,
+                VkQueryPool queryPool,
+                uint32_t query)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+   dzn_cmd_buffer_query_pool_state *state =
+      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+   if (!state)
+      return;
+
+   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
+   cmdbuf->cmdlist->EndQuery(qpool->heap, qpool->queries[query].type, query);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
+                       VkPipelineStageFlags2 stage,
+                       VkQueryPool queryPool,
+                       uint32_t query)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+   dzn_cmd_buffer_query_pool_state *state =
+      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+   if (!state)
+      return;
+
+   /* Execution barrier so the timestamp gets written after the pipeline flush.
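+    * A UAV barrier on a NULL resource is the closest thing D3D12 has to a
+    * full pipeline flush, hence its use here and in dzn_CmdWaitEvents().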
+    */
+   D3D12_RESOURCE_BARRIER barrier = {
+      .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
+      .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+      .UAV = { .pResource = NULL },
+   };
+
+   cmdbuf->cmdlist->ResourceBarrier(1, &barrier);
+
+   qpool->queries[query].type = D3D12_QUERY_TYPE_TIMESTAMP;
+   dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query);
+   cmdbuf->cmdlist->EndQuery(qpool->heap, qpool->queries[query].type, query);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
+                      VkQueryPool queryPool,
+                      uint32_t firstQuery,
+                      uint32_t queryCount)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk);
+   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+   dzn_cmd_buffer_query_pool_state *state =
+      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+
+   if (!state)
+      return;
+
+   uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t);
+
+   for (uint32_t q = 0; q < queryCount; q += q_step) {
+      uint32_t q_count = MIN2(queryCount - q, q_step);
+
+      cmdbuf->cmdlist->CopyBufferRegion(qpool->collect_buffer,
+                                        dzn_query_pool_get_availability_offset(qpool, firstQuery + q),
+                                        device->queries.refs,
+                                        DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
+                                        q_count * sizeof(uint64_t));
+   }
+
+   q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size;
+
+   for (uint32_t q = 0; q < queryCount; q += q_step) {
+      uint32_t q_count = MIN2(queryCount - q, q_step);
+
+      cmdbuf->cmdlist->CopyBufferRegion(qpool->collect_buffer,
+                                        dzn_query_pool_get_result_offset(qpool, firstQuery + q),
+                                        device->queries.refs,
+                                        DZN_QUERY_REFS_ALL_ZEROS_OFFSET,
+                                        q_count * qpool->query_size);
+   }
+
+   dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount);
+   dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
+                            VkQueryPool queryPool,
+                            uint32_t firstQuery,
+                            uint32_t queryCount,
+                            VkBuffer dstBuffer,
+                            VkDeviceSize dstOffset,
+                            VkDeviceSize stride,
+                            VkQueryResultFlags flags)
+{
+   VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+   VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer);
+
+   dzn_cmd_buffer_query_pool_state *qpstate =
+      dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool);
+   if (!qpstate)
+      return;
+
+   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+      for (uint32_t i = 0; i < queryCount; i++) {
+         if (!dzn_cmd_buffer_dynbitset_test(&qpstate->collect, firstQuery + i) &&
+             !dzn_cmd_buffer_dynbitset_test(&qpstate->signal, firstQuery + i))
+            dzn_cmd_buffer_dynbitset_set(cmdbuf, &qpstate->wait, firstQuery + i);
+      }
+   }
+
+   VkResult result =
+      dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount);
+   if (result != VK_SUCCESS)
+      return;
+
+   bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) &&
+                   stride == qpool->query_size &&
+                   !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
+#define ALL_STATS \
+   (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \
+    VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \
+    VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \
+    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \
+    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \
+    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \
+    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \
+    VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \
+    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \
VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \ + VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) + if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS && + qpool->pipeline_statistics != ALL_STATS) + raw_copy = false; +#undef ALL_STATS + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = qpool->collect_buffer, + .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, + .StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE, + }, + }; + + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); + + if (raw_copy) { + cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset, + qpool->collect_buffer, + dzn_query_pool_get_result_offset(qpool, firstQuery), + dzn_query_pool_get_result_size(qpool, queryCount)); + } else { + uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t); + + for (uint32_t q = 0; q < queryCount; q++) { + uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q); + uint32_t dst_counter_offset = 0; + + if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) { + for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) { + if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics)) + continue; + + cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset + dst_counter_offset, + qpool->collect_buffer, + res_offset + (c * sizeof(uint64_t)), + step); + dst_counter_offset += step; + } + } else { + cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset, + qpool->collect_buffer, + res_offset, step); + dst_counter_offset += step; + } + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + cmdbuf->cmdlist->CopyBufferRegion(buf->res, dstOffset + dst_counter_offset, + qpool->collect_buffer, + dzn_query_pool_get_availability_offset(qpool, firstQuery + q), + step); + } + + dstOffset += stride; + } + } + + DZN_SWAP(barrier.Transition.StateBefore, barrier.Transition.StateAfter); + cmdbuf->cmdlist->ResourceBarrier(1, &barrier); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + dzn_device *device = container_of(cmdbuf->vk.base.device, dzn_device, vk); + VK_FROM_HANDLE(dzn_buffer, buf, buffer); + + cmdbuf->state.sysvals.compute.group_count_x = 0; + cmdbuf->state.sysvals.compute.group_count_y = 0; + cmdbuf->state.sysvals.compute.group_count_z = 0; + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= + DZN_CMD_BINDPOINT_DIRTY_SYSVALS; + + dzn_cmd_buffer_prepare_dispatch(cmdbuf); + + dzn_compute_pipeline *pipeline = (dzn_compute_pipeline *) + cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline; + ID3D12CommandSignature *cmdsig = + dzn_compute_pipeline_get_indirect_cmd_sig(pipeline); + + if (!cmdsig) { + cmdbuf->error = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return; + } + + ID3D12Resource *exec_buf; + VkResult result = + dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2, + D3D12_HEAP_TYPE_DEFAULT, + D3D12_RESOURCE_STATE_COPY_DEST, + &exec_buf); + if (result != VK_SUCCESS) + return; + + cmdbuf->cmdlist->CopyBufferRegion(exec_buf, 0, + buf->res, + offset, + sizeof(D3D12_DISPATCH_ARGUMENTS)); + cmdbuf->cmdlist->CopyBufferRegion(exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS), + buf->res, + offset, + sizeof(D3D12_DISPATCH_ARGUMENTS)); + D3D12_RESOURCE_BARRIER barriers[] = { + 
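/* Note: the exec buffer is twice the size of D3D12_DISPATCH_ARGUMENTS and
+       * the arguments were copied into it twice above: presumably the command
+       * signature returned by dzn_compute_pipeline_get_indirect_cmd_sig()
+       * consumes one copy as the group-count sysvals and the other as the
+       * actual dispatch arguments (assumption based on the 2x buffer size).
+       */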
{ + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + /* Transition the exec buffer to indirect arg so it can be + * passed to ExecuteIndirect() as an argument buffer. + */ + .Transition = { + .pResource = exec_buf, + .Subresource = 0, + .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, + .StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, + }, + }, + }; + + cmdbuf->cmdlist->ResourceBarrier(ARRAY_SIZE(barriers), barriers); + + cmdbuf->cmdlist->ExecuteIndirect(cmdsig, 1, exec_buf, 0, NULL, 0); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer, + float lineWidth) +{ + assert(lineWidth == 1.0f); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor) +{ + dzn_stub(); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer, + const float blendConstants[4]) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + cmdbuf->cmdlist->OMSetBlendFactor(blendConstants); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + cmdbuf->cmdlist->OMSetDepthBounds(minDepthBounds, maxDepthBounds); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask; + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask; + + cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask; + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask; + + cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmdbuf->state.zsa.stencil_test.front.ref = reference; + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmdbuf->state.zsa.stencil_test.back.ref = reference; + + cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; +} diff --git a/src/microsoft/vulkan/dzn_cmd_exec_functions b/src/microsoft/vulkan/dzn_cmd_exec_functions new file mode 100644 index 00000000000..9512a8c7923 --- /dev/null +++ b/src/microsoft/vulkan/dzn_cmd_exec_functions @@ -0,0 +1,41 @@ +CmdBeginQuery +CmdBeginRenderPass2 +CmdBindDescriptorSets +CmdBindIndexBuffer +CmdBindPipeline +CmdBindVertexBuffers +CmdBlitImage2 +CmdClearAttachments +CmdClearColorImage +CmdClearDepthStencilImage +CmdCopyBuffer2 +CmdCopyBufferToImage2 +CmdCopyImage2 +CmdCopyImageToBuffer2 +CmdCopyQueryPoolResults +CmdDispatch +CmdDispatchIndirect +CmdDraw +CmdDrawIndexed +CmdDrawIndexedIndirect +CmdDrawIndirect +CmdEndQuery +CmdEndRenderPass2 +CmdFillBuffer +CmdNextSubpass2 +CmdPipelineBarrier2 +CmdPushConstants 
+CmdResetEvent
+CmdResetQueryPool
+CmdResolveImage2
+CmdSetBlendConstants
+CmdSetDepthBias
+CmdSetDepthBounds
+CmdSetEvent
+CmdSetLineWidth
+CmdSetScissor
+CmdSetStencilCompareMask
+CmdSetStencilReference
+CmdSetStencilWriteMask
+CmdUpdateBuffer
+CmdWaitEvents
diff --git a/src/microsoft/vulkan/dzn_descriptor_set.cpp b/src/microsoft/vulkan/dzn_descriptor_set.cpp
new file mode 100644
index 00000000000..520d94477c0
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_descriptor_set.cpp
@@ -0,0 +1,1802 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include <wrl/client.h>
+
+#include "vk_alloc.h"
+#include "vk_descriptors.h"
+#include "vk_util.h"
+
+using Microsoft::WRL::ComPtr;
+
+static D3D12_SHADER_VISIBILITY
+translate_desc_visibility(VkShaderStageFlags in)
+{
+   switch (in) {
+   case VK_SHADER_STAGE_VERTEX_BIT: return D3D12_SHADER_VISIBILITY_VERTEX;
+   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return D3D12_SHADER_VISIBILITY_HULL;
+   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return D3D12_SHADER_VISIBILITY_DOMAIN;
+   case VK_SHADER_STAGE_GEOMETRY_BIT: return D3D12_SHADER_VISIBILITY_GEOMETRY;
+   case VK_SHADER_STAGE_FRAGMENT_BIT: return D3D12_SHADER_VISIBILITY_PIXEL;
+   default: return D3D12_SHADER_VISIBILITY_ALL;
+   }
+}
+
+static D3D12_DESCRIPTOR_RANGE_TYPE
+desc_type_to_range_type(VkDescriptorType in, bool writeable)
+{
+   switch (in) {
+   case VK_DESCRIPTOR_TYPE_SAMPLER:
+      return D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
+
+   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+      return D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      return D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+
+   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+      return writeable ?
+         D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+   default:
+      unreachable("Unsupported desc type");
+   }
+}
+
+static bool
+is_dynamic_desc_type(VkDescriptorType desc_type)
+{
+   return (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+           desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
+}
+
+static uint32_t
+num_descs_for_type(VkDescriptorType type, bool static_sampler)
+{
+   unsigned num_descs = 1;
+
+   /* Some types map to an SRV or a UAV depending on how the shader is using
+    * the resource (NONWRITEABLE flag set or not); in that case we need to
+    * reserve slots for both the UAV and SRV descs.
+    */
+   if (dzn_descriptor_type_depends_on_shader_usage(type))
+      num_descs++;
+
+   /* There's no combined SRV+SAMPLER type in D3D12; we need a separate
+    * descriptor for the sampler.
+    */
+   if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+      num_descs++;
+
+   /* Don't count immutable samplers, they have their own descriptor. */
+   if (static_sampler &&
+       (type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+        type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
+      num_descs--;
+
+   return num_descs;
+}
+
+static void
+dzn_descriptor_set_layout_destroy(dzn_descriptor_set_layout *set_layout,
+                                  const VkAllocationCallbacks *pAllocator)
+{
+   if (!set_layout)
+      return;
+
+   dzn_device *device = container_of(set_layout->base.device, dzn_device, vk);
+
+   vk_object_base_finish(&set_layout->base);
+   vk_free2(&device->vk.alloc, pAllocator, set_layout);
+}
+
+static VkResult
+dzn_descriptor_set_layout_create(dzn_device *device,
+                                 const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+                                 const VkAllocationCallbacks *pAllocator,
+                                 VkDescriptorSetLayout *out)
+{
+   const VkDescriptorSetLayoutBinding *bindings = pCreateInfo->pBindings;
+   uint32_t binding_count = 0, static_sampler_count = 0, total_ranges = 0;
+   uint32_t dynamic_ranges_offset = 0, immutable_sampler_count = 0;
+   uint32_t range_count[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES] = {};
+
+   for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+      D3D12_SHADER_VISIBILITY visibility =
+         translate_desc_visibility(bindings[i].stageFlags);
+      VkDescriptorType desc_type = bindings[i].descriptorType;
+      bool has_sampler =
+         desc_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+         desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+
+      /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding:
+       *
+       * "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or
+       * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then
+       * pImmutableSamplers can be used to initialize a set of immutable
+       * samplers. [...] If descriptorType is not one of these descriptor
+       * types, then pImmutableSamplers is ignored.
+       *
+       * We need to be careful here and only parse pImmutableSamplers if we
+       * have one of the right descriptor types.
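+       *
+       * Note that only single-descriptor immutable-sampler bindings whose
+       * border color D3D12 can express statically get folded into the root
+       * signature as static samplers below; other immutable samplers are
+       * written to the sampler heap when the set is initialized.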
+       */
+      bool immutable_samplers =
+         has_sampler &&
+         bindings[i].pImmutableSamplers != NULL;
+      bool static_sampler = false;
+
+      if (immutable_samplers && bindings[i].descriptorCount == 1) {
+         VK_FROM_HANDLE(dzn_sampler, sampler, bindings[i].pImmutableSamplers[0]);
+
+         if (sampler->static_border_color != -1)
+            static_sampler = true;
+      }
+
+      if (static_sampler) {
+         static_sampler_count += bindings[i].descriptorCount;
+      } else if (has_sampler) {
+         range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]++;
+         total_ranges++;
+
+         if (immutable_samplers)
+            immutable_sampler_count += bindings[i].descriptorCount;
+      }
+
+      if (desc_type != VK_DESCRIPTOR_TYPE_SAMPLER) {
+         range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]++;
+         total_ranges++;
+
+         if (dzn_descriptor_type_depends_on_shader_usage(desc_type)) {
+            range_count[visibility][D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]++;
+            total_ranges++;
+         }
+
+         if (!is_dynamic_desc_type(desc_type)) {
+            uint32_t factor =
+               dzn_descriptor_type_depends_on_shader_usage(desc_type) ? 2 : 1;
+            dynamic_ranges_offset += bindings[i].descriptorCount * factor;
+         }
+      }
+
+      binding_count = MAX2(binding_count, bindings[i].binding + 1);
+   }
+
+   /* We need to allocate descriptor set layouts off the device allocator
+    * with DEVICE scope because they are reference counted and may not be
+    * destroyed when vkDestroyDescriptorSetLayout is called.
+    */
+   VK_MULTIALLOC(ma);
+   VK_MULTIALLOC_DECL(&ma, struct dzn_descriptor_set_layout, set_layout, 1);
+   VK_MULTIALLOC_DECL(&ma, D3D12_DESCRIPTOR_RANGE1,
+                      ranges, total_ranges);
+   VK_MULTIALLOC_DECL(&ma, D3D12_STATIC_SAMPLER_DESC, static_samplers,
+                      static_sampler_count);
+   VK_MULTIALLOC_DECL(&ma, const dzn_sampler *, immutable_samplers,
+                      immutable_sampler_count);
+   VK_MULTIALLOC_DECL(&ma, dzn_descriptor_set_layout_binding, binfos,
+                      binding_count);
+
+   if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   vk_object_base_init(&device->vk, &set_layout->base, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
+   set_layout->static_samplers = static_samplers;
+   set_layout->static_sampler_count = static_sampler_count;
+   set_layout->immutable_samplers = immutable_samplers;
+   set_layout->immutable_sampler_count = immutable_sampler_count;
+   set_layout->bindings = binfos;
+   set_layout->binding_count = binding_count;
+   set_layout->dynamic_buffers.range_offset = dynamic_ranges_offset;
+
+   for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) {
+      dzn_foreach_pool_type (type) {
+         if (range_count[i][type]) {
+            set_layout->ranges[i][type] = ranges;
+            set_layout->range_count[i][type] = range_count[i][type];
+            ranges += range_count[i][type];
+         }
+      }
+   }
+
+   VkDescriptorSetLayoutBinding *ordered_bindings;
+   VkResult ret =
+      vk_create_sorted_bindings(pCreateInfo->pBindings,
+                                pCreateInfo->bindingCount,
+                                &ordered_bindings);
+   if (ret != VK_SUCCESS)
+      return ret;
+
+   assert(binding_count ==
+          (pCreateInfo->bindingCount ?
+ (ordered_bindings[pCreateInfo->bindingCount - 1].binding + 1) : 0)); + + uint32_t range_idx[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES] = {}; + uint32_t static_sampler_idx = 0, immutable_sampler_idx = 0; + uint32_t dynamic_buffer_idx = 0; + uint32_t base_register = 0; + + for (uint32_t i = 0; i < binding_count; i++) { + binfos[i].static_sampler_idx = ~0; + binfos[i].immutable_sampler_idx = ~0; + binfos[i].dynamic_buffer_idx = ~0; + dzn_foreach_pool_type (type) + binfos[i].range_idx[type] = ~0; + } + + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + VkDescriptorType desc_type = ordered_bindings[i].descriptorType; + uint32_t binding = ordered_bindings[i].binding; + uint32_t desc_count = ordered_bindings[i].descriptorCount; + bool has_sampler = + desc_type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bool has_immutable_samplers = + has_sampler && + ordered_bindings[i].pImmutableSamplers != NULL; + bool has_static_sampler = has_immutable_samplers && desc_count == 1; + bool is_dynamic = is_dynamic_desc_type(desc_type); + + D3D12_SHADER_VISIBILITY visibility = + translate_desc_visibility(ordered_bindings[i].stageFlags); + binfos[binding].type = desc_type; + binfos[binding].visibility = visibility; + binfos[binding].base_shader_register = base_register; + assert(base_register + desc_count >= base_register); + base_register += desc_count; + + if (has_static_sampler) { + VK_FROM_HANDLE(dzn_sampler, sampler, ordered_bindings[i].pImmutableSamplers[0]); + + /* Not all border colors are supported. */ + if (sampler->static_border_color != -1) { + binfos[binding].static_sampler_idx = static_sampler_idx; + D3D12_STATIC_SAMPLER_DESC *desc = (D3D12_STATIC_SAMPLER_DESC *) + &static_samplers[static_sampler_idx]; + + desc->Filter = sampler->desc.Filter; + desc->AddressU = sampler->desc.AddressU; + desc->AddressV = sampler->desc.AddressV; + desc->AddressW = sampler->desc.AddressW; + desc->MipLODBias = sampler->desc.MipLODBias; + desc->MaxAnisotropy = sampler->desc.MaxAnisotropy; + desc->ComparisonFunc = sampler->desc.ComparisonFunc; + desc->BorderColor = sampler->static_border_color; + desc->MinLOD = sampler->desc.MinLOD; + desc->MaxLOD = sampler->desc.MaxLOD; + desc->ShaderRegister = binfos[binding].base_shader_register; + desc->ShaderVisibility = translate_desc_visibility(ordered_bindings[i].stageFlags); + static_sampler_idx++; + } else { + has_static_sampler = false; + } + } + + if (has_immutable_samplers && !has_static_sampler) { + binfos[binding].immutable_sampler_idx = immutable_sampler_idx; + for (uint32_t s = 0; s < desc_count; s++) { + VK_FROM_HANDLE(dzn_sampler, sampler, ordered_bindings[i].pImmutableSamplers[s]); + + immutable_samplers[immutable_sampler_idx++] = sampler; + } + } + + if (is_dynamic) { + binfos[binding].dynamic_buffer_idx = dynamic_buffer_idx; + for (uint32_t d = 0; d < desc_count; d++) + set_layout->dynamic_buffers.bindings[dynamic_buffer_idx + d] = binding; + dynamic_buffer_idx += desc_count; + assert(dynamic_buffer_idx <= MAX_DYNAMIC_BUFFERS); + } + + unsigned num_descs = + num_descs_for_type(desc_type, has_static_sampler); + if (!num_descs) continue; + + assert(visibility < ARRAY_SIZE(set_layout->ranges)); + + bool has_range[NUM_POOL_TYPES] = {}; + has_range[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = + has_sampler && !has_static_sampler; + has_range[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = + desc_type != VK_DESCRIPTOR_TYPE_SAMPLER; + + dzn_foreach_pool_type (type) { + if (!has_range[type]) continue; + + uint32_t idx = 
range_idx[visibility][type]++; + assert(idx < range_count[visibility][type]); + + binfos[binding].range_idx[type] = idx; + D3D12_DESCRIPTOR_RANGE1 *range = (D3D12_DESCRIPTOR_RANGE1 *) + &set_layout->ranges[visibility][type][idx]; + VkDescriptorType range_type = desc_type; + if (desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + range_type = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? + VK_DESCRIPTOR_TYPE_SAMPLER : + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + } + range->RangeType = desc_type_to_range_type(range_type, false); + range->NumDescriptors = desc_count; + range->BaseShaderRegister = binfos[binding].base_shader_register; + range->Flags = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? + D3D12_DESCRIPTOR_RANGE_FLAG_NONE : + D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS; + if (is_dynamic) { + range->OffsetInDescriptorsFromTableStart = + set_layout->dynamic_buffers.range_offset + + set_layout->dynamic_buffers.desc_count; + set_layout->dynamic_buffers.count += range->NumDescriptors; + set_layout->dynamic_buffers.desc_count += range->NumDescriptors; + } else { + range->OffsetInDescriptorsFromTableStart = set_layout->range_desc_count[type]; + set_layout->range_desc_count[type] += range->NumDescriptors; + } + + if (!dzn_descriptor_type_depends_on_shader_usage(desc_type)) + continue; + + assert(idx + 1 < range_count[visibility][type]); + range_idx[visibility][type]++; + range[1] = range[0]; + range++; + range->RangeType = desc_type_to_range_type(range_type, true); + if (is_dynamic) { + range->OffsetInDescriptorsFromTableStart = + set_layout->dynamic_buffers.range_offset + + set_layout->dynamic_buffers.desc_count; + set_layout->dynamic_buffers.desc_count += range->NumDescriptors; + } else { + range->OffsetInDescriptorsFromTableStart = set_layout->range_desc_count[type]; + set_layout->range_desc_count[type] += range->NumDescriptors; + } + } + } + + free(ordered_bindings); + + *out = dzn_descriptor_set_layout_to_handle(set_layout); + return VK_SUCCESS; +} + +uint32_t +dzn_descriptor_set_layout_get_heap_offset(const dzn_descriptor_set_layout *layout, + uint32_t b, + D3D12_DESCRIPTOR_HEAP_TYPE type, + bool writeable) +{ + assert(b < layout->binding_count); + D3D12_SHADER_VISIBILITY visibility = layout->bindings[b].visibility; + assert(visibility < ARRAY_SIZE(layout->ranges)); + assert(type < NUM_POOL_TYPES); + + uint32_t range_idx = layout->bindings[b].range_idx[type]; + + if (range_idx == ~0) + return ~0; + + if (writeable && + !dzn_descriptor_type_depends_on_shader_usage(layout->bindings[b].type)) + return ~0; + + if (writeable) + range_idx++; + + assert(range_idx < layout->range_count[visibility][type]); + return layout->ranges[visibility][type][range_idx].OffsetInDescriptorsFromTableStart; +} + +uint32_t +dzn_descriptor_set_layout_get_desc_count(const dzn_descriptor_set_layout *layout, + uint32_t b) +{ + D3D12_SHADER_VISIBILITY visibility = layout->bindings[b].visibility; + assert(visibility < ARRAY_SIZE(layout->ranges)); + + dzn_foreach_pool_type (type) { + uint32_t range_idx = layout->bindings[b].range_idx[type]; + assert(range_idx == ~0 || range_idx < layout->range_count[visibility][type]); + + if (range_idx != ~0) + return layout->ranges[visibility][type][range_idx].NumDescriptors; + } + + return 0; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateDescriptorSetLayout(VkDevice device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorSetLayout *pSetLayout) +{ + return 
dzn_descriptor_set_layout_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pSetLayout); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyDescriptorSetLayout(VkDevice device, + VkDescriptorSetLayout descriptorSetLayout, + const VkAllocationCallbacks *pAllocator) +{ + dzn_descriptor_set_layout_destroy(dzn_descriptor_set_layout_from_handle(descriptorSetLayout), + pAllocator); +} + +static void +dzn_pipeline_layout_destroy(dzn_pipeline_layout *layout) +{ + dzn_device *device = container_of(layout->base.device, dzn_device, vk); + + if (layout->root.sig) + layout->root.sig->Release(); + + vk_free(&device->vk.alloc, layout); +} + +// Reserve two root parameters for the push constants and sysvals CBVs. +#define MAX_INTERNAL_ROOT_PARAMS 2 + +// One root parameter for samplers and the other one for views, multiplied by +// the number of visibility combinations, plus the internal root parameters. +#define MAX_ROOT_PARAMS ((MAX_SHADER_VISIBILITIES * 2) + MAX_INTERNAL_ROOT_PARAMS) + +// Maximum number of DWORDS (32-bit words) that can be used for a root signature +#define MAX_ROOT_DWORDS 64 + +static VkResult +dzn_pipeline_layout_create(dzn_device *device, + const VkPipelineLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineLayout *out) +{ + uint32_t binding_count = 0; + + for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[s]); + + if (!set_layout) + continue; + + binding_count += set_layout->binding_count; + } + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, dzn_pipeline_layout, layout, 1); + VK_MULTIALLOC_DECL(&ma, dxil_spirv_vulkan_binding, + bindings, binding_count); + + if (!vk_multialloc_zalloc(&ma, &device->vk.alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_PIPELINE_LAYOUT); + + for (uint32_t s = 0; s < pCreateInfo->setLayoutCount; s++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[s]); + + if (!set_layout || !set_layout->binding_count) + continue; + + layout->binding_translation[s].bindings = bindings; + bindings += set_layout->binding_count; + } + + uint32_t range_count = 0, static_sampler_count = 0; + + p_atomic_set(&layout->refcount, 1); + + layout->root.param_count = 0; + dzn_foreach_pool_type (type) + layout->desc_count[type] = 0; + + layout->set_count = pCreateInfo->setLayoutCount; + for (uint32_t j = 0; j < layout->set_count; j++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]); + dxil_spirv_vulkan_binding *bindings = layout->binding_translation[j].bindings; + + layout->sets[j].dynamic_buffer_count = set_layout->dynamic_buffers.count; + memcpy(layout->sets[j].range_desc_count, set_layout->range_desc_count, + sizeof(layout->sets[j].range_desc_count)); + layout->binding_translation[j].binding_count = set_layout->binding_count; + for (uint32_t b = 0; b < set_layout->binding_count; b++) + bindings[b].base_register = set_layout->bindings[b].base_shader_register; + + static_sampler_count += set_layout->static_sampler_count; + dzn_foreach_pool_type (type) { + layout->sets[j].heap_offsets[type] = layout->desc_count[type]; + layout->desc_count[type] += set_layout->range_desc_count[type]; + for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) + range_count += set_layout->range_count[i][type]; + } + + layout->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += + 
set_layout->dynamic_buffers.desc_count;
+      for (uint32_t o = 0, elem = 0; o < set_layout->dynamic_buffers.count; o++, elem++) {
+         uint32_t b = set_layout->dynamic_buffers.bindings[o];
+
+         if (o > 0 && set_layout->dynamic_buffers.bindings[o - 1] != b)
+            elem = 0;
+
+         uint32_t srv =
+            dzn_descriptor_set_layout_get_heap_offset(set_layout, b, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, false);
+         uint32_t uav =
+            dzn_descriptor_set_layout_get_heap_offset(set_layout, b, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, true);
+
+         layout->sets[j].dynamic_buffer_heap_offsets[o].srv = srv != ~0 ? srv + elem : ~0;
+         layout->sets[j].dynamic_buffer_heap_offsets[o].uav = uav != ~0 ? uav + elem : ~0;
+      }
+   }
+
+   D3D12_DESCRIPTOR_RANGE1 *ranges = (D3D12_DESCRIPTOR_RANGE1 *)
+      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*ranges) * range_count, 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (range_count && !ranges) {
+      dzn_pipeline_layout_destroy(layout);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   D3D12_STATIC_SAMPLER_DESC *static_sampler_descs = (D3D12_STATIC_SAMPLER_DESC *)
+      vk_alloc2(&device->vk.alloc, pAllocator,
+                sizeof(*static_sampler_descs) * static_sampler_count, 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (static_sampler_count && !static_sampler_descs) {
+      vk_free2(&device->vk.alloc, pAllocator, ranges);
+      dzn_pipeline_layout_destroy(layout);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   D3D12_ROOT_PARAMETER1 root_params[MAX_ROOT_PARAMS] = {};
+   D3D12_DESCRIPTOR_RANGE1 *range_ptr = ranges;
+   D3D12_ROOT_PARAMETER1 *root_param;
+   uint32_t root_dwords = 0;
+
+   for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) {
+      dzn_foreach_pool_type (type) {
+         root_param = &root_params[layout->root.param_count];
+         root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+         root_param->DescriptorTable.pDescriptorRanges = range_ptr;
+         root_param->DescriptorTable.NumDescriptorRanges = 0;
+         root_param->ShaderVisibility = (D3D12_SHADER_VISIBILITY)i;
+
+         for (uint32_t j = 0; j < pCreateInfo->setLayoutCount; j++) {
+            VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]);
+            uint32_t range_count = set_layout->range_count[i][type];
+
+            memcpy(range_ptr, set_layout->ranges[i][type],
+                   range_count * sizeof(D3D12_DESCRIPTOR_RANGE1));
+            for (uint32_t k = 0; k < range_count; k++) {
+               range_ptr[k].RegisterSpace = j;
+               range_ptr[k].OffsetInDescriptorsFromTableStart +=
+                  layout->sets[j].heap_offsets[type];
+            }
+            root_param->DescriptorTable.NumDescriptorRanges += range_count;
+            range_ptr += range_count;
+         }
+
+         if (root_param->DescriptorTable.NumDescriptorRanges) {
+            layout->root.type[layout->root.param_count++] = (D3D12_DESCRIPTOR_HEAP_TYPE)type;
+            root_dwords++;
+         }
+      }
+   }
+
+   layout->root.sets_param_count = layout->root.param_count;
+
+   /* Add our sysvals (as root constants), and make them visible to all shaders */
+   layout->root.sysval_cbv_param_idx = layout->root.param_count;
+   root_param = &root_params[layout->root.param_count++];
+   root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+   root_param->Constants.RegisterSpace = DZN_REGISTER_SPACE_SYSVALS;
+   root_param->Constants.ShaderRegister = 0;
+   root_param->Constants.Num32BitValues =
+      DIV_ROUND_UP(MAX2(sizeof(struct dxil_spirv_vertex_runtime_data),
+                        sizeof(struct dxil_spirv_compute_runtime_data)),
+                   4);
+   root_param->ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+   root_dwords += root_param->Constants.Num32BitValues;
+
+   D3D12_STATIC_SAMPLER_DESC *static_sampler_ptr = static_sampler_descs;
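+   /* Vulkan descriptor sets are mapped to D3D12 register spaces: the ranges
+    * copied above and the static samplers copied below are both assigned
+    * RegisterSpace = N for set N.
+    */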
+   for (uint32_t j = 0; j < pCreateInfo->setLayoutCount; j++) {
+      VK_FROM_HANDLE(dzn_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[j]);
+
+      memcpy(static_sampler_ptr, set_layout->static_samplers,
+             set_layout->static_sampler_count * sizeof(*set_layout->static_samplers));
+      if (j > 0) {
+         for (uint32_t k = 0; k < set_layout->static_sampler_count; k++)
+            static_sampler_ptr[k].RegisterSpace = j;
+      }
+      static_sampler_ptr += set_layout->static_sampler_count;
+   }
+
+   uint32_t push_constant_size = 0;
+   uint32_t push_constant_flags = 0;
+   for (uint32_t j = 0; j < pCreateInfo->pushConstantRangeCount; j++) {
+      const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + j;
+      push_constant_size = MAX2(push_constant_size, range->offset + range->size);
+      push_constant_flags |= range->stageFlags;
+   }
+
+   if (push_constant_size > 0) {
+      layout->root.push_constant_cbv_param_idx = layout->root.param_count;
+      D3D12_ROOT_PARAMETER1 *root_param = &root_params[layout->root.param_count++];
+
+      root_param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+      root_param->Constants.ShaderRegister = 0;
+      root_param->Constants.Num32BitValues = ALIGN(push_constant_size, 4) / 4;
+      root_param->Constants.RegisterSpace = DZN_REGISTER_SPACE_PUSH_CONSTANT;
+      root_param->ShaderVisibility = translate_desc_visibility(push_constant_flags);
+      root_dwords += root_param->Constants.Num32BitValues;
+   }
+
+   assert(layout->root.param_count <= ARRAY_SIZE(root_params));
+   assert(root_dwords <= MAX_ROOT_DWORDS);
+
+   D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
+      .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
+      .Desc_1_1 = {
+         .NumParameters = layout->root.param_count,
+         .pParameters = layout->root.param_count ? root_params : NULL,
+         .NumStaticSamplers = static_sampler_count,
+         .pStaticSamplers = static_sampler_descs,
+         /* TODO Only enable this flag when needed (optimization) */
+         .Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT,
+      },
+   };
+
+   layout->root.sig = dzn_device_create_root_sig(device, &root_sig_desc);
+   vk_free2(&device->vk.alloc, pAllocator, ranges);
+   vk_free2(&device->vk.alloc, pAllocator, static_sampler_descs);
+
+   if (!layout->root.sig) {
+      dzn_pipeline_layout_destroy(layout);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   *out = dzn_pipeline_layout_to_handle(layout);
+   return VK_SUCCESS;
+}
+
+dzn_pipeline_layout *
+dzn_pipeline_layout_ref(dzn_pipeline_layout *layout)
+{
+   if (layout)
+      p_atomic_inc(&layout->refcount);
+
+   return layout;
+}
+
+void
+dzn_pipeline_layout_unref(dzn_pipeline_layout *layout)
+{
+   if (layout) {
+      if (p_atomic_dec_zero(&layout->refcount))
+         dzn_pipeline_layout_destroy(layout);
+   }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreatePipelineLayout(VkDevice device,
+                         const VkPipelineLayoutCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkPipelineLayout *pPipelineLayout)
+{
+   return dzn_pipeline_layout_create(dzn_device_from_handle(device),
+                                     pCreateInfo, pAllocator, pPipelineLayout);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyPipelineLayout(VkDevice device,
+                          VkPipelineLayout layout,
+                          const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout);
+
+   dzn_pipeline_layout_unref(playout);
+}
+
+static D3D12_DESCRIPTOR_HEAP_TYPE
+desc_type_to_heap_type(VkDescriptorType in)
+{
+   switch (in) {
+   case VK_DESCRIPTOR_TYPE_SAMPLER:
+      return D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+   case
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + return D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + default: + unreachable("Unsupported desc type"); + } +} + +bool +dzn_descriptor_type_depends_on_shader_usage(VkDescriptorType type) +{ + return type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER || + type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE || + type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || + type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; +} + +void +dzn_descriptor_heap_finish(dzn_descriptor_heap *heap) +{ + if (heap->heap) + heap->heap->Release(); + + if (heap->dev) + heap->dev->Release(); +} + +VkResult +dzn_descriptor_heap_init(dzn_descriptor_heap *heap, + dzn_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t desc_count, + bool shader_visible) +{ + heap->desc_count = desc_count; + heap->type = type; + heap->dev = device->dev; + heap->dev->AddRef(); + heap->desc_sz = device->dev->GetDescriptorHandleIncrementSize(type); + + D3D12_DESCRIPTOR_HEAP_DESC desc = { + .Type = type, + .NumDescriptors = desc_count, + .Flags = shader_visible ? + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : + D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + }; + + if (FAILED(device->dev->CreateDescriptorHeap(&desc, + IID_PPV_ARGS(&heap->heap)))) { + return vk_error(device, + shader_visible ? + VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_ERROR_OUT_OF_HOST_MEMORY); + } + + heap->cpu_base = heap->heap->GetCPUDescriptorHandleForHeapStart().ptr; + if (shader_visible) + heap->gpu_base = heap->heap->GetGPUDescriptorHandleForHeapStart().ptr; + + return VK_SUCCESS; +} + +D3D12_CPU_DESCRIPTOR_HANDLE +dzn_descriptor_heap_get_cpu_handle(const dzn_descriptor_heap *heap, uint32_t desc_offset) +{ + return D3D12_CPU_DESCRIPTOR_HANDLE { + .ptr = heap->cpu_base + (desc_offset * heap->desc_sz), + }; +} + +D3D12_GPU_DESCRIPTOR_HANDLE +dzn_descriptor_heap_get_gpu_handle(const dzn_descriptor_heap *heap, uint32_t desc_offset) +{ + return D3D12_GPU_DESCRIPTOR_HANDLE { + .ptr = heap->gpu_base ? 
heap->gpu_base + (desc_offset * heap->desc_sz) : 0, + }; +} + +void +dzn_descriptor_heap_write_sampler_desc(dzn_descriptor_heap *heap, + uint32_t desc_offset, + const dzn_sampler *sampler) +{ + heap->dev->CreateSampler(&sampler->desc, + dzn_descriptor_heap_get_cpu_handle(heap, desc_offset)); +} + +void +dzn_descriptor_heap_write_image_view_desc(dzn_descriptor_heap *heap, + uint32_t desc_offset, + bool writeable, bool cube_as_2darray, + const dzn_image_view *iview) +{ + D3D12_CPU_DESCRIPTOR_HANDLE view_handle = + dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); + dzn_image *image = container_of(iview->vk.image, dzn_image, vk); + + if (writeable) { + heap->dev->CreateUnorderedAccessView(image->res, NULL, &iview->uav_desc, view_handle); + } else if (cube_as_2darray && + (iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || + iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE)) { + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = iview->srv_desc; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srv_desc.Texture2DArray.PlaneSlice = 0; + if (iview->srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) { + srv_desc.Texture2DArray.MostDetailedMip = + iview->srv_desc.TextureCubeArray.MostDetailedMip; + srv_desc.Texture2DArray.MipLevels = + iview->srv_desc.TextureCubeArray.MipLevels; + srv_desc.Texture2DArray.FirstArraySlice = + iview->srv_desc.TextureCubeArray.First2DArrayFace; + srv_desc.Texture2DArray.ArraySize = + iview->srv_desc.TextureCubeArray.NumCubes * 6; + } else { + srv_desc.Texture2DArray.MostDetailedMip = + iview->srv_desc.TextureCube.MostDetailedMip; + srv_desc.Texture2DArray.MipLevels = + iview->srv_desc.TextureCube.MipLevels; + srv_desc.Texture2DArray.FirstArraySlice = 0; + srv_desc.Texture2DArray.ArraySize = 6; + } + + heap->dev->CreateShaderResourceView(image->res, &srv_desc, view_handle); + } else { + heap->dev->CreateShaderResourceView(image->res, &iview->srv_desc, view_handle); + } +} + +void +dzn_descriptor_heap_write_buffer_view_desc(dzn_descriptor_heap *heap, + uint32_t desc_offset, + bool writeable, + const dzn_buffer_view *bview) +{ + D3D12_CPU_DESCRIPTOR_HANDLE view_handle = + dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); + + if (writeable) + heap->dev->CreateUnorderedAccessView(bview->buffer->res, NULL, &bview->uav_desc, view_handle); + else + heap->dev->CreateShaderResourceView(bview->buffer->res, &bview->srv_desc, view_handle); +} + +void +dzn_descriptor_heap_write_buffer_desc(dzn_descriptor_heap *heap, + uint32_t desc_offset, + bool writeable, + const dzn_buffer_desc *info) +{ + D3D12_CPU_DESCRIPTOR_HANDLE view_handle = + dzn_descriptor_heap_get_cpu_handle(heap, desc_offset); + + VkDeviceSize size = + info->range == VK_WHOLE_SIZE ? 
+ info->buffer->size - info->offset : + info->range; + + if (info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || + info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { + assert(!writeable); + D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = { + .BufferLocation = info->buffer->res->GetGPUVirtualAddress() + info->offset, + .SizeInBytes = ALIGN_POT(size, 256), + }; + heap->dev->CreateConstantBufferView(&cbv_desc, view_handle); + } else if (writeable) { + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, + .Buffer = { + .FirstElement = info->offset / sizeof(uint32_t), + .NumElements = (UINT)size / sizeof(uint32_t), + .Flags = D3D12_BUFFER_UAV_FLAG_RAW, + }, + }; + heap->dev->CreateUnorderedAccessView(info->buffer->res, NULL, &uav_desc, view_handle); + } else { + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, + .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Buffer = { + .FirstElement = info->offset / sizeof(uint32_t), + .NumElements = (UINT)size / sizeof(uint32_t), + .Flags = D3D12_BUFFER_SRV_FLAG_RAW, + }, + }; + heap->dev->CreateShaderResourceView(info->buffer->res, &srv_desc, view_handle); + } +} + +void +dzn_descriptor_heap_copy(dzn_descriptor_heap *dst_heap, + uint32_t dst_offset, + const dzn_descriptor_heap *src_heap, + uint32_t src_offset, + uint32_t desc_count) +{ + D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = + dzn_descriptor_heap_get_cpu_handle(dst_heap, dst_offset); + D3D12_CPU_DESCRIPTOR_HANDLE src_handle = + dzn_descriptor_heap_get_cpu_handle(src_heap, src_offset); + + dst_heap->dev->CopyDescriptorsSimple(desc_count, + dst_handle, + src_handle, + dst_heap->type); +} + +struct dzn_descriptor_set_ptr { + uint32_t binding, elem; +}; + +static void +dzn_descriptor_set_ptr_validate(const dzn_descriptor_set *set, + dzn_descriptor_set_ptr *ptr) +{ + + if (ptr->binding >= set->layout->binding_count) { + ptr->binding = ~0; + ptr->elem = ~0; + return; + } + + uint32_t desc_count = + dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); + if (ptr->elem >= desc_count) { + ptr->binding = ~0; + ptr->elem = ~0; + } +} + +static void +dzn_descriptor_set_ptr_init(const dzn_descriptor_set *set, + dzn_descriptor_set_ptr *ptr, + uint32_t binding, uint32_t elem) +{ + ptr->binding = binding; + ptr->elem = elem; + dzn_descriptor_set_ptr_validate(set, ptr); +} + +static void +dzn_descriptor_set_ptr_move(const dzn_descriptor_set *set, + dzn_descriptor_set_ptr *ptr, + uint32_t count) +{ + if (ptr->binding == ~0) + return; + + while (count) { + uint32_t desc_count = + dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); + + if (count >= desc_count - ptr->elem) { + count -= desc_count - ptr->elem; + ptr->binding++; + ptr->elem = 0; + } else { + ptr->elem += count; + count = 0; + } + } + + dzn_descriptor_set_ptr_validate(set, ptr); +} + +bool +dzn_descriptor_set_ptr_is_valid(const dzn_descriptor_set_ptr *ptr) +{ + return ptr->binding != ~0 && ptr->elem != ~0; +} + +uint32_t +dzn_descriptor_set_remaining_descs_in_binding(const dzn_descriptor_set *set, + const dzn_descriptor_set_ptr *ptr) +{ + if (ptr->binding >= set->layout->binding_count) + return 0; + + uint32_t desc_count = + dzn_descriptor_set_layout_get_desc_count(set->layout, ptr->binding); + + return desc_count >= ptr->elem ? 
desc_count - ptr->elem : 0; +} + + +uint32_t +dzn_descriptor_set_get_heap_offset(const dzn_descriptor_set *set, + D3D12_DESCRIPTOR_HEAP_TYPE type, + const dzn_descriptor_set_ptr *ptr, + bool writeable) +{ + if (ptr->binding == ~0) + return ~0; + + uint32_t base = + dzn_descriptor_set_layout_get_heap_offset(set->layout, ptr->binding, type, writeable); + if (base == ~0) + return ~0; + + return base + ptr->elem; +} + +void +dzn_descriptor_set_write_sampler_desc(dzn_descriptor_set *set, + const dzn_descriptor_set_ptr *ptr, + const dzn_sampler *sampler) +{ + D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + uint32_t heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, false); + + if (heap_offset != ~0) { + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_write_sampler_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + sampler); + mtx_unlock(&set->pool->defragment_lock); + } +} + +uint32_t +dzn_descriptor_set_get_dynamic_buffer_idx(const dzn_descriptor_set *set, + const dzn_descriptor_set_ptr *ptr) +{ + if (ptr->binding == ~0) + return ~0; + + uint32_t base = set->layout->bindings[ptr->binding].dynamic_buffer_idx; + + if (base == ~0) + return ~0; + + return base + ptr->elem; +} + +void +dzn_descriptor_set_write_dynamic_buffer_desc(dzn_descriptor_set *set, + const dzn_descriptor_set_ptr *ptr, + const dzn_buffer_desc *info) +{ + uint32_t dynamic_buffer_idx = + dzn_descriptor_set_get_dynamic_buffer_idx(set, ptr); + if (dynamic_buffer_idx == ~0) + return; + + assert(dynamic_buffer_idx < set->layout->dynamic_buffers.count); + set->dynamic_buffers[dynamic_buffer_idx] = *info; +} + +VkDescriptorType +dzn_descriptor_set_get_desc_vk_type(const dzn_descriptor_set *set, + const dzn_descriptor_set_ptr *ptr) +{ + if (ptr->binding >= set->layout->binding_count) + return (VkDescriptorType)~0; + + return set->layout->bindings[ptr->binding].type; +} + +void +dzn_descriptor_set_write_image_view_desc(dzn_descriptor_set *set, + const dzn_descriptor_set_ptr *ptr, + bool cube_as_2darray, + const dzn_image_view *iview) +{ + D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + uint32_t heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, false); + if (heap_offset == ~0) + return; + + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_write_image_view_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + false, cube_as_2darray, + iview); + + VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); + if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) { + heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, true); + assert(heap_offset != ~0); + dzn_descriptor_heap_write_image_view_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + true, cube_as_2darray, + iview); + } + mtx_unlock(&set->pool->defragment_lock); +} + +void +dzn_descriptor_set_write_buffer_view_desc(dzn_descriptor_set *set, + const dzn_descriptor_set_ptr *ptr, + const dzn_buffer_view *bview) +{ + D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + uint32_t heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, false); + if (heap_offset == ~0) + return; + + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_write_buffer_view_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + false, bview); + + VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); + if 
(dzn_descriptor_type_depends_on_shader_usage(vk_type)) { + heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, true); + assert(heap_offset != ~0); + dzn_descriptor_heap_write_buffer_view_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + true, bview); + } + mtx_unlock(&set->pool->defragment_lock); +} + +void +dzn_descriptor_set_write_buffer_desc(dzn_descriptor_set *set, + const dzn_descriptor_set_ptr *ptr, + const dzn_buffer_desc *bdesc) +{ + D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + uint32_t heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, false); + if (heap_offset == ~0) + return; + + mtx_lock(&set->pool->defragment_lock); + dzn_descriptor_heap_write_buffer_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + false, bdesc); + + VkDescriptorType vk_type = dzn_descriptor_set_get_desc_vk_type(set, ptr); + if (dzn_descriptor_type_depends_on_shader_usage(vk_type)) { + heap_offset = + dzn_descriptor_set_get_heap_offset(set, type, ptr, true); + assert(heap_offset != ~0); + dzn_descriptor_heap_write_buffer_desc(&set->pool->heaps[type], + set->heap_offsets[type] + heap_offset, + true, bdesc); + } + mtx_unlock(&set->pool->defragment_lock); +} + +static void +dzn_descriptor_set_init(dzn_descriptor_set *set, + dzn_device *device, + dzn_descriptor_pool *pool, + dzn_descriptor_set_layout *layout) +{ + vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET); + + set->pool = pool; + set->layout = layout; + + mtx_lock(&pool->defragment_lock); + dzn_foreach_pool_type(type) { + set->heap_offsets[type] = pool->free_offset[type]; + set->heap_sizes[type] = layout->range_desc_count[type]; + set->pool->free_offset[type] += layout->range_desc_count[type]; + } + mtx_unlock(&pool->defragment_lock); + + /* Pre-fill the immutable samplers */ + if (layout->immutable_sampler_count) { + for (uint32_t b = 0; b < layout->binding_count; b++) { + bool has_samplers = + layout->bindings[b].type == VK_DESCRIPTOR_TYPE_SAMPLER || + layout->bindings[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + + if (!has_samplers || layout->bindings[b].immutable_sampler_idx == ~0) + continue; + + dzn_descriptor_set_ptr ptr; + const dzn_sampler **sampler = + &layout->immutable_samplers[layout->bindings[b].immutable_sampler_idx]; + for (dzn_descriptor_set_ptr_init(set, &ptr, b, 0); + dzn_descriptor_set_ptr_is_valid(&ptr); + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + dzn_descriptor_set_write_sampler_desc(set, &ptr, *sampler); + sampler++; + } + } + } +} + +static void +dzn_descriptor_set_finish(dzn_descriptor_set *set) +{ + vk_object_base_finish(&set->base); + set->pool = NULL; + set->layout = NULL; +} + +static void +dzn_descriptor_pool_destroy(dzn_descriptor_pool *pool, + const VkAllocationCallbacks *pAllocator) +{ + if (!pool) + return; + + dzn_device *device = container_of(pool->base.device, dzn_device, vk); + + dzn_foreach_pool_type (type) { + if (pool->desc_count[type]) + dzn_descriptor_heap_finish(&pool->heaps[type]); + } + + vk_object_base_finish(&pool->base); + vk_free2(&device->vk.alloc, pAllocator, pool); +} + +static VkResult +dzn_descriptor_pool_create(dzn_device *device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *out) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, dzn_descriptor_pool, pool, 1); + VK_MULTIALLOC_DECL(&ma, dzn_descriptor_set, sets, pCreateInfo->maxSets); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, 
pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->alloc = pAllocator ? *pAllocator : device->vk.alloc; + pool->sets = sets; + pool->set_count = pCreateInfo->maxSets; + mtx_init(&pool->defragment_lock, mtx_plain); + + vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_DESCRIPTOR_POOL); + + for (uint32_t p = 0; p < pCreateInfo->poolSizeCount; p++) { + VkDescriptorType type = pCreateInfo->pPoolSizes[p].type; + uint32_t num_desc = pCreateInfo->pPoolSizes[p].descriptorCount; + + switch (type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] += num_desc; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc; + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] += num_desc; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + /* Reserve one UAV and one SRV slot for those. */ + pool->desc_count[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] += num_desc * 2; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + break; + default: + unreachable("Unsupported desc type"); + } + } + + dzn_foreach_pool_type (type) { + if (!pool->desc_count[type]) + continue; + + VkResult result = + dzn_descriptor_heap_init(&pool->heaps[type], device, type, pool->desc_count[type], false); + if (result != VK_SUCCESS) { + dzn_descriptor_pool_destroy(pool, pAllocator); + return result; + } + } + + *out = dzn_descriptor_pool_to_handle(pool); + return VK_SUCCESS; +} + +VkResult +dzn_descriptor_pool_defragment_heap(dzn_descriptor_pool *pool, + D3D12_DESCRIPTOR_HEAP_TYPE type) +{ + dzn_device *device = container_of(pool->base.device, dzn_device, vk); + dzn_descriptor_heap new_heap; + + VkResult result = + dzn_descriptor_heap_init(&new_heap, device, type, + pool->heaps[type].desc_count, + false); + if (result != VK_SUCCESS) + return result; + + mtx_lock(&pool->defragment_lock); + uint32_t heap_offset = 0; + for (uint32_t s = 0; s < pool->set_count; s++) { + if (!pool->sets[s].layout) + continue; + + dzn_descriptor_heap_copy(&new_heap, heap_offset, + &pool->heaps[type], + pool->sets[s].heap_offsets[type], + pool->sets[s].heap_sizes[type]); + pool->sets[s].heap_offsets[type] = heap_offset; + heap_offset += pool->sets[s].heap_sizes[type]; + } + + /* New allocations start right after the compacted live sets. */ + pool->free_offset[type] = heap_offset; + mtx_unlock(&pool->defragment_lock); + + dzn_descriptor_heap_finish(&pool->heaps[type]); + pool->heaps[type] = new_heap; + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateDescriptorPool(VkDevice device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *pDescriptorPool) +{ + return dzn_descriptor_pool_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pDescriptorPool); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyDescriptorPool(VkDevice device, + VkDescriptorPool descriptorPool, + const VkAllocationCallbacks *pAllocator) +{ + return dzn_descriptor_pool_destroy(dzn_descriptor_pool_from_handle(descriptorPool), + pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_ResetDescriptorPool(VkDevice device, + 
VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) +{ + VK_FROM_HANDLE(dzn_descriptor_pool, pool, descriptorPool); + + for (uint32_t s = 0; s < pool->set_count; s++) + dzn_descriptor_set_finish(&pool->sets[s]); + + dzn_foreach_pool_type(type) + pool->free_offset[type] = 0; + + return VK_SUCCESS; +} + +void +dzn_descriptor_heap_pool_finish(dzn_descriptor_heap_pool *pool) +{ + list_splicetail(&pool->active_heaps, &pool->free_heaps); + list_for_each_entry_safe(dzn_descriptor_heap_pool_entry, entry, &pool->free_heaps, link) { + list_del(&entry->link); + dzn_descriptor_heap_finish(&entry->heap); + vk_free(pool->alloc, entry); + } +} + +void +dzn_descriptor_heap_pool_init(dzn_descriptor_heap_pool *pool, + dzn_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE type, + bool shader_visible, + const VkAllocationCallbacks *alloc) +{ + assert(!shader_visible || + type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || + type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + + pool->alloc = alloc; + pool->type = type; + pool->shader_visible = shader_visible; + list_inithead(&pool->active_heaps); + list_inithead(&pool->free_heaps); + pool->offset = 0; + pool->desc_sz = device->dev->GetDescriptorHandleIncrementSize(type); +} + +VkResult +dzn_descriptor_heap_pool_alloc_slots(dzn_descriptor_heap_pool *pool, + dzn_device *device, uint32_t desc_count, + dzn_descriptor_heap **heap, + uint32_t *first_slot) +{ + dzn_descriptor_heap *last_heap = + list_is_empty(&pool->active_heaps) ? + NULL : + &(list_last_entry(&pool->active_heaps, dzn_descriptor_heap_pool_entry, link)->heap); + uint32_t last_heap_desc_count = + last_heap ? last_heap->desc_count : 0; + + if (pool->offset + desc_count > last_heap_desc_count) { + uint32_t granularity = + (pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || + pool->type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) ? 
+ 64 * 1024 : 4 * 1024; + /* CBV/SRV/UAV and sampler heaps grow in 64k chunks, RTV/DSV heaps in 4k chunks. */ + uint32_t alloc_step = ALIGN_POT(desc_count * pool->desc_sz, granularity); + uint32_t heap_desc_count = MAX2(alloc_step / pool->desc_sz, 16); + dzn_descriptor_heap_pool_entry *new_heap = NULL; + + list_for_each_entry_safe(dzn_descriptor_heap_pool_entry, entry, &pool->free_heaps, link) { + if (entry->heap.desc_count >= heap_desc_count) { + new_heap = entry; + list_del(&entry->link); + break; + } + } + + if (!new_heap) { + new_heap = (dzn_descriptor_heap_pool_entry *) + vk_zalloc(pool->alloc, sizeof(*new_heap), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!new_heap) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = + dzn_descriptor_heap_init(&new_heap->heap, device, pool->type, + heap_desc_count, pool->shader_visible); + if (result != VK_SUCCESS) { + vk_free(pool->alloc, new_heap); + return result; + } + } + + list_addtail(&new_heap->link, &pool->active_heaps); + pool->offset = 0; + last_heap = &new_heap->heap; + } + + *heap = last_heap; + *first_slot = pool->offset; + pool->offset += desc_count; + return VK_SUCCESS; +} + +void +dzn_descriptor_heap_pool_reset(dzn_descriptor_heap_pool *pool) +{ + pool->offset = 0; + list_splicetail(&pool->active_heaps, &pool->free_heaps); + list_inithead(&pool->active_heaps); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_AllocateDescriptorSets(VkDevice dev, + const VkDescriptorSetAllocateInfo *pAllocateInfo, + VkDescriptorSet *pDescriptorSets) +{ + VK_FROM_HANDLE(dzn_descriptor_pool, pool, pAllocateInfo->descriptorPool); + VK_FROM_HANDLE(dzn_device, device, dev); + VkResult result; + unsigned i; + + if (pAllocateInfo->descriptorSetCount > (pool->set_count - pool->used_set_count)) + return VK_ERROR_OUT_OF_POOL_MEMORY; + + uint32_t set_idx = 0; + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + VK_FROM_HANDLE(dzn_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]); + + dzn_foreach_pool_type(type) { + if (pool->used_desc_count[type] + layout->range_desc_count[type] > pool->desc_count[type]) { + dzn_FreeDescriptorSets(dev, pAllocateInfo->descriptorPool, i, pDescriptorSets); + return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY); + } + + if (pool->free_offset[type] + layout->range_desc_count[type] > pool->desc_count[type]) { + result = dzn_descriptor_pool_defragment_heap(pool, type); + if (result != VK_SUCCESS) { + dzn_FreeDescriptorSets(dev, pAllocateInfo->descriptorPool, i, pDescriptorSets); + return vk_error(device, VK_ERROR_FRAGMENTED_POOL); + } + } + } + + dzn_descriptor_set *set = NULL; + for (; set_idx < pool->set_count; set_idx++) { + if (!pool->sets[set_idx].layout) { + set = &pool->sets[set_idx]; + break; + } + } + + dzn_descriptor_set_init(set, device, pool, layout); + pDescriptorSets[i] = dzn_descriptor_set_to_handle(set); + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_FreeDescriptorSets(VkDevice dev, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet *pDescriptorSets) +{ + VK_FROM_HANDLE(dzn_descriptor_pool, pool, descriptorPool); + VK_FROM_HANDLE(dzn_device, device, dev); + + for (uint32_t s = 0; s < count; s++) { + VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[s]); + + if (!set) + continue; + + assert(set->pool == pool); + + dzn_descriptor_set_finish(set); + } + + mtx_lock(&pool->defragment_lock); + dzn_foreach_pool_type(type) + pool->free_offset[type] = 0; + + for (uint32_t s = 0; s < pool->set_count; s++) { + const dzn_descriptor_set *set = &pool->sets[s]; + + if (set->layout) { + dzn_foreach_pool_type (type) { + 
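/* free_offset is the high-water mark of the sets still alive; holes left below it by freed sets are only reclaimed by the next defragmentation pass. */ +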
pool->free_offset[type] = + MAX2(pool->free_offset[type], + set->heap_offsets[type] + + set->layout->range_desc_count[type]); + } + } + } + mtx_unlock(&pool->defragment_lock); + + return VK_SUCCESS; +} + +static void +dzn_descriptor_set_write(const VkWriteDescriptorSet *pDescriptorWrite) +{ + VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorWrite->dstSet); + + dzn_descriptor_set_ptr ptr; + + dzn_descriptor_set_ptr_init(set, &ptr, + pDescriptorWrite->dstBinding, + pDescriptorWrite->dstArrayElement); + uint32_t desc_count = pDescriptorWrite->descriptorCount; + + uint32_t d = 0; + bool cube_as_2darray = + pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + + switch (pDescriptorWrite->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; + VK_FROM_HANDLE(dzn_sampler, sampler, pImageInfo->sampler); + + if (sampler) + dzn_descriptor_set_write_sampler_desc(set, &ptr, sampler); + + d++; + } + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; + VK_FROM_HANDLE(dzn_sampler, sampler, pImageInfo->sampler); + VK_FROM_HANDLE(dzn_image_view, iview, pImageInfo->imageView); + + if (sampler) + dzn_descriptor_set_write_sampler_desc(set, &ptr, sampler); + + if (iview) + dzn_descriptor_set_write_image_view_desc(set, &ptr, cube_as_2darray, iview); + + d++; + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorImageInfo *pImageInfo = pDescriptorWrite->pImageInfo + d; + VK_FROM_HANDLE(dzn_image_view, iview, pImageInfo->imageView); + + if (iview) + dzn_descriptor_set_write_image_view_desc(set, &ptr, cube_as_2darray, iview); + + d++; + } + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorBufferInfo *binfo = &pDescriptorWrite->pBufferInfo[d]; + dzn_buffer_desc desc { + pDescriptorWrite->descriptorType, + dzn_buffer_from_handle(binfo->buffer), + binfo->range, binfo->offset + }; + + if (desc.buffer) + dzn_descriptor_set_write_buffer_desc(set, &ptr, &desc); + + d++; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + const VkDescriptorBufferInfo *binfo = &pDescriptorWrite->pBufferInfo[d]; + dzn_buffer_desc desc { + pDescriptorWrite->descriptorType, + dzn_buffer_from_handle(binfo->buffer), + 
binfo->range, binfo->offset + }; + + if (desc.buffer) + dzn_descriptor_set_write_dynamic_buffer_desc(set, &ptr, &desc); + + d++; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (; dzn_descriptor_set_ptr_is_valid(&ptr) && d < desc_count; + dzn_descriptor_set_ptr_move(set, &ptr, 1)) { + assert(dzn_descriptor_set_get_desc_vk_type(set, &ptr) == pDescriptorWrite->descriptorType); + VK_FROM_HANDLE(dzn_buffer_view, bview, pDescriptorWrite->pTexelBufferView[d]); + + if (bview) + dzn_descriptor_set_write_buffer_view_desc(set, &ptr, bview); + + d++; + } + break; + + default: + unreachable("invalid descriptor type"); + break; + } + + assert(d == pDescriptorWrite->descriptorCount); +} + +static void +dzn_descriptor_set_copy(const VkCopyDescriptorSet *pDescriptorCopy) +{ + VK_FROM_HANDLE(dzn_descriptor_set, src_set, pDescriptorCopy->srcSet); + VK_FROM_HANDLE(dzn_descriptor_set, dst_set, pDescriptorCopy->dstSet); + dzn_descriptor_set_ptr src_ptr, dst_ptr; + + dzn_descriptor_set_ptr_init(src_set, &src_ptr, + pDescriptorCopy->srcBinding, + pDescriptorCopy->srcArrayElement); + dzn_descriptor_set_ptr_init(dst_set, &dst_ptr, + pDescriptorCopy->dstBinding, + pDescriptorCopy->dstArrayElement); + + uint32_t copied_count = 0; + + while (dzn_descriptor_set_ptr_is_valid(&src_ptr) && + dzn_descriptor_set_ptr_is_valid(&dst_ptr)) { + VkDescriptorType src_type = + dzn_descriptor_set_get_desc_vk_type(src_set, &src_ptr); + VkDescriptorType dst_type = + dzn_descriptor_set_get_desc_vk_type(dst_set, &dst_ptr); + + assert(copied_count < pDescriptorCopy->descriptorCount); + assert(src_type == dst_type); + uint32_t count = + MIN2(dzn_descriptor_set_remaining_descs_in_binding(src_set, &src_ptr), + dzn_descriptor_set_remaining_descs_in_binding(dst_set, &dst_ptr)); + + if (src_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + src_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { + uint32_t src_idx = + dzn_descriptor_set_get_dynamic_buffer_idx(src_set, &src_ptr); + uint32_t dst_idx = + dzn_descriptor_set_get_dynamic_buffer_idx(dst_set, &dst_ptr); + + memcpy(&dst_set->dynamic_buffers[dst_idx], + &src_set->dynamic_buffers[src_idx], + sizeof(*dst_set->dynamic_buffers) * count); + } else { + dzn_foreach_pool_type(type) { + uint32_t src_heap_offset = + dzn_descriptor_set_get_heap_offset(src_set, type, &src_ptr, false); + uint32_t dst_heap_offset = + dzn_descriptor_set_get_heap_offset(dst_set, type, &dst_ptr, false); + + if (src_heap_offset == ~0) { + assert(dst_heap_offset == ~0); + continue; + } + + mtx_lock(&src_set->pool->defragment_lock); + mtx_lock(&dst_set->pool->defragment_lock); + dzn_descriptor_heap_copy(&dst_set->pool->heaps[type], + dst_set->heap_offsets[type] + dst_heap_offset, + &src_set->pool->heaps[type], + src_set->heap_offsets[type] + src_heap_offset, + count); + + if (dzn_descriptor_type_depends_on_shader_usage(src_type)) { + src_heap_offset = + dzn_descriptor_set_get_heap_offset(src_set, type, &src_ptr, true); + dst_heap_offset = + dzn_descriptor_set_get_heap_offset(dst_set, type, &dst_ptr, true); + assert(src_heap_offset != ~0); + assert(dst_heap_offset != ~0); + dzn_descriptor_heap_copy(&dst_set->pool->heaps[type], + dst_set->heap_offsets[type] + dst_heap_offset, + &src_set->pool->heaps[type], + src_set->heap_offsets[type] + src_heap_offset, + count); + } + mtx_unlock(&dst_set->pool->defragment_lock); + mtx_unlock(&src_set->pool->defragment_lock); + } + } + + dzn_descriptor_set_ptr_move(src_set, &src_ptr, count); + 
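/* Advance the source and destination cursors by the same descriptor count so they stay aligned when crossing binding boundaries. */ +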
dzn_descriptor_set_ptr_move(dst_set, &dst_ptr, count); + copied_count += count; + } + + assert(copied_count == pDescriptorCopy->descriptorCount); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_UpdateDescriptorSets(VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies) +{ + VK_FROM_HANDLE(dzn_device, dev, _device); + + for (unsigned i = 0; i < descriptorWriteCount; i++) + dzn_descriptor_set_write(&pDescriptorWrites[i]); + + for (unsigned i = 0; i < descriptorCopyCount; i++) + dzn_descriptor_set_copy(&pDescriptorCopies[i]); +} diff --git a/src/microsoft/vulkan/dzn_device.cpp b/src/microsoft/vulkan/dzn_device.cpp new file mode 100644 index 00000000000..f1722261612 --- /dev/null +++ b/src/microsoft/vulkan/dzn_device.cpp @@ -0,0 +1,2632 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_common_entrypoints.h" +#include "vk_cmd_enqueue_entrypoints.h" +#include "vk_debug_report.h" +#include "vk_format.h" +#include "vk_sync_dummy.h" +#include "vk_util.h" + +#include "util/debug.h" +#include "util/macros.h" + +#include "glsl_types.h" + +#include +#include +#include + +#include +#include + +#if defined(VK_USE_PLATFORM_WIN32_KHR) || \ + defined(VK_USE_PLATFORM_DISPLAY_KHR) +#define DZN_USE_WSI_PLATFORM +#endif + +#define DZN_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION) + +static const vk_instance_extension_table instance_extensions = { + .KHR_get_physical_device_properties2 = true, +#ifdef DZN_USE_WSI_PLATFORM + .KHR_surface = true, +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + .KHR_win32_surface = true, +#endif +#ifdef VK_USE_PLATFORM_DISPLAY_KHR + .KHR_display = true, + .KHR_get_display_properties2 = true, + .EXT_direct_mode_display = true, + .EXT_display_surface_counter = true, +#endif + .EXT_debug_report = true, + .EXT_debug_utils = true, +}; + +static void +dzn_physical_device_get_extensions(dzn_physical_device *pdev) +{ + pdev->vk.supported_extensions = vk_device_extension_table { +#ifdef DZN_USE_WSI_PLATFORM + .KHR_swapchain = true, +#endif + }; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_EnumerateInstanceExtensionProperties(const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + /* We don't support any layers */ + if (pLayerName) + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); + + return vk_enumerate_instance_extension_properties( + &instance_extensions, pPropertyCount, pProperties); +} + +static const struct debug_control dzn_debug_options[] = { + { "sync", DZN_DEBUG_SYNC }, + { "nir", DZN_DEBUG_NIR }, + { "dxil", DZN_DEBUG_DXIL }, + { "warp", DZN_DEBUG_WARP }, + { "internal", DZN_DEBUG_INTERNAL }, + { "signature", DZN_DEBUG_SIG }, + { "gbv", DZN_DEBUG_GBV }, + { NULL, 0 } +}; + +static void +dzn_physical_device_destroy(dzn_physical_device *pdev) +{ + dzn_instance *instance = container_of(pdev->vk.instance, dzn_instance, vk); + + list_del(&pdev->link); + + if (pdev->dev) + pdev->dev->Release(); + + if (pdev->adapter) + pdev->adapter->Release(); + + dzn_wsi_finish(pdev); + vk_physical_device_finish(&pdev->vk); + vk_free(&instance->vk.alloc, pdev); +} + +static void +dzn_instance_destroy(dzn_instance *instance, const VkAllocationCallbacks *alloc) +{ + if (!instance) + return; + + if (instance->dxc.validator) + instance->dxc.validator->Release(); + + if (instance->dxc.library) + instance->dxc.library->Release(); + + if (instance->dxc.compiler) + instance->dxc.compiler->Release(); + + list_for_each_entry_safe(dzn_physical_device, pdev, + &instance->physical_devices, link) { + dzn_physical_device_destroy(pdev); + } + + vk_instance_finish(&instance->vk); + vk_free2(vk_default_allocator(), alloc, instance); +} + +static VkResult +dzn_instance_create(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *out) +{ + dzn_instance *instance = (dzn_instance *) + vk_zalloc2(vk_default_allocator(), pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_instance_dispatch_table dispatch_table; + vk_instance_dispatch_table_from_entrypoints(&dispatch_table, + &dzn_instance_entrypoints, + true); + + VkResult result = + vk_instance_init(&instance->vk, &instance_extensions, + &dispatch_table, pCreateInfo, + pAllocator ? 
pAllocator : vk_default_allocator()); + if (result != VK_SUCCESS) { + vk_free2(vk_default_allocator(), pAllocator, instance); + return result; + } + + list_inithead(&instance->physical_devices); + instance->physical_devices_enumerated = false; + instance->debug_flags = + parse_debug_string(getenv("DZN_DEBUG"), dzn_debug_options); + + instance->dxc.validator = dxil_get_validator(); + instance->dxc.library = dxc_get_library(); + instance->dxc.compiler = dxc_get_compiler(); + instance->d3d12.serialize_root_sig = d3d12_get_serialize_root_sig(); + + if (!instance->dxc.validator || + !instance->dxc.library || + !instance->dxc.compiler || + !instance->d3d12.serialize_root_sig) { + dzn_instance_destroy(instance, pAllocator); + return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED); + } + + d3d12_enable_debug_layer(); + if (instance->debug_flags & DZN_DEBUG_GBV) + d3d12_enable_gpu_validation(); + + *out = dzn_instance_to_handle(instance); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance) +{ + return dzn_instance_create(pCreateInfo, pAllocator, pInstance); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyInstance(VkInstance instance, + const VkAllocationCallbacks *pAllocator) +{ + dzn_instance_destroy(dzn_instance_from_handle(instance), pAllocator); +} + +static VkResult +dzn_physical_device_create(dzn_instance *instance, + IDXGIAdapter1 *adapter, + const DXGI_ADAPTER_DESC1 *adapter_desc) +{ + dzn_physical_device *pdev = (dzn_physical_device *) + vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + + if (!pdev) + return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_physical_device_dispatch_table dispatch_table; + vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, + &dzn_physical_device_entrypoints, + true); + vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, + &wsi_physical_device_entrypoints, + false); + + VkResult result = + vk_physical_device_init(&pdev->vk, &instance->vk, + NULL, /* We set up extensions later */ + &dispatch_table); + if (result != VK_SUCCESS) { + vk_free(&instance->vk.alloc, pdev); + return result; + } + + mtx_init(&pdev->dev_lock, mtx_plain); + pdev->adapter_desc = *adapter_desc; + pdev->adapter = adapter; + adapter->AddRef(); + list_addtail(&pdev->link, &instance->physical_devices); + + vk_warn_non_conformant_implementation("dzn"); + + /* TODO: correct UUIDs */ + memset(pdev->pipeline_cache_uuid, 0, VK_UUID_SIZE); + memset(pdev->driver_uuid, 0, VK_UUID_SIZE); + memset(pdev->device_uuid, 0, VK_UUID_SIZE); + + /* TODO: something something queue families */ + + result = dzn_wsi_init(pdev); + if (result != VK_SUCCESS) { + dzn_physical_device_destroy(pdev); + return result; + } + + dzn_physical_device_get_extensions(pdev); + + uint32_t num_sync_types = 0; + pdev->sync_types[num_sync_types++] = &dzn_sync_type; + pdev->sync_types[num_sync_types++] = &vk_sync_dummy_type; + pdev->sync_types[num_sync_types] = NULL; + assert(num_sync_types <= MAX_SYNC_TYPES); + pdev->vk.supported_sync_types = pdev->sync_types; + + return VK_SUCCESS; +} + +static void +dzn_physical_device_cache_caps(dzn_physical_device *pdev) +{ + D3D_FEATURE_LEVEL checklist[] = { + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_2, + }; + + D3D12_FEATURE_DATA_FEATURE_LEVELS levels = { + .NumFeatureLevels = ARRAY_SIZE(checklist), + 
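/* CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS) reports the highest entry of this array the device supports through MaxSupportedFeatureLevel. */ +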
.pFeatureLevelsRequested = checklist, + }; + + pdev->dev->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &levels, sizeof(levels)); + pdev->feature_level = levels.MaxSupportedFeatureLevel; + + pdev->dev->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE1, &pdev->architecture, sizeof(pdev->architecture)); + pdev->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &pdev->options, sizeof(pdev->options)); + + pdev->queue_families[pdev->queue_family_count++] = { + .props = { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 64, + .minImageTransferGranularity = { 0, 0, 0 }, + }, + .desc = { + .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, + }, + }; + + pdev->queue_families[pdev->queue_family_count++] = { + .props = { + .queueFlags = VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 8, + .timestampValidBits = 64, + .minImageTransferGranularity = { 0, 0, 0 }, + }, + .desc = { + .Type = D3D12_COMMAND_LIST_TYPE_COMPUTE, + }, + }; + + pdev->queue_families[pdev->queue_family_count++] = { + .props = { + .queueFlags = VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 0, + .minImageTransferGranularity = { 0, 0, 0 }, + }, + .desc = { + .Type = D3D12_COMMAND_LIST_TYPE_COPY, + }, + }; + + assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families)); + + D3D12_COMMAND_QUEUE_DESC queue_desc = { + .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, + .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, + .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, + .NodeMask = 0, + }; + + ComPtr<ID3D12CommandQueue> cmdqueue; + + pdev->dev->CreateCommandQueue(&queue_desc, + IID_PPV_ARGS(&cmdqueue)); + + uint64_t ts_freq; + cmdqueue->GetTimestampFrequency(&ts_freq); + pdev->timestamp_period = 1000000000.0f / ts_freq; +} + +static void +dzn_physical_device_init_memory(dzn_physical_device *pdev) +{ + VkPhysicalDeviceMemoryProperties *mem = &pdev->memory; + const DXGI_ADAPTER_DESC1 *desc = &pdev->adapter_desc; + + mem->memoryHeapCount = 1; + mem->memoryHeaps[0] = VkMemoryHeap { + .size = desc->SharedSystemMemory, + .flags = 0, + }; + + mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + + if (!pdev->architecture.UMA) { + mem->memoryHeaps[mem->memoryHeapCount++] = VkMemoryHeap { + .size = desc->DedicatedVideoMemory, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + }; + mem->memoryTypes[mem->memoryTypeCount++] = VkMemoryType { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .heapIndex = mem->memoryHeapCount - 1, + }; + } else { + mem->memoryHeaps[0].flags |= VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; + mem->memoryTypes[0].propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + mem->memoryTypes[1].propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + + constexpr unsigned MaxTier2MemoryTypes = 3; + assert(mem->memoryTypeCount <= MaxTier2MemoryTypes); + + if (pdev->options.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_1) { + unsigned oldMemoryTypeCount = mem->memoryTypeCount; + VkMemoryType oldMemoryTypes[MaxTier2MemoryTypes]; + + memcpy(oldMemoryTypes, mem->memoryTypes, oldMemoryTypeCount * sizeof(VkMemoryType)); + + mem->memoryTypeCount = 0; + for (unsigned oldMemoryTypeIdx = 0; oldMemoryTypeIdx < oldMemoryTypeCount; 
++oldMemoryTypeIdx) { + D3D12_HEAP_FLAGS flags[] = { + D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS, + D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES, + /* Note: Vulkan requires *all* images to come from the same memory type as long as + * the tiling property (and a few other misc properties) are the same. So, this + * non-RT/DS texture flag will only be used for TILING_LINEAR textures, which + * can't be render targets. + */ + D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES + }; + for (D3D12_HEAP_FLAGS flag : flags) { + pdev->heap_flags_for_mem_type[mem->memoryTypeCount] = flag; + mem->memoryTypes[mem->memoryTypeCount] = oldMemoryTypes[oldMemoryTypeIdx]; + mem->memoryTypeCount++; + } + } + } +} + +static D3D12_HEAP_FLAGS +dzn_physical_device_get_heap_flags_for_mem_type(const dzn_physical_device *pdev, + uint32_t mem_type) +{ + return pdev->heap_flags_for_mem_type[mem_type]; +} + +uint32_t +dzn_physical_device_get_mem_type_mask_for_resource(const dzn_physical_device *pdev, + const D3D12_RESOURCE_DESC *desc) +{ + if (pdev->options.ResourceHeapTier > D3D12_RESOURCE_HEAP_TIER_1) + return (1u << pdev->memory.memoryTypeCount) - 1; + + D3D12_HEAP_FLAGS deny_flag; + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + deny_flag = D3D12_HEAP_FLAG_DENY_BUFFERS; + else if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + deny_flag = D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES; + else + deny_flag = D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; + + uint32_t mask = 0; + for (unsigned i = 0; i < pdev->memory.memoryTypeCount; ++i) { + if ((pdev->heap_flags_for_mem_type[i] & deny_flag) == D3D12_HEAP_FLAG_NONE) + mask |= (1 << i); + } + return mask; +} + +static uint32_t +dzn_physical_device_get_max_mip_levels(bool is_3d) +{ + return is_3d ? 11 : 14; +} + +static uint32_t +dzn_physical_device_get_max_extent(bool is_3d) +{ + uint32_t max_mip = dzn_physical_device_get_max_mip_levels(is_3d); + + return 1 << max_mip; +} + +static uint32_t +dzn_physical_device_get_max_array_layers() +{ + return dzn_physical_device_get_max_extent(false); +} + +static ID3D12Device1 * +dzn_physical_device_get_d3d12_dev(dzn_physical_device *pdev) +{ + dzn_instance *instance = container_of(pdev->vk.instance, dzn_instance, vk); + + mtx_lock(&pdev->dev_lock); + if (!pdev->dev) { + pdev->dev = d3d12_create_device(pdev->adapter, instance->dxc.validator == nullptr); + + dzn_physical_device_cache_caps(pdev); + dzn_physical_device_init_memory(pdev); + } + mtx_unlock(&pdev->dev_lock); + + return pdev->dev; +} + +D3D12_FEATURE_DATA_FORMAT_SUPPORT +dzn_physical_device_get_format_support(dzn_physical_device *pdev, + VkFormat format) +{ + VkImageUsageFlags usage = + vk_format_is_depth_or_stencil(format) ? + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0; + VkImageAspectFlags aspects = 0; + + if (vk_format_has_depth(format)) + aspects = VK_IMAGE_ASPECT_DEPTH_BIT; + if (vk_format_has_stencil(format)) + aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + + D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = { + .Format = dzn_image_get_dxgi_format(format, usage, aspects), + }; + + ID3D12Device *dev = dzn_physical_device_get_d3d12_dev(pdev); + HRESULT hres = + dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, + &dfmt_info, sizeof(dfmt_info)); + assert(!FAILED(hres)); + + if (usage != VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + return dfmt_info; + + /* Depth/stencil resources have different format when they're accessed + * as textures, query the capabilities for this format too. 
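+ * For example, a D32_FLOAT depth attachment is typically sampled through + * an R32_FLOAT SRV, which comes with its own set of support flags.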
+ */ + dzn_foreach_aspect(aspect, aspects) { + D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info2 = { + .Format = dzn_image_get_dxgi_format(format, 0, aspect), + }; + + hres = dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, + &dfmt_info2, sizeof(dfmt_info2)); + assert(!FAILED(hres)); + +#define DS_SRV_FORMAT_SUPPORT1_MASK \ + (D3D12_FORMAT_SUPPORT1_SHADER_LOAD | \ + D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE | \ + D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON | \ + D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_MONO_TEXT | \ + D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE | \ + D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD | \ + D3D12_FORMAT_SUPPORT1_SHADER_GATHER | \ + D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW | \ + D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON) + + dfmt_info.Support1 |= dfmt_info2.Support1 & DS_SRV_FORMAT_SUPPORT1_MASK; + dfmt_info.Support2 |= dfmt_info2.Support2; + } + + return dfmt_info; +} + +void +dzn_physical_device_get_format_properties(dzn_physical_device *pdev, + VkFormat format, + VkFormatProperties2 *properties) +{ + D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = + dzn_physical_device_get_format_support(pdev, format); + VkFormatProperties *base_props = &properties->formatProperties; + + vk_foreach_struct(ext, properties->pNext) { + dzn_debug_ignored_stype(ext->sType); + } + + if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) { + *base_props = VkFormatProperties { }; + return; + } + + ID3D12Device *dev = dzn_physical_device_get_d3d12_dev(pdev); + + *base_props = VkFormatProperties { + .linearTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT, + .optimalTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT, + .bufferFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT, + }; + + if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER) + base_props->bufferFeatures |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + +#define TEX_FLAGS (D3D12_FORMAT_SUPPORT1_TEXTURE1D | \ + D3D12_FORMAT_SUPPORT1_TEXTURE2D | \ + D3D12_FORMAT_SUPPORT1_TEXTURE3D | \ + D3D12_FORMAT_SUPPORT1_TEXTURECUBE) + if (dfmt_info.Support1 & TEX_FLAGS) { + base_props->optimalTilingFeatures |= + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT; + } + + if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) { + base_props->optimalTilingFeatures |= + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } + + if ((dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) && + (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW)) { + base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + } + +#define ATOMIC_FLAGS (D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD | \ + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS | \ + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE | \ + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE | \ + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX | \ + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX) + if ((dfmt_info.Support2 & ATOMIC_FLAGS) == ATOMIC_FLAGS) { + base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + } + + if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) + base_props->bufferFeatures |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + + /* Color/depth/stencil attachment cap implies input attachment cap, and input + * 
attachment loads are lowered to texture loads in dozen, hence the requirement + * to have shader-load support. + */ + if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) { + if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) { + base_props->optimalTilingFeatures |= + VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + + if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BLENDABLE) + base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + + if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) { + base_props->optimalTilingFeatures |= + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } + + /* B4G4R4A4 support is required, but d3d12 doesn't support it. We map this + * format to R4G4B4A4 and adjust the SRV component-mapping to fake + * B4G4R4A4, but that forces us to limit the usage to sampling, which, + * luckily, is exactly what we need to support the required features. + */ + if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { + VkFormatFeatureFlags bgra4_req_features = + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + base_props->optimalTilingFeatures &= bgra4_req_features; + base_props->bufferFeatures = + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; + } +} + +VkResult +dzn_physical_device_get_image_format_properties(dzn_physical_device *pdev, + const VkPhysicalDeviceImageFormatInfo2 *info, + VkImageFormatProperties2 *properties) +{ + const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; + VkExternalImageFormatProperties *external_props = NULL; + + *properties = VkImageFormatProperties2 { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + }; + + /* Extract input structs */ + vk_foreach_struct_const(s, info->pNext) { + switch (s->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: + external_info = (const VkPhysicalDeviceExternalImageFormatInfo *)s; + break; + default: + dzn_debug_ignored_stype(s->sType); + break; + } + } + + assert(info->tiling == VK_IMAGE_TILING_OPTIMAL || info->tiling == VK_IMAGE_TILING_LINEAR); + + /* Extract output structs */ + vk_foreach_struct(s, properties->pNext) { + switch (s->sType) { + case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: + external_props = (VkExternalImageFormatProperties *)s; + break; + default: + dzn_debug_ignored_stype(s->sType); + break; + } + } + + assert((external_props != NULL) == (external_info != NULL)); + + /* TODO: support image import */ + if (external_info && external_info->handleType != 0) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (info->tiling != VK_IMAGE_TILING_OPTIMAL && + (info->usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT))) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (info->tiling != VK_IMAGE_TILING_OPTIMAL && + vk_format_is_depth_or_stencil(info->format)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = + dzn_physical_device_get_format_support(pdev, info->format); + if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + bool is_bgra4 = info->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16; + ID3D12Device *dev = dzn_physical_device_get_d3d12_dev(pdev); + + if ((info->type == VK_IMAGE_TYPE_1D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE1D)) || + (info->type == 
VK_IMAGE_TYPE_2D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D)) || + (info->type == VK_IMAGE_TYPE_3D && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE3D)) || + ((info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURECUBE))) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) && + !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && + (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) || is_bgra4)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && + (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) || is_bgra4)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && + (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) || is_bgra4)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((info->usage & VK_IMAGE_USAGE_STORAGE_BIT) && + (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) || is_bgra4)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + bool is_3d = info->type == VK_IMAGE_TYPE_3D; + uint32_t max_extent = dzn_physical_device_get_max_extent(is_3d); + + if (info->tiling == VK_IMAGE_TILING_OPTIMAL && + dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_MIP) + properties->imageFormatProperties.maxMipLevels = dzn_physical_device_get_max_mip_levels(is_3d); + else + properties->imageFormatProperties.maxMipLevels = 1; + + if (info->tiling == VK_IMAGE_TILING_OPTIMAL) + properties->imageFormatProperties.maxArrayLayers = dzn_physical_device_get_max_array_layers(); + else + properties->imageFormatProperties.maxArrayLayers = 1; + + switch (info->type) { + case VK_IMAGE_TYPE_1D: + properties->imageFormatProperties.maxExtent.width = max_extent; + properties->imageFormatProperties.maxExtent.height = 1; + properties->imageFormatProperties.maxExtent.depth = 1; + break; + case VK_IMAGE_TYPE_2D: + properties->imageFormatProperties.maxExtent.width = max_extent; + properties->imageFormatProperties.maxExtent.height = max_extent; + properties->imageFormatProperties.maxExtent.depth = 1; + break; + case VK_IMAGE_TYPE_3D: + if (info->tiling != VK_IMAGE_TILING_OPTIMAL) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + properties->imageFormatProperties.maxExtent.width = max_extent; + properties->imageFormatProperties.maxExtent.height = max_extent; + properties->imageFormatProperties.maxExtent.depth = max_extent; + break; + default: + unreachable("bad VkImageType"); + } + + /* From the Vulkan 1.0 spec, section 34.1.1. Supported Sample Counts: + * + * sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the + * following conditions is true: + * + * - tiling is VK_IMAGE_TILING_LINEAR + * - type is not VK_IMAGE_TYPE_2D + * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT + * - neither the VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT flag nor the + * VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT flag in + * VkFormatProperties::optimalTilingFeatures returned by + * vkGetPhysicalDeviceFormatProperties is set. 
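+ * (Here, VK_SAMPLE_COUNT_1_BIT is always reported, and the higher counts are + * probed with D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS checks below.)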
+ * + * D3D12 has a few more constraints: + * - no UAVs on multisample resources + */ + bool rt_or_ds_cap = + dfmt_info.Support1 & + (D3D12_FORMAT_SUPPORT1_RENDER_TARGET | D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL); + + properties->imageFormatProperties.sampleCounts = VK_SAMPLE_COUNT_1_BIT; + if (info->tiling != VK_IMAGE_TILING_LINEAR && + info->type == VK_IMAGE_TYPE_2D && + !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + rt_or_ds_cap && + !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + for (uint32_t s = VK_SAMPLE_COUNT_2_BIT; s <= VK_SAMPLE_COUNT_64_BIT; s <<= 1) { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS ms_info = { + .Format = dfmt_info.Format, + .SampleCount = s, + }; + + HRESULT hres = + dev->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &ms_info, sizeof(ms_info)); + if (!FAILED(hres) && ms_info.NumQualityLevels > 0) + properties->imageFormatProperties.sampleCounts |= s; + } + } + + /* TODO: set correct value here */ + properties->imageFormatProperties.maxResourceSize = UINT32_MAX; + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties2 *pFormatProperties) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + + dzn_physical_device_get_format_properties(pdev, format, pFormatProperties); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceImageFormatInfo2 *info, + VkImageFormatProperties2 *props) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + + return dzn_physical_device_get_image_format_properties(pdev, info, props); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags createFlags, + VkImageFormatProperties *pImageFormatProperties) +{ + const VkPhysicalDeviceImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = type, + .tiling = tiling, + .usage = usage, + .flags = createFlags, + }; + + VkImageFormatProperties2 props = {}; + + VkResult result = + dzn_GetPhysicalDeviceImageFormatProperties2(physicalDevice, &info, &props); + *pImageFormatProperties = props.imageFormatProperties; + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkSampleCountFlagBits samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties *pProperties) +{ + *pPropertyCount = 0; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties2 *pProperties) +{ + *pPropertyCount = 0; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) +{ + pExternalBufferProperties->externalMemoryProperties = + VkExternalMemoryProperties { + .compatibleHandleTypes = (VkExternalMemoryHandleTypeFlags)pExternalBufferInfo->handleType, + }; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_EnumeratePhysicalDevices(VkInstance 
inst, + uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices) +{ + VK_FROM_HANDLE(dzn_instance, instance, inst); + + if (!instance->physical_devices_enumerated) { + ComPtr<IDXGIFactory4> factory = dxgi_get_factory(false); + ComPtr<IDXGIAdapter1> adapter(NULL); + for (UINT i = 0; SUCCEEDED(factory->EnumAdapters1(i, &adapter)); ++i) { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + if (instance->debug_flags & DZN_DEBUG_WARP) { + if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) == 0) + continue; + } + + VkResult result = + dzn_physical_device_create(instance, adapter.Get(), &desc); + if (result != VK_SUCCESS) + return result; + } + } + + VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, + pPhysicalDeviceCount); + + list_for_each_entry(dzn_physical_device, pdev, &instance->physical_devices, link) { + vk_outarray_append_typed(VkPhysicalDevice, &out, i) + *i = dzn_physical_device_to_handle(pdev); + } + + instance->physical_devices_enumerated = true; + return vk_outarray_status(&out); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_EnumerateInstanceVersion(uint32_t *pApiVersion) +{ + *pApiVersion = DZN_API_VERSION; + return VK_SUCCESS; +} + +bool +dzn_physical_device_supports_compressed_format(dzn_physical_device *pdev, + const VkFormat *formats, + uint32_t format_count) +{ +#define REQUIRED_COMPRESSED_CAPS \ + (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | \ + VK_FORMAT_FEATURE_BLIT_SRC_BIT | \ + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) + for (uint32_t i = 0; i < format_count; i++) { + VkFormatProperties2 props = {}; + dzn_physical_device_get_format_properties(pdev, formats[i], &props); + if ((props.formatProperties.optimalTilingFeatures & REQUIRED_COMPRESSED_CAPS) != REQUIRED_COMPRESSED_CAPS) + return false; + } + + return true; +} + +bool +dzn_physical_device_supports_bc(dzn_physical_device *pdev) +{ + static const VkFormat formats[] = { + VK_FORMAT_BC1_RGB_UNORM_BLOCK, + VK_FORMAT_BC1_RGB_SRGB_BLOCK, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + }; + + return dzn_physical_device_supports_compressed_format(pdev, formats, ARRAY_SIZE(formats)); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures *pFeatures) +{ + VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); + + *pFeatures = VkPhysicalDeviceFeatures { + .robustBufferAccess = true, /* This feature is mandatory */ + .fullDrawIndexUint32 = false, + .imageCubeArray = true, + .independentBlend = false, + .geometryShader = false, + .tessellationShader = false, + .sampleRateShading = false, + .dualSrcBlend = false, + .logicOp = false, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = false, + .depthBiasClamp = false, + .fillModeNonSolid = false, + .depthBounds = false, + .wideLines = false, + .largePoints = false, + .alphaToOne = false, + .multiViewport = false, + .samplerAnisotropy = false, + .textureCompressionETC2 = false, + .textureCompressionASTC_LDR = false, + .textureCompressionBC = dzn_physical_device_supports_bc(pdev), + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = true, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = 
true, + .shaderTessellationAndGeometryPointSize = false, + .shaderImageGatherExtended = false, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = false, + .shaderUniformBufferArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .shaderResourceResidency = false, + .shaderResourceMinLod = false, + .sparseBinding = false, + .sparseResidencyBuffer = false, + .sparseResidencyImage2D = false, + .sparseResidencyImage3D = false, + .sparseResidency2Samples = false, + .sparseResidency4Samples = false, + .sparseResidency8Samples = false, + .sparseResidency16Samples = false, + .sparseResidencyAliased = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + dzn_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); + + vk_foreach_struct(ext, pFeatures->pNext) { + dzn_debug_ignored_stype(ext->sType); + } +} + + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +dzn_GetInstanceProcAddr(VkInstance _instance, + const char *pName) +{ + VK_FROM_HANDLE(dzn_instance, instance, _instance); + return vk_instance_get_proc_addr(&instance->vk, + &dzn_instance_entrypoints, + pName); +} + +/* Windows will use a dll definition file to avoid build errors. */ +#ifdef _WIN32 +#undef PUBLIC +#define PUBLIC +#endif + +/* With version 1+ of the loader interface the ICD should expose + * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps. + */ +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, + const char *pName); + +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, + const char *pName) +{ + return dzn_GetInstanceProcAddr(instance, pName); +} + +/* With version 4+ of the loader interface the ICD should expose + * vk_icdGetPhysicalDeviceProcAddr() + */ +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, + const char* pName); + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, + const char* pName) +{ + VK_FROM_HANDLE(dzn_instance, instance, _instance); + return vk_instance_get_physical_device_proc_addr(&instance->vk, pName); +} + +/* vk_icd.h does not declare this function, so we declare it here to + * suppress Wmissing-prototypes. + */ +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion); + +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion) +{ + /* For the full details on loader interface versioning, see + * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. + * What follows is a condensed summary, to help you navigate the large and + * confusing official doc. + * + * - Loader interface v0 is incompatible with later versions. We don't + * support it. + * + * - In loader interface v1: + * - The first ICD entrypoint called by the loader is + * vk_icdGetInstanceProcAddr(). The ICD must statically expose this + * entrypoint. 
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
+                                VkPhysicalDeviceProperties *pProperties)
+{
+   VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice);
+
+   /* minimum from the spec */
+   const VkSampleCountFlags supported_sample_counts =
+      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+
+   /* FIXME: this is mostly bunk for now */
+   VkPhysicalDeviceLimits limits = {
+
+      /* TODO: support older feature levels */
+      .maxImageDimension1D = (1 << 14),
+      .maxImageDimension2D = (1 << 14),
+      .maxImageDimension3D = (1 << 11),
+      .maxImageDimensionCube = (1 << 14),
+      .maxImageArrayLayers = (1 << 11),
+
+      /* from here on, we simply use the minimum values from the spec for now */
+      .maxTexelBufferElements = 65536,
+      .maxUniformBufferRange = 16384,
+      .maxStorageBufferRange = (1ul << 27),
+      .maxPushConstantsSize = 128,
+      .maxMemoryAllocationCount = 4096,
+      .maxSamplerAllocationCount = 4000,
+      .bufferImageGranularity = 131072,
+      .sparseAddressSpaceSize = 0,
+      .maxBoundDescriptorSets = MAX_SETS,
+      .maxPerStageDescriptorSamplers = 16,
+      .maxPerStageDescriptorUniformBuffers = 12,
+      .maxPerStageDescriptorStorageBuffers = 4,
+      .maxPerStageDescriptorSampledImages = 16,
+      .maxPerStageDescriptorStorageImages = 4,
+      .maxPerStageDescriptorInputAttachments = 4,
+      .maxPerStageResources = 128,
+      .maxDescriptorSetSamplers = 96,
+      .maxDescriptorSetUniformBuffers = 72,
+      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
+      .maxDescriptorSetStorageBuffers = 24,
+      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
+      .maxDescriptorSetSampledImages = 96,
+      .maxDescriptorSetStorageImages = 24,
+      .maxDescriptorSetInputAttachments = 4,
+      .maxVertexInputAttributes = 16,
+      .maxVertexInputBindings = 16,
+      .maxVertexInputAttributeOffset = 2047,
+      .maxVertexInputBindingStride = 2048,
+      .maxVertexOutputComponents = 64,
+      .maxTessellationGenerationLevel = 0,
+      .maxTessellationPatchSize = 0,
+      .maxTessellationControlPerVertexInputComponents = 0,
+      .maxTessellationControlPerVertexOutputComponents = 0,
+      .maxTessellationControlPerPatchOutputComponents = 0,
+      .maxTessellationControlTotalOutputComponents = 0,
+      .maxTessellationEvaluationInputComponents = 0,
+      .maxTessellationEvaluationOutputComponents = 0,
+      .maxGeometryShaderInvocations = 0,
+      .maxGeometryInputComponents = 0,
+      .maxGeometryOutputComponents = 0,
+      .maxGeometryOutputVertices = 0,
+      .maxGeometryTotalOutputComponents = 0,
+      .maxFragmentInputComponents = 64,
+      .maxFragmentOutputAttachments = 4,
+      .maxFragmentDualSrcAttachments = 0,
+      .maxFragmentCombinedOutputResources = 4,
+      .maxComputeSharedMemorySize = 16384,
+      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
+      .maxComputeWorkGroupInvocations = 128,
+      .maxComputeWorkGroupSize = { 128, 128, 64 },
+      .subPixelPrecisionBits = 4,
+      .subTexelPrecisionBits = 4,
+      .mipmapPrecisionBits = 4,
+      .maxDrawIndexedIndexValue = 0x00ffffff,
+      .maxDrawIndirectCount = 1,
+      .maxSamplerLodBias = 2.0f,
+      .maxSamplerAnisotropy = 1.0f,
+      .maxViewports = 1,
+      .maxViewportDimensions = { 4096, 4096 },
+      .viewportBoundsRange = { -8192, 8191 },
+      .viewportSubPixelBits = 0,
+      .minMemoryMapAlignment = 64,
+      .minTexelBufferOffsetAlignment = 256,
+      .minUniformBufferOffsetAlignment = 256,
+      .minStorageBufferOffsetAlignment = 256,
+      .minTexelOffset = -8,
+      .maxTexelOffset = 7,
+      .minTexelGatherOffset = 0,
+      .maxTexelGatherOffset = 0,
+      .minInterpolationOffset = 0.0f,
+      .maxInterpolationOffset = 0.0f,
+      .subPixelInterpolationOffsetBits = 0,
+      .maxFramebufferWidth = 4096,
+      .maxFramebufferHeight = 4096,
+      .maxFramebufferLayers = 256,
+      .framebufferColorSampleCounts = supported_sample_counts,
+      .framebufferDepthSampleCounts = supported_sample_counts,
+      .framebufferStencilSampleCounts = supported_sample_counts,
+      .framebufferNoAttachmentsSampleCounts = supported_sample_counts,
+      .maxColorAttachments = 4,
+      .sampledImageColorSampleCounts = supported_sample_counts,
+      .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
+      .sampledImageDepthSampleCounts = supported_sample_counts,
+      .sampledImageStencilSampleCounts = supported_sample_counts,
+      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
+      .maxSampleMaskWords = 1,
+      .timestampComputeAndGraphics = true,
+      .timestampPeriod = pdevice->timestamp_period,
+      .maxClipDistances = 8,
+      .maxCullDistances = 8,
+      .maxCombinedClipAndCullDistances = 8,
+      .discreteQueuePriorities = 2,
+      .pointSizeRange = { 1.0f, 1.0f },
+      .lineWidthRange = { 1.0f, 1.0f },
+      .pointSizeGranularity = 0.0f,
+      .lineWidthGranularity = 0.0f,
+      .strictLines = false,
+      .standardSampleLocations = false,
+      .optimalBufferCopyOffsetAlignment = 1,
+      .optimalBufferCopyRowPitchAlignment = 1,
+      .nonCoherentAtomSize = 256,
+   };
+
+   const DXGI_ADAPTER_DESC1& desc = pdevice->adapter_desc;
+
+   VkPhysicalDeviceType devtype = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+   if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE)
+      devtype = VK_PHYSICAL_DEVICE_TYPE_CPU;
+   else if (false) { // TODO: detect discrete GPUs
+      /* This is a tad tricky to get right, because we need to have the
+       * actual ID3D12Device before we can query the
+       * D3D12_FEATURE_DATA_ARCHITECTURE structure... So for now, let's
+       * just pretend everything is integrated, because... well, that's
+       * what I have at hand right now ;)
+       */
+      devtype = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
+   }
+
+   *pProperties = VkPhysicalDeviceProperties {
+      .apiVersion = DZN_API_VERSION,
+      .driverVersion = vk_get_driver_version(),
+
+      .vendorID = desc.VendorId,
+      .deviceID = desc.DeviceId,
+      .deviceType = devtype,
+
+      .limits = limits,
+      .sparseProperties = { 0 },
+   };
+
+   snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
+            "Microsoft Direct3D12 (%S)", desc.Description);
+
+   memcpy(pProperties->pipelineCacheUUID,
+          pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
+}
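The pNext chain handled just below can be exercised like this from the application side (hypothetical sketch, not part of this patch):

   VkPhysicalDeviceIDProperties id_props = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES,
   };
   VkPhysicalDeviceProperties2 props2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
      .pNext = &id_props,
   };
   vkGetPhysicalDeviceProperties2(physical_device, &props2);
   /* dzn fills deviceUUID/driverUUID; deviceLUIDValid stays false for now. */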
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
+                                 VkPhysicalDeviceProperties2 *pProperties)
+{
+   VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice);
+
+   dzn_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
+
+   vk_foreach_struct(ext, pProperties->pNext) {
+      switch (ext->sType) {
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
+         VkPhysicalDeviceIDProperties *id_props =
+            (VkPhysicalDeviceIDProperties *)ext;
+         memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
+         memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
+         /* The LUID is for Windows. */
+         id_props->deviceLUIDValid = false;
+         break;
+      }
+      default:
+         dzn_debug_ignored_stype(ext->sType);
+         break;
+      }
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
+                                            uint32_t *pQueueFamilyPropertyCount,
+                                            VkQueueFamilyProperties2 *pQueueFamilyProperties)
+{
+   VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
+   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
+                          pQueueFamilyProperties, pQueueFamilyPropertyCount);
+
+   (void)dzn_physical_device_get_d3d12_dev(pdev);
+
+   for (uint32_t i = 0; i < pdev->queue_family_count; i++) {
+      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
+         p->queueFamilyProperties = pdev->queue_families[i].props;
+
+         vk_foreach_struct(ext, p->pNext) {
+            dzn_debug_ignored_stype(ext->sType);
+         }
+      }
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
+                                      VkPhysicalDeviceMemoryProperties *pMemoryProperties)
+{
+   VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
+
+   // Ensure memory caps are up-to-date
+   (void)dzn_physical_device_get_d3d12_dev(pdev);
+   *pMemoryProperties = pdev->memory;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
+                                       VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
+{
+   dzn_GetPhysicalDeviceMemoryProperties(physicalDevice,
+                                         &pMemoryProperties->memoryProperties);
+
+   vk_foreach_struct(ext, pMemoryProperties->pNext) {
+      dzn_debug_ignored_stype(ext->sType);
+   }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
+                                     VkLayerProperties *pProperties)
+{
+   if (pProperties == NULL) {
+      *pPropertyCount = 0;
+      return VK_SUCCESS;
+   }
+
+   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+}
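The sync helpers below translate vk_sync waits/signals into GPU-side ID3D12Fence Wait()/Signal() calls on the command queue. For contrast, a CPU-side wait on the same kind of fence would look roughly like this (hypothetical helper, not part of this patch; assumes windows.h):

   static bool
   wait_fence_on_cpu(ID3D12Fence *fence, uint64_t value)
   {
      if (fence->GetCompletedValue() >= value)
         return true;

      HANDLE event = CreateEvent(NULL, FALSE, FALSE, NULL);
      if (!event)
         return false;

      /* Have the fence signal the event once it reaches `value`, then block. */
      bool ok = SUCCEEDED(fence->SetEventOnCompletion(value, event));
      if (ok)
         WaitForSingleObject(event, INFINITE);
      CloseHandle(event);
      return ok;
   }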
+static VkResult
+dzn_queue_sync_wait(dzn_queue *queue, const struct vk_sync_wait *wait)
+{
+   if (wait->sync->type == &vk_sync_dummy_type)
+      return VK_SUCCESS;
+
+   dzn_device *device = container_of(queue->vk.base.device, dzn_device, vk);
+   assert(wait->sync->type == &dzn_sync_type);
+   dzn_sync *sync = container_of(wait->sync, dzn_sync, vk);
+   uint64_t value =
+      (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? wait->wait_value : 1;
+
+   assert(sync->fence != NULL);
+
+   if (value > 0 && FAILED(queue->cmdqueue->Wait(sync->fence, value)))
+      return vk_error(device, VK_ERROR_UNKNOWN);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+dzn_queue_sync_signal(dzn_queue *queue, const struct vk_sync_signal *signal)
+{
+   if (signal->sync->type == &vk_sync_dummy_type)
+      return VK_SUCCESS;
+
+   dzn_device *device = container_of(queue->vk.base.device, dzn_device, vk);
+   assert(signal->sync->type == &dzn_sync_type);
+   dzn_sync *sync = container_of(signal->sync, dzn_sync, vk);
+   uint64_t value =
+      (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? signal->signal_value : 1;
+   assert(value > 0);
+
+   assert(sync->fence != NULL);
+
+   if (FAILED(queue->cmdqueue->Signal(sync->fence, value)))
+      return vk_error(device, VK_ERROR_UNKNOWN);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+dzn_queue_submit(struct vk_queue *q,
+                 struct vk_queue_submit *info)
+{
+   dzn_queue *queue = container_of(q, dzn_queue, vk);
+   dzn_device *device = container_of(q->base.device, dzn_device, vk);
+   VkResult result = VK_SUCCESS;
+
+   for (uint32_t i = 0; i < info->wait_count; i++) {
+      result = dzn_queue_sync_wait(queue, &info->waits[i]);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   for (uint32_t i = 0; i < info->command_buffer_count; i++) {
+      dzn_cmd_buffer *cmd_buffer =
+         container_of(info->command_buffers[i], dzn_cmd_buffer, vk);
+
+      ID3D12CommandList *cmdlists[] = { cmd_buffer->cmdlist };
+
+      util_dynarray_foreach(&cmd_buffer->events.wait, dzn_event *, evt) {
+         if (FAILED(queue->cmdqueue->Wait((*evt)->fence, 1)))
+            return vk_error(device, VK_ERROR_UNKNOWN);
+      }
+
+      util_dynarray_foreach(&cmd_buffer->queries.wait, dzn_cmd_buffer_query_range, range) {
+         mtx_lock(&range->qpool->queries_lock);
+         for (uint32_t q = range->start; q < range->start + range->count; q++) {
+            struct dzn_query *query = &range->qpool->queries[q];
+
+            if (query->fence &&
+                FAILED(queue->cmdqueue->Wait(query->fence, query->fence_value))) {
+               /* Don't leak the lock on the error path. */
+               mtx_unlock(&range->qpool->queries_lock);
+               return vk_error(device, VK_ERROR_UNKNOWN);
+            }
+         }
+         mtx_unlock(&range->qpool->queries_lock);
+      }
+
+      util_dynarray_foreach(&cmd_buffer->queries.reset, dzn_cmd_buffer_query_range, range) {
+         mtx_lock(&range->qpool->queries_lock);
+         for (uint32_t q = range->start; q < range->start + range->count; q++) {
+            struct dzn_query *query = &range->qpool->queries[q];
+            if (query->fence) {
+               query->fence->Release();
+               query->fence = NULL;
+            }
+            query->fence_value = 0;
+         }
+         mtx_unlock(&range->qpool->queries_lock);
+      }
+
+      queue->cmdqueue->ExecuteCommandLists(1, cmdlists);
+
+      util_dynarray_foreach(&cmd_buffer->events.signal, dzn_cmd_event_signal, evt) {
+         if (FAILED(queue->cmdqueue->Signal(evt->event->fence, evt->value ?
1 : 0))) + return vk_error(device, VK_ERROR_UNKNOWN); + } + + util_dynarray_foreach(&cmd_buffer->queries.signal, dzn_cmd_buffer_query_range, range) { + mtx_lock(&range->qpool->queries_lock); + for (uint32_t q = range->start; q < range->start + range->count; q++) { + struct dzn_query *query = &range->qpool->queries[q]; + query->fence_value = queue->fence_point + 1; + query->fence = queue->fence; + query->fence->AddRef(); + } + mtx_unlock(&range->qpool->queries_lock); + } + } + + for (uint32_t i = 0; i < info->signal_count; i++) { + result = dzn_queue_sync_signal(queue, &info->signals[i]); + if (result != VK_SUCCESS) + return vk_error(device, VK_ERROR_UNKNOWN); + } + + if (FAILED(queue->cmdqueue->Signal(queue->fence, ++queue->fence_point))) + return vk_error(device, VK_ERROR_UNKNOWN); + + return VK_SUCCESS; +} + +static void +dzn_queue_finish(dzn_queue *queue) +{ + if (queue->cmdqueue) + queue->cmdqueue->Release(); + + if (queue->fence) + queue->fence->Release(); + + vk_queue_finish(&queue->vk); +} + +static VkResult +dzn_queue_init(dzn_queue *queue, + dzn_device *device, + const VkDeviceQueueCreateInfo *pCreateInfo, + uint32_t index_in_family) +{ + dzn_physical_device *pdev = container_of(device->vk.physical, dzn_physical_device, vk); + + VkResult result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family); + if (result != VK_SUCCESS) + return result; + + queue->vk.driver_submit = dzn_queue_submit; + + assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count); + + D3D12_COMMAND_QUEUE_DESC queue_desc = + pdev->queue_families[pCreateInfo->queueFamilyIndex].desc; + + queue_desc.Priority = + (INT)(pCreateInfo->pQueuePriorities[index_in_family] * (float)D3D12_COMMAND_QUEUE_PRIORITY_HIGH); + queue_desc.NodeMask = 0; + + if (FAILED(device->dev->CreateCommandQueue(&queue_desc, + IID_PPV_ARGS(&queue->cmdqueue)))) { + dzn_queue_finish(queue); + return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED); + } + + if (FAILED(device->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&queue->fence)))) { + dzn_queue_finish(queue); + return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED); + } + + return VK_SUCCESS; +} + +static VkResult +check_physical_device_features(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceFeatures *features) +{ + VkPhysicalDeviceFeatures supported_features; + dzn_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); + VkBool32 *supported_feature = (VkBool32 *)&supported_features; + VkBool32 *enabled_feature = (VkBool32 *)features; + unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); + for (uint32_t i = 0; i < num_features; i++) { + if (enabled_feature[i] && !supported_feature[i]) + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + return VK_SUCCESS; +} + +static VkResult +dzn_device_create_sync_for_memory(struct vk_device *device, + VkDeviceMemory memory, + bool signal_memory, + struct vk_sync **sync_out) +{ + return vk_sync_create(device, &vk_sync_dummy_type, + (enum vk_sync_flags)0, 1, sync_out); +} + +static void +dzn_device_ref_pipeline_layout(struct vk_device *dev, VkPipelineLayout layout) +{ + VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); + + dzn_pipeline_layout_ref(playout); +} + +static void +dzn_device_unref_pipeline_layout(struct vk_device *dev, VkPipelineLayout layout) +{ + VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); + + dzn_pipeline_layout_unref(playout); +} + +static VkResult +dzn_device_query_init(dzn_device *device) +{ + /* FIXME: 
create the resource in the default heap */ + D3D12_HEAP_PROPERTIES hprops = + device->dev->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_UPLOAD); + D3D12_RESOURCE_DESC rdesc = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + .Width = DZN_QUERY_REFS_RES_SIZE, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { .Count = 1, .Quality = 0 }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE, + }; + + if (FAILED(device->dev->CreateCommittedResource(&hprops, + D3D12_HEAP_FLAG_NONE, + &rdesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + NULL, + IID_PPV_ARGS(&device->queries.refs)))) + return vk_error(device->vk.physical, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + uint8_t *queries_ref; + if (FAILED(device->queries.refs->Map(0, NULL, (void **)&queries_ref))) + return vk_error(device->vk.physical, VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(queries_ref + DZN_QUERY_REFS_ALL_ONES_OFFSET, 0xff, DZN_QUERY_REFS_SECTION_SIZE); + memset(queries_ref + DZN_QUERY_REFS_ALL_ZEROS_OFFSET, 0x0, DZN_QUERY_REFS_SECTION_SIZE); + device->queries.refs->Unmap(0, NULL); + + return VK_SUCCESS; +} + +static void +dzn_device_query_finish(dzn_device *device) +{ + if (device->queries.refs) + device->queries.refs->Release(); +} + +static void +dzn_device_destroy(dzn_device *device, const VkAllocationCallbacks *pAllocator) +{ + if (!device) + return; + + dzn_instance *instance = + container_of(device->vk.physical->instance, dzn_instance, vk); + + vk_foreach_queue_safe(q, &device->vk) { + dzn_queue *queue = container_of(q, dzn_queue, vk); + + dzn_queue_finish(queue); + } + + dzn_device_query_finish(device); + dzn_meta_finish(device); + + if (device->dev) + device->dev->Release(); + + vk_device_finish(&device->vk); + vk_free2(&instance->vk.alloc, pAllocator, device); +} + +static VkResult +dzn_device_create(dzn_physical_device *pdev, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *out) +{ + dzn_instance *instance = container_of(pdev->vk.instance, dzn_instance, vk); + + uint32_t queue_count = 0; + for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) { + const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf]; + queue_count += qinfo->queueCount; + } + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, dzn_device, device, 1); + VK_MULTIALLOC_DECL(&ma, dzn_queue, queues, queue_count); + + if (!vk_multialloc_zalloc2(&ma, &instance->vk.alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_device_dispatch_table dispatch_table; + + /* For secondary command buffer support, overwrite any command entrypoints + * in the main device-level dispatch table with + * vk_cmd_enqueue_unless_primary_Cmd*. + */ + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &vk_cmd_enqueue_unless_primary_device_entrypoints, true); + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &dzn_device_entrypoints, false); + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &wsi_device_entrypoints, false); + + /* Populate our primary cmd_dispatch table. 
 */
+   vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
+                                             &dzn_device_entrypoints, true);
+   vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
+                                             &vk_common_device_entrypoints,
+                                             false);
+
+   VkResult result =
+      vk_device_init(&device->vk, &pdev->vk, &dispatch_table, pCreateInfo, pAllocator);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, pAllocator, device);
+      return result;
+   }
+
+   /* Must be done after vk_device_init(), because vk_device_init() memsets
+    * the whole struct to 0.
+    */
+   device->vk.command_dispatch_table = &device->cmd_dispatch;
+   device->vk.ref_pipeline_layout = dzn_device_ref_pipeline_layout;
+   device->vk.unref_pipeline_layout = dzn_device_unref_pipeline_layout;
+   device->vk.create_sync_for_memory = dzn_device_create_sync_for_memory;
+
+   device->dev = dzn_physical_device_get_d3d12_dev(pdev);
+   if (!device->dev) {
+      dzn_device_destroy(device, pAllocator);
+      return vk_error(pdev, VK_ERROR_INITIALIZATION_FAILED);
+   }
+
+   device->dev->AddRef();
+
+   ID3D12InfoQueue *info_queue;
+   if (SUCCEEDED(device->dev->QueryInterface(IID_PPV_ARGS(&info_queue)))) {
+      D3D12_MESSAGE_SEVERITY severities[] = {
+         D3D12_MESSAGE_SEVERITY_INFO,
+         D3D12_MESSAGE_SEVERITY_WARNING,
+      };
+
+      D3D12_MESSAGE_ID msg_ids[] = {
+         D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
+      };
+
+      D3D12_INFO_QUEUE_FILTER NewFilter = {};
+      NewFilter.DenyList.NumSeverities = ARRAY_SIZE(severities);
+      NewFilter.DenyList.pSeverityList = severities;
+      NewFilter.DenyList.NumIDs = ARRAY_SIZE(msg_ids);
+      NewFilter.DenyList.pIDList = msg_ids;
+
+      info_queue->PushStorageFilter(&NewFilter);
+      info_queue->Release();
+   }
+
+   result = dzn_meta_init(device);
+   if (result != VK_SUCCESS) {
+      dzn_device_destroy(device, pAllocator);
+      return result;
+   }
+
+   result = dzn_device_query_init(device);
+   if (result != VK_SUCCESS) {
+      dzn_device_destroy(device, pAllocator);
+      return result;
+   }
+
+   uint32_t qindex = 0;
+   for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) {
+      const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf];
+
+      for (uint32_t q = 0; q < qinfo->queueCount; q++) {
+         result =
+            dzn_queue_init(&queues[qindex++], device, qinfo, q);
+         if (result != VK_SUCCESS) {
+            dzn_device_destroy(device, pAllocator);
+            return result;
+         }
+      }
+   }
+
+   assert(queue_count == qindex);
+   *out = dzn_device_to_handle(device);
+   return VK_SUCCESS;
+}
+
+ID3D12RootSignature *
+dzn_device_create_root_sig(dzn_device *device,
+                           const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc)
+{
+   dzn_instance *instance =
+      container_of(device->vk.physical->instance, dzn_instance, vk);
+   ComPtr<ID3DBlob> sig, error;
+
+   if (FAILED(instance->d3d12.serialize_root_sig(desc,
+                                                 &sig, &error))) {
+      if (instance->debug_flags & DZN_DEBUG_SIG) {
+         const char* error_msg = (const char*)error->GetBufferPointer();
+         fprintf(stderr,
+                 "== SERIALIZE ROOT SIG ERROR =============================================\n"
+                 "%s\n"
+                 "== END ==========================================================\n",
+                 error_msg);
+      }
+
+      return NULL;
+   }
+
+   ID3D12RootSignature *root_sig;
+   if (FAILED(device->dev->CreateRootSignature(0,
+                                               sig->GetBufferPointer(),
+                                               sig->GetBufferSize(),
+                                               IID_PPV_ARGS(&root_sig))))
+      return NULL;
+
+   return root_sig;
+}
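A usage sketch for the helper above (hypothetical call site, not part of this patch; the empty root signature is only an illustration):

   D3D12_VERSIONED_ROOT_SIGNATURE_DESC desc = {
      .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
      .Desc_1_1 = {
         .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
      },
   };
   ID3D12RootSignature *root_sig = dzn_device_create_root_sig(device, &desc);
   if (!root_sig) {
      /* Serialization errors are dumped to stderr when DZN_DEBUG_SIG is set;
       * otherwise the helper just returns NULL. */
   }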
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateDevice(VkPhysicalDevice physicalDevice,
+                 const VkDeviceCreateInfo *pCreateInfo,
+                 const VkAllocationCallbacks *pAllocator,
+                 VkDevice *pDevice)
+{
+   VK_FROM_HANDLE(dzn_physical_device, physical_device, physicalDevice);
+   dzn_instance *instance =
+      container_of(physical_device->vk.instance, dzn_instance, vk);
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
+
+   /* Check enabled features */
+   if (pCreateInfo->pEnabledFeatures) {
+      result = check_physical_device_features(physicalDevice,
+                                              pCreateInfo->pEnabledFeatures);
+      if (result != VK_SUCCESS)
+         return vk_error(physical_device, result);
+   }
+
+   /* Check requested queues and fail if we are requested to create any
+    * queues with flags we don't support.
+    */
+   assert(pCreateInfo->queueCreateInfoCount > 0);
+   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+      if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
+         return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
+   }
+
+   return dzn_device_create(physical_device, pCreateInfo, pAllocator, pDevice);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyDevice(VkDevice dev,
+                  const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(dzn_device, device, dev);
+
+   device->vk.dispatch_table.DeviceWaitIdle(dev);
+
+   dzn_device_destroy(device, pAllocator);
+}
+
+static void
+dzn_device_memory_destroy(dzn_device_memory *mem,
+                          const VkAllocationCallbacks *pAllocator)
+{
+   if (!mem)
+      return;
+
+   dzn_device *device = container_of(mem->base.device, dzn_device, vk);
+
+   if (mem->map)
+      mem->map_res->Unmap(0, NULL);
+
+   if (mem->map_res)
+      mem->map_res->Release();
+
+   if (mem->heap)
+      mem->heap->Release();
+
+   vk_object_base_finish(&mem->base);
+   vk_free2(&device->vk.alloc, pAllocator, mem);
+}
+
+static VkResult
+dzn_device_memory_create(dzn_device *device,
+                         const VkMemoryAllocateInfo *pAllocateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkDeviceMemory *out)
+{
+   dzn_physical_device *pdevice =
+      container_of(device->vk.physical, dzn_physical_device, vk);
+
+   dzn_device_memory *mem = (dzn_device_memory *)
+      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!mem)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
+
+   /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
+   assert(pAllocateInfo->allocationSize > 0);
+
+   mem->size = pAllocateInfo->allocationSize;
+
+#if 0
+   const VkExportMemoryAllocateInfo *export_info = NULL;
+   VkMemoryAllocateFlags vk_flags = 0;
+#endif
+
+   vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
+      dzn_debug_ignored_stype(ext->sType);
+   }
+
+   const VkMemoryType *mem_type =
+      &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex];
+
+   D3D12_HEAP_DESC heap_desc = {};
+   // TODO: fix all of these:
+   heap_desc.SizeInBytes = pAllocateInfo->allocationSize;
+   heap_desc.Alignment =
+      heap_desc.SizeInBytes >= D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT ?
+      D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT :
+      D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+   heap_desc.Flags =
+      dzn_physical_device_get_heap_flags_for_mem_type(pdevice,
+                                                      pAllocateInfo->memoryTypeIndex);
+
+   /* TODO: double-check this initial-state/heap-property logic. */
+   mem->initial_state = D3D12_RESOURCE_STATE_COMMON;
+   heap_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
+   heap_desc.Properties.MemoryPoolPreference =
+      ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
+       !pdevice->architecture.UMA) ?
+ D3D12_MEMORY_POOL_L1 : D3D12_MEMORY_POOL_L0; + if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { + heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; + } else { + heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE; + } + + if (FAILED(device->dev->CreateHeap(&heap_desc, IID_PPV_ARGS(&mem->heap)))) { + dzn_device_memory_destroy(mem, pAllocator); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + if ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && + !(heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)){ + D3D12_RESOURCE_DESC res_desc = {}; + res_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + res_desc.Format = DXGI_FORMAT_UNKNOWN; + res_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + res_desc.Width = heap_desc.SizeInBytes; + res_desc.Height = 1; + res_desc.DepthOrArraySize = 1; + res_desc.MipLevels = 1; + res_desc.SampleDesc.Count = 1; + res_desc.SampleDesc.Quality = 0; + res_desc.Flags = D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + res_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + HRESULT hr = device->dev->CreatePlacedResource(mem->heap, 0, &res_desc, + mem->initial_state, + NULL, IID_PPV_ARGS(&mem->map_res)); + if (FAILED(hr)) { + dzn_device_memory_destroy(mem, pAllocator); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + } + + *out = dzn_device_memory_to_handle(mem); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_AllocateMemory(VkDevice device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, + VkDeviceMemory *pMem) +{ + return dzn_device_memory_create(dzn_device_from_handle(device), + pAllocateInfo, pAllocator, pMem); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_FreeMemory(VkDevice device, + VkDeviceMemory mem, + const VkAllocationCallbacks *pAllocator) +{ + dzn_device_memory_destroy(dzn_device_memory_from_handle(mem), pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_MapMemory(VkDevice _device, + VkDeviceMemory _memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void **ppData) +{ + VK_FROM_HANDLE(dzn_device, device, _device); + VK_FROM_HANDLE(dzn_device_memory, mem, _memory); + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + if (size == VK_WHOLE_SIZE) + size = mem->size - offset; + + /* From the Vulkan spec version 1.0.32 docs for MapMemory: + * + * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0 + * assert(size != 0); + * * If size is not equal to VK_WHOLE_SIZE, size must be less than or + * equal to the size of the memory minus offset + */ + assert(size > 0); + assert(offset + size <= mem->size); + + assert(mem->map_res); + D3D12_RANGE range = {}; + range.Begin = offset; + range.End = offset + size; + void *map = NULL; + if (FAILED(mem->map_res->Map(0, &range, &map))) + return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED); + + mem->map = map; + mem->map_size = size; + + *ppData = ((uint8_t*) map) + offset; + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_UnmapMemory(VkDevice _device, + VkDeviceMemory _memory) +{ + VK_FROM_HANDLE(dzn_device, device, _device); + VK_FROM_HANDLE(dzn_device_memory, mem, _memory); + + if (mem == NULL) + return; + + assert(mem->map_res); + mem->map_res->Unmap(0, NULL); + + mem->map = NULL; + mem->map_size = 0; +} + +VKAPI_ATTR VkResult 
VKAPI_CALL +dzn_FlushMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_InvalidateMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +static void +dzn_buffer_destroy(dzn_buffer *buf, const VkAllocationCallbacks *pAllocator) +{ + if (!buf) + return; + + dzn_device *device = container_of(buf->base.device, dzn_device, vk); + + if (buf->res) + buf->res->Release(); + + vk_object_base_finish(&buf->base); + vk_free2(&device->vk.alloc, pAllocator, buf); +} + +static VkResult +dzn_buffer_create(dzn_device *device, + const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBuffer *out) +{ + dzn_buffer *buf = (dzn_buffer *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*buf), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!buf) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &buf->base, VK_OBJECT_TYPE_BUFFER); + buf->create_flags = pCreateInfo->flags; + buf->size = pCreateInfo->size; + buf->usage = pCreateInfo->usage; + + if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) + buf->size = ALIGN_POT(buf->size, 256); + + buf->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + buf->desc.Format = DXGI_FORMAT_UNKNOWN; + buf->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + buf->desc.Width = buf->size; + buf->desc.Height = 1; + buf->desc.DepthOrArraySize = 1; + buf->desc.MipLevels = 1; + buf->desc.SampleDesc.Count = 1; + buf->desc.SampleDesc.Quality = 0; + buf->desc.Flags = D3D12_RESOURCE_FLAG_NONE; + buf->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + if (buf->usage & + (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) + buf->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + *out = dzn_buffer_to_handle(buf); + return VK_SUCCESS; +} + +DXGI_FORMAT +dzn_buffer_get_dxgi_format(VkFormat format) +{ + enum pipe_format pfmt = vk_format_to_pipe_format(format); + + return dzn_pipe_to_dxgi_format(pfmt); +} + +D3D12_TEXTURE_COPY_LOCATION +dzn_buffer_get_copy_loc(const dzn_buffer *buf, + VkFormat format, + const VkBufferImageCopy2KHR *region, + VkImageAspectFlagBits aspect, + uint32_t layer) +{ + const uint32_t buffer_row_length = + region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width; + const uint32_t buffer_image_height = + region->bufferImageHeight ? 
region->bufferImageHeight : region->imageExtent.height; + + VkFormat plane_format = dzn_image_get_plane_format(format, aspect); + + enum pipe_format pfmt = vk_format_to_pipe_format(plane_format); + uint32_t blksz = util_format_get_blocksize(pfmt); + uint32_t blkw = util_format_get_blockwidth(pfmt); + uint32_t blkh = util_format_get_blockheight(pfmt); + + D3D12_TEXTURE_COPY_LOCATION loc = { + .pResource = buf->res, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = { + .Footprint = { + .Format = + dzn_image_get_placed_footprint_format(format, aspect), + .Width = region->imageExtent.width, + .Height = region->imageExtent.height, + .Depth = region->imageExtent.depth, + .RowPitch = blksz * DIV_ROUND_UP(buffer_row_length, blkw), + }, + }, + }; + + uint32_t buffer_layer_stride = + loc.PlacedFootprint.Footprint.RowPitch * + DIV_ROUND_UP(loc.PlacedFootprint.Footprint.Height, blkh); + + loc.PlacedFootprint.Offset = + region->bufferOffset + (layer * buffer_layer_stride); + + return loc; +} + +D3D12_TEXTURE_COPY_LOCATION +dzn_buffer_get_line_copy_loc(const dzn_buffer *buf, VkFormat format, + const VkBufferImageCopy2KHR *region, + const D3D12_TEXTURE_COPY_LOCATION *loc, + uint32_t y, uint32_t z, uint32_t *start_x) +{ + uint32_t buffer_row_length = + region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width; + uint32_t buffer_image_height = + region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height; + + format = dzn_image_get_plane_format(format, region->imageSubresource.aspectMask); + + enum pipe_format pfmt = vk_format_to_pipe_format(format); + uint32_t blksz = util_format_get_blocksize(pfmt); + uint32_t blkw = util_format_get_blockwidth(pfmt); + uint32_t blkh = util_format_get_blockheight(pfmt); + uint32_t blkd = util_format_get_blockdepth(pfmt); + D3D12_TEXTURE_COPY_LOCATION new_loc = *loc; + uint32_t buffer_row_stride = + DIV_ROUND_UP(buffer_row_length, blkw) * blksz; + uint32_t buffer_layer_stride = + buffer_row_stride * + DIV_ROUND_UP(buffer_image_height, blkh); + + uint64_t tex_offset = + ((y / blkh) * buffer_row_stride) + + ((z / blkd) * buffer_layer_stride); + uint64_t offset = loc->PlacedFootprint.Offset + tex_offset; + uint32_t offset_alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; + + while (offset_alignment % blksz) + offset_alignment += D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; + + new_loc.PlacedFootprint.Footprint.Height = blkh; + new_loc.PlacedFootprint.Footprint.Depth = 1; + new_loc.PlacedFootprint.Offset = (offset / offset_alignment) * offset_alignment; + *start_x = ((offset % offset_alignment) / blksz) * blkw; + new_loc.PlacedFootprint.Footprint.Width = *start_x + region->imageExtent.width; + new_loc.PlacedFootprint.Footprint.RowPitch = + ALIGN_POT(DIV_ROUND_UP(new_loc.PlacedFootprint.Footprint.Width, blkw) * blksz, + D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + return new_loc; +} + +bool +dzn_buffer_supports_region_copy(const D3D12_TEXTURE_COPY_LOCATION *loc) +{ + return !(loc->PlacedFootprint.Offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1)) && + !(loc->PlacedFootprint.Footprint.RowPitch & (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateBuffer(VkDevice device, + const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBuffer *pBuffer) +{ + return dzn_buffer_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pBuffer); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyBuffer(VkDevice device, + VkBuffer buffer, + const 
VkAllocationCallbacks *pAllocator) +{ + dzn_buffer_destroy(dzn_buffer_from_handle(buffer), pAllocator); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetBufferMemoryRequirements2(VkDevice dev, + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VK_FROM_HANDLE(dzn_buffer, buffer, pInfo->buffer); + dzn_physical_device *pdev = + container_of(device->vk.physical, dzn_physical_device, vk); + + /* uh, this is grossly over-estimating things */ + uint32_t alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + VkDeviceSize size = buffer->size; + + if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { + alignment = MAX2(alignment, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + size = ALIGN_POT(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + } + + pMemoryRequirements->memoryRequirements.size = size; + pMemoryRequirements->memoryRequirements.alignment = alignment; + pMemoryRequirements->memoryRequirements.memoryTypeBits = + dzn_physical_device_get_mem_type_mask_for_resource(pdev, &buffer->desc); + + vk_foreach_struct(ext, pMemoryRequirements->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *requirements = + (VkMemoryDedicatedRequirements *)ext; + /* TODO: figure out dedicated allocations */ + requirements->prefersDedicatedAllocation = false; + requirements->requiresDedicatedAllocation = false; + break; + } + + default: + dzn_debug_ignored_stype(ext->sType); + break; + } + } + +#if 0 + D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo( + UINT visibleMask, + UINT numResourceDescs, + const D3D12_RESOURCE_DESC *pResourceDescs); +#endif +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_BindBufferMemory2(VkDevice _device, + uint32_t bindInfoCount, + const VkBindBufferMemoryInfo *pBindInfos) +{ + VK_FROM_HANDLE(dzn_device, device, _device); + + for (uint32_t i = 0; i < bindInfoCount; i++) { + assert(pBindInfos[i].sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO); + + VK_FROM_HANDLE(dzn_device_memory, mem, pBindInfos[i].memory); + VK_FROM_HANDLE(dzn_buffer, buffer, pBindInfos[i].buffer); + + if (FAILED(device->dev->CreatePlacedResource(mem->heap, + pBindInfos[i].memoryOffset, + &buffer->desc, + mem->initial_state, + NULL, IID_PPV_ARGS(&buffer->res)))) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + return VK_SUCCESS; +} + +static VkResult +dzn_framebuffer_create(dzn_device *device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *out) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, dzn_framebuffer, framebuffer, 1); + VK_MULTIALLOC_DECL(&ma, dzn_image_view *, attachments, pCreateInfo->attachmentCount); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + framebuffer->attachments = attachments; + framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { + VK_FROM_HANDLE(dzn_image_view, iview, pCreateInfo->pAttachments[i]); + framebuffer->attachments[i] = iview; + } + + vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER); + *out = dzn_framebuffer_to_handle(framebuffer); + return VK_SUCCESS; +} + +static void 
+dzn_framebuffer_destroy(dzn_framebuffer *framebuffer, + const VkAllocationCallbacks *pAllocator) +{ + if (!framebuffer) + return; + + dzn_device *device = + container_of(framebuffer->base.device, dzn_device, vk); + + vk_object_base_finish(&framebuffer->base); + vk_free2(&device->vk.alloc, pAllocator, framebuffer); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateFramebuffer(VkDevice device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *pFramebuffer) +{ + return dzn_framebuffer_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pFramebuffer); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyFramebuffer(VkDevice device, + VkFramebuffer fb, + const VkAllocationCallbacks *pAllocator) +{ + dzn_framebuffer_destroy(dzn_framebuffer_from_handle(fb), pAllocator); +} + +static void +dzn_event_destroy(dzn_event *event, + const VkAllocationCallbacks *pAllocator) +{ + if (!event) + return; + + dzn_device *device = + container_of(event->base.device, dzn_device, vk); + + if (event->fence) + event->fence->Release(); + + vk_object_base_finish(&event->base); + vk_free2(&device->vk.alloc, pAllocator, event); +} + +static VkResult +dzn_event_create(dzn_device *device, + const VkEventCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkEvent *out) +{ + dzn_event *event = (dzn_event *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!event) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT); + + if (FAILED(device->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&event->fence)))) { + dzn_event_destroy(event, pAllocator); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + *out = dzn_event_to_handle(event); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateEvent(VkDevice device, + const VkEventCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkEvent *pEvent) +{ + return dzn_event_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pEvent); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyEvent(VkDevice device, + VkEvent event, + const VkAllocationCallbacks *pAllocator) +{ + return dzn_event_destroy(dzn_event_from_handle(event), pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_ResetEvent(VkDevice dev, + VkEvent evt) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VK_FROM_HANDLE(dzn_event, event, evt); + + if (FAILED(event->fence->Signal(0))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_SetEvent(VkDevice dev, + VkEvent evt) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VK_FROM_HANDLE(dzn_event, event, evt); + + if (FAILED(event->fence->Signal(1))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetEventStatus(VkDevice device, + VkEvent evt) +{ + VK_FROM_HANDLE(dzn_event, event, evt); + + return event->fence->GetCompletedValue() == 0 ? 
+ VK_EVENT_RESET : VK_EVENT_SET; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetDeviceMemoryCommitment(VkDevice device, + VkDeviceMemory memory, + VkDeviceSize *pCommittedMemoryInBytes) +{ + VK_FROM_HANDLE(dzn_device_memory, mem, memory); + + // TODO: find if there's a way to query/track actual heap residency + *pCommittedMemoryInBytes = mem->size; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_QueueBindSparse(VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo *pBindInfo, + VkFence fence) +{ + // FIXME: add proper implem + dzn_stub(); + return VK_SUCCESS; +} + +static D3D12_TEXTURE_ADDRESS_MODE +dzn_sampler_translate_addr_mode(VkSamplerAddressMode in) +{ + switch (in) { + case VK_SAMPLER_ADDRESS_MODE_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER; + default: unreachable("Invalid address mode"); + } +} + +static void +dzn_sampler_destroy(dzn_sampler *sampler, + const VkAllocationCallbacks *pAllocator) +{ + if (!sampler) + return; + + dzn_device *device = + container_of(sampler->base.device, dzn_device, vk); + + vk_object_base_finish(&sampler->base); + vk_free2(&device->vk.alloc, pAllocator, sampler); +} + +static VkResult +dzn_sampler_create(dzn_device *device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSampler *out) +{ + dzn_sampler *sampler = (dzn_sampler *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER); + + const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = (const VkSamplerCustomBorderColorCreateInfoEXT *) + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + + /* TODO: have a sampler pool to allocate shader-invisible descs which we + * can copy to the desc_set when UpdateDescriptorSets() is called. + */ + sampler->desc.Filter = dzn_translate_sampler_filter(pCreateInfo); + sampler->desc.AddressU = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeU); + sampler->desc.AddressV = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeV); + sampler->desc.AddressW = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeW); + sampler->desc.MipLODBias = pCreateInfo->mipLodBias; + sampler->desc.MaxAnisotropy = pCreateInfo->maxAnisotropy; + sampler->desc.MinLOD = pCreateInfo->minLod; + sampler->desc.MaxLOD = pCreateInfo->maxLod; + + if (pCreateInfo->compareEnable) + sampler->desc.ComparisonFunc = dzn_translate_compare_op(pCreateInfo->compareOp); + + bool reads_border_color = + pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || + pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || + pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + + if (reads_border_color) { + switch (pCreateInfo->borderColor) { + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + sampler->desc.BorderColor[0] = 0.0f; + sampler->desc.BorderColor[1] = 0.0f; + sampler->desc.BorderColor[2] = 0.0f; + sampler->desc.BorderColor[3] = + pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? 
0.0f : 1.0f; + sampler->static_border_color = + pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? + D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK : + D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK; + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + sampler->desc.BorderColor[0] = sampler->desc.BorderColor[1] = 1.0f; + sampler->desc.BorderColor[2] = sampler->desc.BorderColor[3] = 1.0f; + sampler->static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE; + break; + case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: + sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1; + for (unsigned i = 0; i < ARRAY_SIZE(sampler->desc.BorderColor); i++) + sampler->desc.BorderColor[i] = pBorderColor->customBorderColor.float32[i]; + break; + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + case VK_BORDER_COLOR_INT_CUSTOM_EXT: + /* FIXME: sampling from integer textures is not supported yet. */ + sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1; + break; + default: + unreachable("Unsupported border color"); + } + } + + *out = dzn_sampler_to_handle(sampler); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateSampler(VkDevice device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSampler *pSampler) +{ + return dzn_sampler_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pSampler); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroySampler(VkDevice device, + VkSampler sampler, + const VkAllocationCallbacks *pAllocator) +{ + return dzn_sampler_destroy(dzn_sampler_from_handle(sampler), pAllocator); +} diff --git a/src/microsoft/vulkan/dzn_image.cpp b/src/microsoft/vulkan/dzn_image.cpp new file mode 100644 index 00000000000..547540a00d0 --- /dev/null +++ b/src/microsoft/vulkan/dzn_image.cpp @@ -0,0 +1,1220 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_debug_report.h" +#include "vk_format.h" +#include "vk_util.h" + +static void +dzn_image_destroy(dzn_image *image, + const VkAllocationCallbacks *pAllocator) +{ + if (!image) + return; + + dzn_device *device = container_of(image->vk.base.device, dzn_device, vk); + + if (image->res) + image->res->Release(); + + vk_image_finish(&image->vk); + vk_free2(&device->vk.alloc, pAllocator, image); +} + +static VkResult +dzn_image_create(dzn_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *out) +{ + dzn_image *image = (dzn_image *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + dzn_physical_device *pdev = + container_of(device->vk.physical, dzn_physical_device, vk); + + if (!image) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + const VkExternalMemoryImageCreateInfo *create_info = + (const VkExternalMemoryImageCreateInfo *) + vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO); + +#if 0 + VkExternalMemoryHandleTypeFlags supported = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT; + + if (create_info && (create_info->handleTypes & supported)) + return dzn_image_from_external(device, pCreateInfo, create_info, + pAllocator, pImage); +#endif + +#if 0 + const VkImageSwapchainCreateInfoKHR *swapchain_info = (const VkImageSwapchainCreateInfoKHR *) + vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR); + if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) + return dzn_image_from_swapchain(device, pCreateInfo, swapchain_info, + pAllocator, pImage); +#endif + + vk_image_init(&device->vk, &image->vk, pCreateInfo); + enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); + + if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) { + /* Treat linear images as buffers: they should only be used as copy + * src/dest, and CopyTextureResource() can manipulate buffers. + * We only support linear tiling on things strictly required by the spec: + * "Images created with tiling equal to VK_IMAGE_TILING_LINEAR have + * further restrictions on their limits and capabilities compared to + * images created with tiling equal to VK_IMAGE_TILING_OPTIMAL. 
Creation + * of images with tiling VK_IMAGE_TILING_LINEAR may not be supported + * unless other parameters meet all of the constraints: + * - imageType is VK_IMAGE_TYPE_2D + * - format is not a depth/stencil format + * - mipLevels is 1 + * - arrayLayers is 1 + * - samples is VK_SAMPLE_COUNT_1_BIT + * - usage only includes VK_IMAGE_USAGE_TRANSFER_SRC_BIT and/or VK_IMAGE_USAGE_TRANSFER_DST_BIT + * " + */ + assert(!vk_format_is_depth_or_stencil(pCreateInfo->format)); + assert(pCreateInfo->mipLevels == 1); + assert(pCreateInfo->arrayLayers == 1); + assert(pCreateInfo->samples == 1); + assert(pCreateInfo->imageType != VK_IMAGE_TYPE_3D); + assert(!(pCreateInfo->usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT))); + D3D12_RESOURCE_DESC tmp_desc = { + .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D, + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + .Width = ALIGN(image->vk.extent.width, util_format_get_blockwidth(pfmt)), + .Height = (UINT)ALIGN(image->vk.extent.height, util_format_get_blockheight(pfmt)), + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = + dzn_image_get_dxgi_format(pCreateInfo->format, pCreateInfo->usage, 0), + .SampleDesc = { .Count = 1, .Quality = 0 }, + .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + .Flags = D3D12_RESOURCE_FLAG_NONE + }; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; + uint64_t size = 0; + device->dev->GetCopyableFootprints(&tmp_desc, 0, 1, 0, &footprint, NULL, NULL, &size); + + image->linear.row_stride = footprint.Footprint.RowPitch; + image->linear.size = size; + size *= pCreateInfo->arrayLayers; + image->desc.Format = DXGI_FORMAT_UNKNOWN; + image->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + image->desc.Width = size; + image->desc.Height = 1; + image->desc.DepthOrArraySize = 1; + image->desc.MipLevels = 1; + image->desc.SampleDesc.Count = 1; + image->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + } else { + image->desc.Format = + dzn_image_get_dxgi_format(pCreateInfo->format, + pCreateInfo->usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + 0), + image->desc.Dimension = (D3D12_RESOURCE_DIMENSION)(D3D12_RESOURCE_DIMENSION_TEXTURE1D + pCreateInfo->imageType); + image->desc.Width = image->vk.extent.width; + image->desc.Height = image->vk.extent.height; + image->desc.DepthOrArraySize = pCreateInfo->imageType == VK_IMAGE_TYPE_3D ? + image->vk.extent.depth : + pCreateInfo->arrayLayers; + image->desc.MipLevels = pCreateInfo->mipLevels; + image->desc.SampleDesc.Count = pCreateInfo->samples; + image->desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + } + + if (image->desc.SampleDesc.Count > 1) + image->desc.Alignment = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT; + else + image->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + + image->desc.SampleDesc.Quality = 0; + + image->desc.Flags = D3D12_RESOURCE_FLAG_NONE; + + if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + if (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + + if (!(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) + image->desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + + /* Images with TRANSFER_DST can be cleared or passed as a blit/resolve + * destination. Both operations require the RT or DS cap flags. 
+
+   /* Images with TRANSFER_DST can be cleared or passed as a blit/resolve
+    * destination. Both operations require the RT or DS cap flags.
+    */
+   if ((image->vk.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) &&
+       image->vk.tiling == VK_IMAGE_TILING_OPTIMAL) {
+
+      D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
+         dzn_physical_device_get_format_support(pdev, pCreateInfo->format);
+      if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) {
+         image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
+      } else if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) {
+         image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
+      } else if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) {
+         image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+      }
+   }
+
+   if (image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)
+      image->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+   *out = dzn_image_to_handle(image);
+   return VK_SUCCESS;
+}
+
+DXGI_FORMAT
+dzn_image_get_dxgi_format(VkFormat format,
+                          VkImageUsageFlags usage,
+                          VkImageAspectFlags aspects)
+{
+   enum pipe_format pfmt = vk_format_to_pipe_format(format);
+
+   if (!vk_format_is_depth_or_stencil(format))
+      return dzn_pipe_to_dxgi_format(pfmt);
+
+   switch (pfmt) {
+   case PIPE_FORMAT_Z16_UNORM:
+      return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+             DXGI_FORMAT_D16_UNORM : DXGI_FORMAT_R16_UNORM;
+
+   case PIPE_FORMAT_Z32_FLOAT:
+      return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+             DXGI_FORMAT_D32_FLOAT : DXGI_FORMAT_R32_FLOAT;
+
+   case PIPE_FORMAT_Z24X8_UNORM:
+      return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+             DXGI_FORMAT_D24_UNORM_S8_UINT : DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
+
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      if (usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
+         return DXGI_FORMAT_D24_UNORM_S8_UINT;
+
+      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+         return DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
+      else
+         return DXGI_FORMAT_X24_TYPELESS_G8_UINT;
+
+   case PIPE_FORMAT_X24S8_UINT:
+      return usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ?
+             DXGI_FORMAT_D24_UNORM_S8_UINT : DXGI_FORMAT_X24_TYPELESS_G8_UINT;
+
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      if (usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
+         return DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
+
+      if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
+         return DXGI_FORMAT_X32_TYPELESS_G8X24_UINT;
+      else if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+         return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
+      else
+         return DXGI_FORMAT_R32G8X24_TYPELESS;
+
+   default:
+      return dzn_pipe_to_dxgi_format(pfmt);
+   }
+}
+
+DXGI_FORMAT
+dzn_image_get_placed_footprint_format(VkFormat format,
+                                      VkImageAspectFlags aspect)
+{
+   DXGI_FORMAT out =
+      dzn_image_get_dxgi_format(format,
+                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+                                aspect);
+
+   switch (out) {
+   case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
+   case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
+      return DXGI_FORMAT_R32_TYPELESS;
+   case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
+   case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
+      return DXGI_FORMAT_R8_TYPELESS;
+   default:
+      return out;
+   }
+}
+
+VkFormat
+dzn_image_get_plane_format(VkFormat format,
+                           VkImageAspectFlags aspectMask)
+{
+   if (aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
+      return vk_format_stencil_only(format);
+   else if (aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
+      return vk_format_depth_only(format);
+   else
+      return format;
+}
+
+uint32_t
+dzn_image_layers_get_subresource_index(const dzn_image *image,
+                                       const VkImageSubresourceLayers *subres,
+                                       VkImageAspectFlagBits aspect,
+                                       uint32_t layer)
+{
+   int planeSlice =
+      aspect == VK_IMAGE_ASPECT_STENCIL_BIT ?
1 : 0; + + return subres->mipLevel + + ((subres->baseArrayLayer + layer) * image->desc.MipLevels) + + (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); +} + +uint32_t +dzn_image_range_get_subresource_index(const dzn_image *image, + const VkImageSubresourceRange *subres, + VkImageAspectFlagBits aspect, + uint32_t level, uint32_t layer) +{ + int planeSlice = + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; + + return subres->baseMipLevel + level + + ((subres->baseArrayLayer + layer) * image->desc.MipLevels) + + (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); +} + +uint32_t +dzn_image_get_subresource_index(const dzn_image *image, + const VkImageSubresource *subres, + VkImageAspectFlagBits aspect) +{ + int planeSlice = + aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; + + return subres->mipLevel + + (subres->arrayLayer * image->desc.MipLevels) + + (planeSlice * image->desc.MipLevels * image->desc.DepthOrArraySize); +} + +D3D12_TEXTURE_COPY_LOCATION +dzn_image_get_copy_loc(const dzn_image *image, + const VkImageSubresourceLayers *subres, + VkImageAspectFlagBits aspect, + uint32_t layer) +{ + D3D12_TEXTURE_COPY_LOCATION loc = { + .pResource = image->res, + }; + + assert((subres->aspectMask & aspect) != 0); + VkFormat format = dzn_image_get_plane_format(image->vk.format, aspect); + + if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { + VkImageUsageFlags usage = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + assert((subres->baseArrayLayer + layer) == 0); + assert(subres->mipLevel == 0); + loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + loc.PlacedFootprint.Offset = 0; + loc.PlacedFootprint.Footprint.Format = + dzn_image_get_placed_footprint_format(image->vk.format, aspect); + loc.PlacedFootprint.Footprint.Width = image->vk.extent.width; + loc.PlacedFootprint.Footprint.Height = image->vk.extent.height; + loc.PlacedFootprint.Footprint.Depth = image->vk.extent.depth; + loc.PlacedFootprint.Footprint.RowPitch = image->linear.row_stride; + } else { + loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + loc.SubresourceIndex = dzn_image_layers_get_subresource_index(image, subres, aspect, layer); + } + + return loc; +} + +D3D12_DEPTH_STENCIL_VIEW_DESC +dzn_image_get_dsv_desc(const dzn_image *image, + const VkImageSubresourceRange *range, + uint32_t level) +{ + uint32_t layer_count = dzn_get_layer_count(image, range); + D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = { + .Format = + dzn_image_get_dxgi_format(image->vk.format, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + range->aspectMask), + }; + + switch (image->vk.image_type) { + case VK_IMAGE_TYPE_1D: + dsv_desc.ViewDimension = + image->vk.array_layers > 1 ? + D3D12_DSV_DIMENSION_TEXTURE1DARRAY : + D3D12_DSV_DIMENSION_TEXTURE1D; + break; + case VK_IMAGE_TYPE_2D: + if (image->vk.array_layers > 1) { + dsv_desc.ViewDimension = + image->vk.samples > 1 ? + D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY : + D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + } else { + dsv_desc.ViewDimension = + image->vk.samples > 1 ? 
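+            /* multisampled resources need the dedicated 2DMS view dimension */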
+ D3D12_DSV_DIMENSION_TEXTURE2DMS : + D3D12_DSV_DIMENSION_TEXTURE2D; + } + break; + default: + unreachable("Invalid image type"); + } + + switch (dsv_desc.ViewDimension) { + case D3D12_DSV_DIMENSION_TEXTURE1D: + dsv_desc.Texture1D.MipSlice = range->baseMipLevel + level; + break; + case D3D12_DSV_DIMENSION_TEXTURE1DARRAY: + dsv_desc.Texture1DArray.MipSlice = range->baseMipLevel + level; + dsv_desc.Texture1DArray.FirstArraySlice = range->baseArrayLayer; + dsv_desc.Texture1DArray.ArraySize = layer_count; + break; + case D3D12_DSV_DIMENSION_TEXTURE2D: + dsv_desc.Texture2D.MipSlice = range->baseMipLevel + level; + break; + case D3D12_DSV_DIMENSION_TEXTURE2DMS: + break; + case D3D12_DSV_DIMENSION_TEXTURE2DARRAY: + dsv_desc.Texture2DArray.MipSlice = range->baseMipLevel + level; + dsv_desc.Texture2DArray.FirstArraySlice = range->baseArrayLayer; + dsv_desc.Texture2DArray.ArraySize = layer_count; + break; + } + + return dsv_desc; +} + +D3D12_RENDER_TARGET_VIEW_DESC +dzn_image_get_rtv_desc(const dzn_image *image, + const VkImageSubresourceRange *range, + uint32_t level) +{ + uint32_t layer_count = dzn_get_layer_count(image, range); + D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = { + .Format = + dzn_image_get_dxgi_format(image->vk.format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT), + }; + + switch (image->vk.image_type) { + case VK_IMAGE_TYPE_1D: + rtv_desc.ViewDimension = + image->vk.array_layers > 1 ? + D3D12_RTV_DIMENSION_TEXTURE1DARRAY : D3D12_RTV_DIMENSION_TEXTURE1D; + break; + case VK_IMAGE_TYPE_2D: + if (image->vk.array_layers > 1) { + rtv_desc.ViewDimension = + image->vk.samples > 1 ? + D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY : + D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + } else { + rtv_desc.ViewDimension = + image->vk.samples > 1 ? + D3D12_RTV_DIMENSION_TEXTURE2DMS : + D3D12_RTV_DIMENSION_TEXTURE2D; + } + break; + case VK_IMAGE_TYPE_3D: + rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + break; + default: unreachable("Invalid image type\n"); + } + + switch (rtv_desc.ViewDimension) { + case D3D12_RTV_DIMENSION_TEXTURE1D: + rtv_desc.Texture1D.MipSlice = range->baseMipLevel + level; + break; + case D3D12_RTV_DIMENSION_TEXTURE1DARRAY: + rtv_desc.Texture1DArray.MipSlice = range->baseMipLevel + level; + rtv_desc.Texture1DArray.FirstArraySlice = range->baseArrayLayer; + rtv_desc.Texture1DArray.ArraySize = layer_count; + break; + case D3D12_RTV_DIMENSION_TEXTURE2D: + rtv_desc.Texture2D.MipSlice = range->baseMipLevel + level; + if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_1_BIT) + rtv_desc.Texture2D.PlaneSlice = 1; + else if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_2_BIT) + rtv_desc.Texture2D.PlaneSlice = 2; + else + rtv_desc.Texture2D.PlaneSlice = 0; + break; + case D3D12_RTV_DIMENSION_TEXTURE2DMS: + break; + case D3D12_RTV_DIMENSION_TEXTURE2DARRAY: + rtv_desc.Texture2DArray.MipSlice = range->baseMipLevel + level; + rtv_desc.Texture2DArray.FirstArraySlice = range->baseArrayLayer; + rtv_desc.Texture2DArray.ArraySize = layer_count; + if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_1_BIT) + rtv_desc.Texture2DArray.PlaneSlice = 1; + else if (range->aspectMask & VK_IMAGE_ASPECT_PLANE_2_BIT) + rtv_desc.Texture2DArray.PlaneSlice = 2; + else + rtv_desc.Texture2DArray.PlaneSlice = 0; + break; + case D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY: + rtv_desc.Texture2DMSArray.FirstArraySlice = range->baseArrayLayer; + rtv_desc.Texture2DMSArray.ArraySize = layer_count; + break; + case D3D12_RTV_DIMENSION_TEXTURE3D: + rtv_desc.Texture3D.MipSlice = range->baseMipLevel + level; + 
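/* 3D RTVs have no array layers; the range's layers select depth (W) slices */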
rtv_desc.Texture3D.FirstWSlice = range->baseArrayLayer; + rtv_desc.Texture3D.WSize = layer_count; + break; + } + + return rtv_desc; +} + +D3D12_RESOURCE_STATES +dzn_image_layout_to_state(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_PREINITIALIZED: + case VK_IMAGE_LAYOUT_UNDEFINED: + case VK_IMAGE_LAYOUT_GENERAL: + /* YOLO! */ + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + return D3D12_RESOURCE_STATE_COMMON; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + return D3D12_RESOURCE_STATE_COPY_DEST; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + return D3D12_RESOURCE_STATE_COPY_SOURCE; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return D3D12_RESOURCE_STATE_RENDER_TARGET; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: + return D3D12_RESOURCE_STATE_DEPTH_WRITE; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: + return D3D12_RESOURCE_STATE_DEPTH_READ; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; + + default: + unreachable("not implemented"); + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + return dzn_image_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pImage); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyImage(VkDevice device, VkImage image, + const VkAllocationCallbacks *pAllocator) +{ + dzn_image_destroy(dzn_image_from_handle(image), pAllocator); +} + +static dzn_image * +dzn_swapchain_get_image(dzn_device *device, + VkSwapchainKHR swapchain, + uint32_t index) +{ + uint32_t n_images = index + 1; + STACK_ARRAY(VkImage, images, n_images); + dzn_image *image = NULL; + + VkResult result = wsi_common_get_images(swapchain, &n_images, images); + + if (result == VK_SUCCESS || result == VK_INCOMPLETE) + image = dzn_image_from_handle(images[index]); + + STACK_ARRAY_FINISH(images); + return image; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_BindImageMemory2(VkDevice dev, + uint32_t bindInfoCount, + const VkBindImageMemoryInfo *pBindInfos) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + + for (uint32_t i = 0; i < bindInfoCount; i++) { + const VkBindImageMemoryInfo *bind_info = &pBindInfos[i]; + VK_FROM_HANDLE(dzn_device_memory, mem, bind_info->memory); + VK_FROM_HANDLE(dzn_image, image, bind_info->image); + bool did_bind = false; + + vk_foreach_struct_const(s, bind_info->pNext) { + switch (s->sType) { + case VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR: { + const VkBindImageMemorySwapchainInfoKHR *swapchain_info = + (const VkBindImageMemorySwapchainInfoKHR *) s; + dzn_image *swapchain_image = + dzn_swapchain_get_image(device, + swapchain_info->swapchain, + swapchain_info->imageIndex); + assert(swapchain_image); + assert(image->vk.aspects == swapchain_image->vk.aspects); + assert(mem == NULL); + + /* TODO: something something binding the image memory */ + assert(false); + + did_bind = true; + break; + } + default: + dzn_debug_ignored_stype(s->sType); + break; + } + } + + if (!did_bind) { + image->mem = mem; + image->mem_offset = bind_info->memoryOffset; + if (FAILED(device->dev->CreatePlacedResource(mem->heap, + bind_info->memoryOffset, + &image->desc, + mem->initial_state, + NULL, IID_PPV_ARGS(&image->res)))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + did_bind = true; + } + } + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL 
+dzn_GetImageMemoryRequirements2(VkDevice _device, + const VkImageMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + VK_FROM_HANDLE(dzn_device, device, _device); + VK_FROM_HANDLE(dzn_image, image, pInfo->image); + dzn_physical_device *pdev = + container_of(device->vk.physical, dzn_physical_device, vk); + + vk_foreach_struct_const(ext, pInfo->pNext) { + dzn_debug_ignored_stype(ext->sType); + } + + vk_foreach_struct(ext, pMemoryRequirements->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *requirements = + (VkMemoryDedicatedRequirements *)ext; + /* TODO: figure out dedicated allocations */ + requirements->prefersDedicatedAllocation = false; + requirements->requiresDedicatedAllocation = false; + break; + } + + default: + dzn_debug_ignored_stype(ext->sType); + break; + } + } + + D3D12_RESOURCE_ALLOCATION_INFO info = device->dev->GetResourceAllocationInfo(0, 1, &image->desc); + + pMemoryRequirements->memoryRequirements = VkMemoryRequirements { + .size = info.SizeInBytes, + .alignment = info.Alignment, + .memoryTypeBits = + dzn_physical_device_get_mem_type_mask_for_resource(pdev, &image->desc), + }; + + /* + * MSAA images need memory to be aligned on + * D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT (4MB), but the memory + * allocation function doesn't know what the memory will be used for, + * and forcing all allocations to be 4MB-aligned has a cost, so let's + * force MSAA resources to be at least 4MB, such that the allocation + * logic can consider sub-4MB allocations to not require this 4MB alignment. + */ + if (image->vk.samples > 1 && + pMemoryRequirements->memoryRequirements.size < D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT) + pMemoryRequirements->memoryRequirements.size = D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_GetImageSubresourceLayout(VkDevice _device, + VkImage _image, + const VkImageSubresource *subresource, + VkSubresourceLayout *layout) +{ + VK_FROM_HANDLE(dzn_device, device, _device); + VK_FROM_HANDLE(dzn_image, image, _image); + + if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { + assert(subresource->arrayLayer == 0); + assert(subresource->mipLevel == 0); + layout->offset = 0; + layout->rowPitch = image->linear.row_stride; + layout->depthPitch = 0; + layout->arrayPitch = 0; + layout->size = image->linear.size; + } else { + UINT subres_index = + dzn_image_get_subresource_index(image, subresource, + (VkImageAspectFlagBits)subresource->aspectMask); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; + UINT num_rows; + UINT64 row_size, total_size; + device->dev->GetCopyableFootprints(&image->desc, + subres_index, 1, + 0, // base-offset? + &footprint, + &num_rows, &row_size, + &total_size); + + layout->offset = footprint.Offset; + layout->rowPitch = footprint.Footprint.RowPitch; + layout->depthPitch = layout->rowPitch * footprint.Footprint.Height; + layout->arrayPitch = layout->depthPitch; // uuuh... why is this even here? 
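+      /* Each array layer is its own D3D12 subresource, so there is no real
+       * per-subresource array pitch; mirror depthPitch to keep the field
+       * populated.
+       */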
+ layout->size = total_size; + } +} + +static D3D12_SHADER_COMPONENT_MAPPING +translate_swizzle(VkComponentSwizzle in, uint32_t comp) +{ + switch (in) { + case VK_COMPONENT_SWIZZLE_IDENTITY: + return (D3D12_SHADER_COMPONENT_MAPPING) + (comp + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0); + case VK_COMPONENT_SWIZZLE_ZERO: + return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0; + case VK_COMPONENT_SWIZZLE_ONE: + return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1; + case VK_COMPONENT_SWIZZLE_R: + return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0; + case VK_COMPONENT_SWIZZLE_G: + return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1; + case VK_COMPONENT_SWIZZLE_B: + return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2; + case VK_COMPONENT_SWIZZLE_A: + return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3; + default: unreachable("Invalid swizzle"); + } +} + +static void +dzn_image_view_prepare_srv_desc(dzn_image_view *iview) +{ + uint32_t plane_slice = (iview->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0; + bool ms = iview->vk.image->samples > 1; + uint32_t layers_per_elem = + (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE || + iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) ? + 6 : 1; + bool use_array = (iview->vk.base_array_layer / layers_per_elem) > 0 || + (iview->vk.layer_count / layers_per_elem) > 1; + + iview->srv_desc = D3D12_SHADER_RESOURCE_VIEW_DESC { + .Format = + dzn_image_get_dxgi_format(iview->vk.format, + iview->vk.image->usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + iview->vk.aspects), + }; + + D3D12_SHADER_COMPONENT_MAPPING swz[] = { + translate_swizzle(iview->vk.swizzle.r, 0), + translate_swizzle(iview->vk.swizzle.g, 1), + translate_swizzle(iview->vk.swizzle.b, 2), + translate_swizzle(iview->vk.swizzle.a, 3), + }; + + /* Swap components to fake B4G4R4A4 support. 
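DXGI's 4:4:4:4 format stores components in a different order than the Vulkan packed format, so the components are remapped through the SRV swizzle.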
*/ + if (iview->vk.format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { + static const D3D12_SHADER_COMPONENT_MAPPING bgra4_remap[] = { + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, + }; + + for (uint32_t i = 0; i < ARRAY_SIZE(swz); i++) + swz[i] = bgra4_remap[swz[i]]; + } + + iview->srv_desc.Shader4ComponentMapping = + D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(swz[0], swz[1], swz[2], swz[3]); + + switch (iview->vk.view_type) { + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + case VK_IMAGE_VIEW_TYPE_1D: + if (use_array) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + iview->srv_desc.Texture1DArray.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture1DArray.MipLevels = iview->vk.level_count; + iview->srv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->srv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; + } else { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + iview->srv_desc.Texture1D.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture1D.MipLevels = iview->vk.level_count; + } + break; + + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_2D: + if (use_array && ms) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + iview->srv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; + iview->srv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; + } else if (use_array && !ms) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + iview->srv_desc.Texture2DArray.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture2DArray.MipLevels = iview->vk.level_count; + iview->srv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->srv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; + iview->srv_desc.Texture2DArray.PlaneSlice = plane_slice; + } else if (!use_array && ms) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + } else { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + iview->srv_desc.Texture2D.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture2D.MipLevels = iview->vk.level_count; + iview->srv_desc.Texture2D.PlaneSlice = plane_slice; + } + break; + + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + if (use_array) { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + iview->srv_desc.TextureCubeArray.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.TextureCubeArray.MipLevels = iview->vk.level_count; + iview->srv_desc.TextureCubeArray.First2DArrayFace = iview->vk.base_array_layer; + iview->srv_desc.TextureCubeArray.NumCubes = iview->vk.layer_count / 6; + } else { + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + iview->srv_desc.TextureCube.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.TextureCube.MipLevels = iview->vk.level_count; + } + break; + + case VK_IMAGE_VIEW_TYPE_3D: + iview->srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + iview->srv_desc.Texture3D.MostDetailedMip = iview->vk.base_mip_level; + iview->srv_desc.Texture3D.MipLevels = iview->vk.level_count; + break; + + default: unreachable("Invalid view type"); + } +} + +static void 
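+/* UAV descriptors back VK_IMAGE_USAGE_STORAGE_BIT views; D3D12 UAVs are
+ * single-sample only, hence the assert below.
+ */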
+dzn_image_view_prepare_uav_desc(dzn_image_view *iview) +{ + bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; + + assert(iview->vk.image->samples == 1); + + iview->uav_desc = D3D12_UNORDERED_ACCESS_VIEW_DESC { + .Format = + dzn_image_get_dxgi_format(iview->vk.format, + VK_IMAGE_USAGE_STORAGE_BIT, + iview->vk.aspects), + }; + + switch (iview->vk.view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + if (use_array) { + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + iview->uav_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; + iview->uav_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->uav_desc.Texture1DArray.ArraySize = iview->vk.layer_count; + } else { + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + iview->uav_desc.Texture1D.MipSlice = iview->vk.base_mip_level; + } + break; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + if (use_array) { + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + iview->uav_desc.Texture2DArray.PlaneSlice = 0; + iview->uav_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; + iview->uav_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->uav_desc.Texture2DArray.ArraySize = iview->vk.layer_count; + } else { + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + iview->uav_desc.Texture2D.MipSlice = iview->vk.base_mip_level; + iview->uav_desc.Texture2D.PlaneSlice = 0; + } + break; + case VK_IMAGE_VIEW_TYPE_3D: + iview->uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + iview->uav_desc.Texture3D.MipSlice = iview->vk.base_mip_level; + iview->uav_desc.Texture3D.FirstWSlice = 0; + iview->uav_desc.Texture3D.WSize = iview->vk.extent.depth; + break; + default: unreachable("Invalid type"); + } +} + +static void +dzn_image_view_prepare_rtv_desc(dzn_image_view *iview) +{ + bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; + bool ms = iview->vk.image->samples > 1; + uint32_t plane_slice = + (iview->vk.aspects & VK_IMAGE_ASPECT_PLANE_2_BIT) ? 2 : + (iview->vk.aspects & VK_IMAGE_ASPECT_PLANE_1_BIT) ? 
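/* planar aspects map straight to D3D12 plane slices */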
1 : 0; + + assert(iview->vk.level_count == 1); + + iview->rtv_desc = D3D12_RENDER_TARGET_VIEW_DESC { + .Format = + dzn_image_get_dxgi_format(iview->vk.format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + iview->vk.aspects), + }; + + switch (iview->vk.view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + if (use_array) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + iview->rtv_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->rtv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; + } else { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; + iview->rtv_desc.Texture1D.MipSlice = iview->vk.base_mip_level; + } + break; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + if (use_array && ms) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + iview->rtv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; + iview->rtv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; + } else if (use_array && !ms) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + iview->rtv_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->rtv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; + iview->rtv_desc.Texture2DArray.PlaneSlice = plane_slice; + } else if (!use_array && ms) { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; + } else { + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + iview->rtv_desc.Texture2D.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture2D.PlaneSlice = plane_slice; + } + break; + + case VK_IMAGE_VIEW_TYPE_3D: + iview->rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + iview->rtv_desc.Texture3D.MipSlice = iview->vk.base_mip_level; + iview->rtv_desc.Texture3D.FirstWSlice = 0; + iview->rtv_desc.Texture3D.WSize = iview->vk.extent.depth; + break; + + default: unreachable("Invalid view type"); + } +} + +static void +dzn_image_view_prepare_dsv_desc(dzn_image_view *iview) +{ + bool use_array = iview->vk.base_array_layer > 0 || iview->vk.layer_count > 1; + bool ms = iview->vk.image->samples > 1; + + iview->dsv_desc = D3D12_DEPTH_STENCIL_VIEW_DESC { + .Format = + dzn_image_get_dxgi_format(iview->vk.format, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + iview->vk.aspects), + }; + + switch (iview->vk.view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + if (use_array) { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + iview->dsv_desc.Texture1DArray.MipSlice = iview->vk.base_mip_level; + iview->dsv_desc.Texture1DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->dsv_desc.Texture1DArray.ArraySize = iview->vk.layer_count; + } else { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; + iview->dsv_desc.Texture1D.MipSlice = iview->vk.base_mip_level; + } + break; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + if (use_array && ms) { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + iview->dsv_desc.Texture2DMSArray.FirstArraySlice = iview->vk.base_array_layer; + iview->dsv_desc.Texture2DMSArray.ArraySize = iview->vk.layer_count; + } else if (use_array && !ms) { + 
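/* DSVs have no PlaneSlice field; the DXGI format already selects depth vs stencil */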
iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + iview->dsv_desc.Texture2DArray.MipSlice = iview->vk.base_mip_level; + iview->dsv_desc.Texture2DArray.FirstArraySlice = iview->vk.base_array_layer; + iview->dsv_desc.Texture2DArray.ArraySize = iview->vk.layer_count; + } else if (!use_array && ms) { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS; + } else { + iview->dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + iview->dsv_desc.Texture2D.MipSlice = iview->vk.base_mip_level; + } + break; + + default: unreachable("Invalid view type"); + } +} + +void +dzn_image_view_finish(dzn_image_view *iview) +{ + vk_image_view_finish(&iview->vk); +} + +void +dzn_image_view_init(dzn_device *device, + dzn_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo) +{ + VK_FROM_HANDLE(dzn_image, image, pCreateInfo->image); + + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + uint32_t level_count = dzn_get_level_count(image, range); + uint32_t layer_count = dzn_get_layer_count(image, range); + uint32_t plane_slice = + pCreateInfo->subresourceRange.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 1 : 0; + + vk_image_view_init(&device->vk, &iview->vk, pCreateInfo); + + assert(layer_count > 0); + assert(range->baseMipLevel < image->vk.mip_levels); + + /* View usage should be a subset of image usage */ + assert(image->vk.usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + switch (image->vk.image_type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(range->baseArrayLayer + dzn_get_layer_count(image, range) - 1 <= image->vk.array_layers); + break; + case VK_IMAGE_TYPE_3D: + assert(range->baseArrayLayer + dzn_get_layer_count(image, range) - 1 + <= u_minify(image->vk.extent.depth, range->baseMipLevel)); + break; + } + + dzn_image_view_prepare_srv_desc(iview); + + if (image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) + dzn_image_view_prepare_uav_desc(iview); + + if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + dzn_image_view_prepare_rtv_desc(iview); + + if (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + dzn_image_view_prepare_dsv_desc(iview); +} + +static void +dzn_image_view_destroy(dzn_image_view *iview, + const VkAllocationCallbacks *pAllocator) +{ + if (!iview) + return; + + dzn_device *device = container_of(iview->vk.base.device, dzn_device, vk); + + vk_image_view_finish(&iview->vk); + vk_free2(&device->vk.alloc, pAllocator, iview); +} + +static VkResult +dzn_image_view_create(dzn_device *device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *out) +{ + dzn_image_view *iview = (dzn_image_view *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*iview), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!iview) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + dzn_image_view_init(device, iview, pCreateInfo); + + *out = dzn_image_view_to_handle(iview); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateImageView(VkDevice device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + return dzn_image_view_create(dzn_device_from_handle(device), pCreateInfo, + pAllocator, pView); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyImageView(VkDevice device, 
+ VkImageView imageView, + const VkAllocationCallbacks *pAllocator) +{ + dzn_image_view_destroy(dzn_image_view_from_handle(imageView), pAllocator); +} + +static void +dzn_buffer_view_destroy(dzn_buffer_view *bview, + const VkAllocationCallbacks *pAllocator) +{ + if (!bview) + return; + + dzn_device *device = container_of(bview->base.device, dzn_device, vk); + + vk_object_base_finish(&bview->base); + vk_free2(&device->vk.alloc, pAllocator, bview); +} + +static VkResult +dzn_buffer_view_create(dzn_device *device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *out) +{ + VK_FROM_HANDLE(dzn_buffer, buf, pCreateInfo->buffer); + + dzn_buffer_view *bview = (dzn_buffer_view *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*bview), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!bview) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &bview->base, VK_OBJECT_TYPE_BUFFER_VIEW); + + enum pipe_format pfmt = vk_format_to_pipe_format(pCreateInfo->format); + unsigned blksz = util_format_get_blocksize(pfmt); + VkDeviceSize size = + pCreateInfo->range == VK_WHOLE_SIZE ? + buf->size - pCreateInfo->offset : pCreateInfo->range; + + bview->buffer = buf; + if (buf->usage & + (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) { + bview->srv_desc = D3D12_SHADER_RESOURCE_VIEW_DESC { + .Format = dzn_buffer_get_dxgi_format(pCreateInfo->format), + .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, + .Shader4ComponentMapping = + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + .Buffer = { + .FirstElement = pCreateInfo->offset / blksz, + .NumElements = UINT(size / blksz), + .Flags = D3D12_BUFFER_SRV_FLAG_NONE, + }, + }; + } + + if (buf->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { + bview->uav_desc = D3D12_UNORDERED_ACCESS_VIEW_DESC { + .Format = dzn_buffer_get_dxgi_format(pCreateInfo->format), + .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, + .Buffer = { + .FirstElement = pCreateInfo->offset / blksz, + .NumElements = UINT(size / blksz), + .Flags = D3D12_BUFFER_UAV_FLAG_NONE, + }, + }; + } + + *out = dzn_buffer_view_to_handle(bview); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateBufferView(VkDevice device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + return dzn_buffer_view_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pView); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyBufferView(VkDevice device, + VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + dzn_buffer_view_destroy(dzn_buffer_view_from_handle(bufferView), pAllocator); +} diff --git a/src/microsoft/vulkan/dzn_meta.cpp b/src/microsoft/vulkan/dzn_meta.cpp new file mode 100644 index 00000000000..088bc6c5adc --- /dev/null +++ b/src/microsoft/vulkan/dzn_meta.cpp @@ -0,0 +1,744 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies 
or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "spirv_to_dxil.h"
+#include "nir_to_dxil.h"
+
+#include "dxil_nir.h"
+#include "dxil_nir_lower_int_samplers.h"
+
+static void
+dzn_meta_compile_shader(dzn_device *device, nir_shader *nir,
+                        D3D12_SHADER_BYTECODE *slot)
+{
+   dzn_instance *instance =
+      container_of(device->vk.physical->instance, dzn_instance, vk);
+   IDxcValidator *validator = instance->dxc.validator;
+   IDxcLibrary *library = instance->dxc.library;
+   IDxcCompiler *compiler = instance->dxc.compiler;
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   if ((instance->debug_flags & DZN_DEBUG_NIR) &&
+       (instance->debug_flags & DZN_DEBUG_INTERNAL))
+      nir_print_shader(nir, stderr);
+
+   struct nir_to_dxil_options opts = { .environment = DXIL_ENVIRONMENT_VULKAN };
+   struct blob dxil_blob;
+   bool ret = nir_to_dxil(nir, &opts, &dxil_blob);
+   assert(ret);
+
+   dzn_shader_blob blob(dxil_blob.data, dxil_blob.size);
+   ComPtr<IDxcOperationResult> result;
+   validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit, &result);
+   if ((instance->debug_flags & DZN_DEBUG_DXIL) &&
+       (instance->debug_flags & DZN_DEBUG_INTERNAL)) {
+      IDxcBlobEncoding *disassembly;
+      compiler->Disassemble(&blob, &disassembly);
+      ComPtr<IDxcBlobEncoding> blobUtf8;
+      library->GetBlobAsUtf8(disassembly, blobUtf8.GetAddressOf());
+      char *disasm = reinterpret_cast<char *>(blobUtf8->GetBufferPointer());
+      disasm[blobUtf8->GetBufferSize() - 1] = 0;
+      fprintf(stderr,
+              "== BEGIN SHADER ============================================\n"
+              "%s\n"
+              "== END SHADER ==============================================\n",
+              disasm);
+      disassembly->Release();
+   }
+
+   HRESULT validationStatus;
+   result->GetStatus(&validationStatus);
+   if (FAILED(validationStatus)) {
+      if ((instance->debug_flags & DZN_DEBUG_DXIL) &&
+          (instance->debug_flags & DZN_DEBUG_INTERNAL)) {
+         ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
+         result->GetErrorBuffer(&printBlob);
+         library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());
+
+         char *errorString;
+         if (printBlobUtf8) {
+            errorString = reinterpret_cast<char *>(printBlobUtf8->GetBufferPointer());
+            errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
+            fprintf(stderr,
+                    "== VALIDATION ERROR =============================================\n"
+                    "%s\n"
+                    "== END ==========================================================\n",
+                    errorString);
+         }
+      }
+   }
+   assert(!FAILED(validationStatus));
+
+   void *data;
+   size_t size;
+   blob_finish_get_buffer(&dxil_blob, &data, &size);
+   slot->pShaderBytecode = data;
+   slot->BytecodeLength = size;
+}
+
+#define DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT 4
+
+static void
+dzn_meta_indirect_draw_finish(dzn_device *device, enum dzn_indirect_draw_type type)
+{
+   dzn_meta_indirect_draw *meta = &device->indirect_draws[type];
+
+   if (meta->root_sig)
+      meta->root_sig->Release();
+
+   if (meta->pipeline_state)
+      meta->pipeline_state->Release();
+}
+
+static VkResult
+dzn_meta_indirect_draw_init(dzn_device *device,
+                            enum dzn_indirect_draw_type type)
+{
+   dzn_meta_indirect_draw *meta =
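/* compute pipeline that patches VkDraw*Indirect records into the layout the D3D12 command signature expects */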
&device->indirect_draws[type]; + dzn_instance *instance = + container_of(device->vk.physical->instance, dzn_instance, vk); + VkResult ret = VK_SUCCESS; + + glsl_type_singleton_init_or_ref(); + + nir_shader *nir = dzn_nir_indirect_draw_shader(type); + bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN || + type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN; + uint32_t shader_params_size = + triangle_fan ? + sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) : + sizeof(struct dzn_indirect_draw_rewrite_params); + + uint32_t root_param_count = 0; + D3D12_ROOT_PARAMETER1 root_params[DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT]; + + root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + .Constants = { + .ShaderRegister = 0, + .RegisterSpace = 0, + .Num32BitValues = shader_params_size / 4, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV, + .Descriptor = { + .ShaderRegister = 1, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, + .Descriptor = { + .ShaderRegister = 2, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + + if (triangle_fan) { + root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, + .Descriptor = { + .ShaderRegister = 3, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + } + + assert(root_param_count <= ARRAY_SIZE(root_params)); + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { + .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, + .Desc_1_1 = { + .NumParameters = root_param_count, + .pParameters = root_params, + .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, + }, + }; + + D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + meta->root_sig = + dzn_device_create_root_sig(device, &root_sig_desc); + if (!meta->root_sig) { + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + goto out; + } + + desc.pRootSignature = meta->root_sig; + dzn_meta_compile_shader(device, nir, &desc.CS); + assert(desc.CS.pShaderBytecode); + + if (FAILED(device->dev->CreateComputePipelineState(&desc, + IID_PPV_ARGS(&meta->pipeline_state)))) + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + +out: + if (ret != VK_SUCCESS) + dzn_meta_indirect_draw_finish(device, type); + + free((void *)desc.CS.pShaderBytecode); + ralloc_free(nir); + glsl_type_singleton_decref(); + + return ret; +} + +#define DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT 3 + +static void +dzn_meta_triangle_fan_rewrite_index_finish(dzn_device *device, + enum dzn_index_type old_index_type) +{ + dzn_meta_triangle_fan_rewrite_index *meta = + &device->triangle_fan[old_index_type]; + + if (meta->root_sig) + meta->root_sig->Release(); + if (meta->pipeline_state) + meta->pipeline_state->Release(); + if (meta->cmd_sig) + meta->cmd_sig->Release(); +} + +static VkResult +dzn_meta_triangle_fan_rewrite_index_init(dzn_device *device, + enum dzn_index_type old_index_type) +{ + dzn_meta_triangle_fan_rewrite_index *meta = + &device->triangle_fan[old_index_type]; + dzn_instance *instance = + 
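/* D3D12 has no triangle-fan topology; fans are rewritten into triangle lists by a compute shader */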
container_of(device->vk.physical->instance, dzn_instance, vk); + VkResult ret = VK_SUCCESS; + + glsl_type_singleton_init_or_ref(); + + uint8_t old_index_size = dzn_index_size(old_index_type); + + nir_shader *nir = dzn_nir_triangle_fan_rewrite_index_shader(old_index_size); + + uint32_t root_param_count = 0; + D3D12_ROOT_PARAMETER1 root_params[DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT]; + + root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV, + .Descriptor = { + .ShaderRegister = 1, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + .Constants = { + .ShaderRegister = 0, + .RegisterSpace = 0, + .Num32BitValues = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + + if (old_index_type != DZN_NO_INDEX) { + root_params[root_param_count++] = D3D12_ROOT_PARAMETER1 { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV, + .Descriptor = { + .ShaderRegister = 2, + .RegisterSpace = 0, + .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + }; + } + + assert(root_param_count <= ARRAY_SIZE(root_params)); + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { + .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, + .Desc_1_1 = { + .NumParameters = root_param_count, + .pParameters = root_params, + .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, + }, + }; + + D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + D3D12_INDIRECT_ARGUMENT_DESC cmd_args[] = { + { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW, + .UnorderedAccessView = { + .RootParameterIndex = 0, + }, + }, + { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, + .Constant = { + .RootParameterIndex = 1, + .DestOffsetIn32BitValues = 0, + .Num32BitValuesToSet = sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, + }, + }, + { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, + }, + }; + + D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = { + .ByteStride = sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params), + .NumArgumentDescs = ARRAY_SIZE(cmd_args), + .pArgumentDescs = cmd_args, + }; + + assert((cmd_sig_desc.ByteStride & 7) == 0); + + meta->root_sig = dzn_device_create_root_sig(device, &root_sig_desc); + if (!meta->root_sig) { + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + goto out; + } + + + desc.pRootSignature = meta->root_sig; + dzn_meta_compile_shader(device, nir, &desc.CS); + + if (FAILED(device->dev->CreateComputePipelineState(&desc, + IID_PPV_ARGS(&meta->pipeline_state)))) { + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + goto out; + } + + if (FAILED(device->dev->CreateCommandSignature(&cmd_sig_desc, + meta->root_sig, + IID_PPV_ARGS(&meta->cmd_sig)))) + ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED); + +out: + if (ret != VK_SUCCESS) + dzn_meta_triangle_fan_rewrite_index_finish(device, old_index_type); + + free((void *)desc.CS.pShaderBytecode); + ralloc_free(nir); + glsl_type_singleton_decref(); + + return ret; +} + +static const D3D12_SHADER_BYTECODE * +dzn_meta_blits_get_vs(dzn_device *device) +{ + dzn_meta_blits *meta = &device->blits; + D3D12_SHADER_BYTECODE *out; + + mtx_lock(&meta->shaders_lock); + + if (meta->vs.pShaderBytecode == NULL) { + nir_shader *nir = 
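/* built once under shaders_lock, then shared by all blit pipelines */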
dzn_nir_blit_vs(); + + NIR_PASS_V(nir, nir_lower_system_values); + + gl_system_value system_values[] = { + SYSTEM_VALUE_FIRST_VERTEX, + SYSTEM_VALUE_BASE_VERTEX, + }; + + NIR_PASS_V(nir, dxil_nir_lower_system_values_to_zero, system_values, + ARRAY_SIZE(system_values)); + + D3D12_SHADER_BYTECODE bc; + + dzn_meta_compile_shader(device, nir, &bc); + meta->vs.pShaderBytecode = + vk_alloc(&device->vk.alloc, bc.BytecodeLength, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (meta->vs.pShaderBytecode) { + meta->vs.BytecodeLength = bc.BytecodeLength; + memcpy((void *)meta->vs.pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength); + out = &meta->vs; + } + free((void *)bc.pShaderBytecode); + ralloc_free(nir); + } else { + out = &meta->vs; + } + + mtx_unlock(&meta->shaders_lock); + + return &meta->vs; +} + +static const D3D12_SHADER_BYTECODE * +dzn_meta_blits_get_fs(dzn_device *device, + const struct dzn_nir_blit_info *info) +{ + dzn_meta_blits *meta = &device->blits; + D3D12_SHADER_BYTECODE *out = NULL; + + mtx_lock(&meta->shaders_lock); + + STATIC_ASSERT(sizeof(struct dzn_nir_blit_info) == sizeof(uint32_t)); + + struct hash_entry *he = + _mesa_hash_table_search(meta->fs, (void *)(uintptr_t)info->hash_key); + + if (!he) { + nir_shader *nir = dzn_nir_blit_fs(info); + + if (info->out_type != GLSL_TYPE_FLOAT) { + dxil_wrap_sampler_state wrap_state = { + .is_int_sampler = 1, + .is_linear_filtering = 0, + .skip_boundary_conditions = 1, + }; + dxil_lower_sample_to_txf_for_integer_tex(nir, &wrap_state, NULL, 0); + } + + D3D12_SHADER_BYTECODE bc; + + dzn_meta_compile_shader(device, nir, &bc); + + out = (D3D12_SHADER_BYTECODE *) + vk_alloc(&device->vk.alloc, + sizeof(D3D12_SHADER_BYTECODE) + bc.BytecodeLength, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (out) { + out->pShaderBytecode = (void *)(out + 1); + memcpy((void *)out->pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength); + out->BytecodeLength = bc.BytecodeLength; + _mesa_hash_table_insert(meta->fs, &info->hash_key, out); + } + free((void *)bc.pShaderBytecode); + ralloc_free(nir); + } else { + out = (D3D12_SHADER_BYTECODE *)he->data; + } + + mtx_unlock(&meta->shaders_lock); + + return out; +} + +static void +dzn_meta_blit_destroy(dzn_device *device, dzn_meta_blit *blit) +{ + if (!blit) + return; + + if (blit->root_sig) + blit->root_sig->Release(); + if (blit->pipeline_state) + blit->pipeline_state->Release(); + + vk_free(&device->vk.alloc, blit); +} + +static dzn_meta_blit * +dzn_meta_blit_create(dzn_device *device, const dzn_meta_blit_key *key) +{ + dzn_meta_blits *blits = &device->blits; + dzn_meta_blit *blit = (dzn_meta_blit *) + vk_zalloc(&device->vk.alloc, sizeof(*blit), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + + if (!blit) + return NULL; + + D3D12_DESCRIPTOR_RANGE1 ranges[] = { + { + .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + .NumDescriptors = 1, + .BaseShaderRegister = 0, + .RegisterSpace = 0, + .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS, + .OffsetInDescriptorsFromTableStart = 0, + }, + }; + + D3D12_STATIC_SAMPLER_DESC samplers[] = { + { + .Filter = key->linear_filter ? 
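+         /* blits sample through a static sampler baked into the root
+          * signature
+          */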
+ D3D12_FILTER_MIN_MAG_MIP_LINEAR : + D3D12_FILTER_MIN_MAG_MIP_POINT, + .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + .MipLODBias = 0, + .MaxAnisotropy = 0, + .MinLOD = 0, + .MaxLOD = D3D12_FLOAT32_MAX, + .ShaderRegister = 0, + .RegisterSpace = 0, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL, + }, + }; + + D3D12_ROOT_PARAMETER1 root_params[] = { + { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + .DescriptorTable = { + .NumDescriptorRanges = ARRAY_SIZE(ranges), + .pDescriptorRanges = ranges, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL, + }, + { + .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS, + .Constants = { + .ShaderRegister = 0, + .RegisterSpace = 0, + .Num32BitValues = 17, + }, + .ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX, + }, + }; + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = { + .Version = D3D_ROOT_SIGNATURE_VERSION_1_1, + .Desc_1_1 = { + .NumParameters = ARRAY_SIZE(root_params), + .pParameters = root_params, + .NumStaticSamplers = ARRAY_SIZE(samplers), + .pStaticSamplers = samplers, + .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE, + }, + }; + + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { + .SampleMask = key->resolve ? 1 : (1ULL << key->samples) - 1, + .RasterizerState = { + .FillMode = D3D12_FILL_MODE_SOLID, + .CullMode = D3D12_CULL_MODE_NONE, + .DepthClipEnable = TRUE, + }, + .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + .SampleDesc = { + .Count = key->resolve ? 1 : key->samples, + .Quality = 0, + }, + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + struct dzn_nir_blit_info blit_fs_info = { + .src_samples = key->samples, + .loc = key->loc, + .out_type = key->out_type, + .sampler_dim = key->sampler_dim, + .src_is_array = key->src_is_array, + .resolve = key->resolve, + .padding = 0, + }; + + blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc); + if (!blit->root_sig) { + dzn_meta_blit_destroy(device, blit); + return NULL; + } + + desc.pRootSignature = blit->root_sig; + + const D3D12_SHADER_BYTECODE *vs, *fs; + + vs = dzn_meta_blits_get_vs(device); + if (!vs) { + dzn_meta_blit_destroy(device, blit); + return NULL; + } + + desc.VS = *vs; + assert(desc.VS.pShaderBytecode); + + fs = dzn_meta_blits_get_fs(device, &blit_fs_info); + if (!fs) { + dzn_meta_blit_destroy(device, blit); + return NULL; + } + + desc.PS = *fs; + assert(desc.PS.pShaderBytecode); + + assert(key->loc == FRAG_RESULT_DATA0 || + key->loc == FRAG_RESULT_DEPTH || + key->loc == FRAG_RESULT_STENCIL); + + if (key->loc == FRAG_RESULT_DATA0) { + desc.NumRenderTargets = 1; + desc.RTVFormats[0] = key->out_format; + desc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf; + } else { + desc.DSVFormat = key->out_format; + if (key->loc == FRAG_RESULT_DEPTH) { + desc.DepthStencilState.DepthEnable = TRUE; + desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + } else { + assert(key->loc == FRAG_RESULT_STENCIL); + desc.DepthStencilState.StencilEnable = TRUE; + desc.DepthStencilState.StencilWriteMask = 0xff; + desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE; + desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE; + desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE; + desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + desc.DepthStencilState.BackFace = 
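/* mirror the REPLACE ops so stencil blits ignore winding */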
desc.DepthStencilState.FrontFace;
+      }
+   }
+
+   if (FAILED(device->dev->CreateGraphicsPipelineState(&desc,
+                                                       IID_PPV_ARGS(&blit->pipeline_state)))) {
+      dzn_meta_blit_destroy(device, blit);
+      return NULL;
+   }
+
+   return blit;
+}
+
+const dzn_meta_blit *
+dzn_meta_blits_get_context(dzn_device *device,
+                           const dzn_meta_blit_key *key)
+{
+   dzn_meta_blit *out = NULL;
+
+   STATIC_ASSERT(sizeof(*key) == sizeof(uint64_t));
+
+   mtx_lock(&device->blits.contexts_lock);
+
+   out = (dzn_meta_blit *)
+      _mesa_hash_table_u64_search(device->blits.contexts, key->u64);
+   if (!out) {
+      out = dzn_meta_blit_create(device, key);
+
+      if (out)
+         _mesa_hash_table_u64_insert(device->blits.contexts, key->u64, out);
+   }
+
+   mtx_unlock(&device->blits.contexts_lock);
+
+   return out;
+}
+
+static void
+dzn_meta_blits_finish(dzn_device *device)
+{
+   dzn_meta_blits *meta = &device->blits;
+
+   vk_free(&device->vk.alloc, (void *)meta->vs.pShaderBytecode);
+
+   if (meta->fs) {
+      hash_table_foreach(meta->fs, he)
+         vk_free(&device->vk.alloc, he->data);
+      _mesa_hash_table_destroy(meta->fs, NULL);
+   }
+
+   if (meta->contexts) {
+      hash_table_foreach(meta->contexts->table, he)
+         dzn_meta_blit_destroy(device, (dzn_meta_blit *)he->data);
+      _mesa_hash_table_u64_destroy(meta->contexts);
+   }
+
+   mtx_destroy(&meta->shaders_lock);
+   mtx_destroy(&meta->contexts_lock);
+}
+
+static VkResult
+dzn_meta_blits_init(dzn_device *device)
+{
+   dzn_instance *instance =
+      container_of(device->vk.physical->instance, dzn_instance, vk);
+   dzn_meta_blits *meta = &device->blits;
+
+   mtx_init(&meta->shaders_lock, mtx_plain);
+   mtx_init(&meta->contexts_lock, mtx_plain);
+
+   meta->fs = _mesa_hash_table_create_u32_keys(NULL);
+   if (!meta->fs) {
+      dzn_meta_blits_finish(device);
+      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   meta->contexts = _mesa_hash_table_u64_create(NULL);
+   if (!meta->contexts) {
+      dzn_meta_blits_finish(device);
+      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   return VK_SUCCESS;
+}
+
+void
+dzn_meta_finish(dzn_device *device)
+{
+   for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++)
+      dzn_meta_triangle_fan_rewrite_index_finish(device, (enum dzn_index_type)i);
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++)
+      dzn_meta_indirect_draw_finish(device, (enum dzn_indirect_draw_type)i);
+
+   dzn_meta_blits_finish(device);
+}
+
+VkResult
+dzn_meta_init(dzn_device *device)
+{
+   VkResult result = dzn_meta_blits_init(device);
+   if (result != VK_SUCCESS)
+      goto out;
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++) {
+      result =
+         dzn_meta_indirect_draw_init(device, (enum dzn_indirect_draw_type)i);
+      if (result != VK_SUCCESS)
+         goto out;
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++) {
+      result =
+         dzn_meta_triangle_fan_rewrite_index_init(device, (enum dzn_index_type)i);
+      if (result != VK_SUCCESS)
+         goto out;
+   }
+
+out:
+   if (result != VK_SUCCESS) {
+      dzn_meta_finish(device);
+      return result;
+   }
+
+   return VK_SUCCESS;
+}
diff --git a/src/microsoft/vulkan/dzn_nir.c b/src/microsoft/vulkan/dzn_nir.c
new file mode 100644
index 00000000000..649d900fd77
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_nir.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <directx/d3d12.h> /* for DXGI_FORMAT_*; angle-bracket include restored */
+
+#include "spirv_to_dxil.h"
+#include "nir_to_dxil.h"
+#include "nir_builder.h"
+#include "nir_vulkan.h"
+
+#include "dzn_nir.h"
+
+static nir_ssa_def *
+dzn_nir_create_bo_desc(nir_builder *b,
+                       nir_variable_mode mode,
+                       uint32_t desc_set,
+                       uint32_t binding,
+                       const char *name,
+                       unsigned access)
+{
+   struct glsl_struct_field field = {
+      .type = mode == nir_var_mem_ubo ?
+              glsl_array_type(glsl_uint_type(), 4096, 4) :
+              glsl_uint_type(),
+      .name = "dummy_int",
+   };
+   const struct glsl_type *dummy_type =
+      glsl_struct_type(&field, 1, "dummy_type", false);
+
+   nir_variable *var =
+      nir_variable_create(b->shader, mode, dummy_type, name);
+   var->data.descriptor_set = desc_set;
+   var->data.binding = binding;
+   var->data.access = access;
+
+   assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
+   if (mode == nir_var_mem_ubo)
+      b->shader->info.num_ubos++;
+   else
+      b->shader->info.num_ssbos++;
+
+   VkDescriptorType desc_type =
+      var->data.mode == nir_var_mem_ubo ?
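+      /* descriptor type mirrors the variable mode chosen above */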
+ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + nir_address_format addr_format = nir_address_format_32bit_index_offset; + nir_ssa_def *index = + nir_vulkan_resource_index(b, + nir_address_format_num_components(addr_format), + nir_address_format_bit_size(addr_format), + nir_imm_int(b, 0), + .desc_set = desc_set, + .binding = binding, + .desc_type = desc_type); + + nir_ssa_def *desc = + nir_load_vulkan_descriptor(b, + nir_address_format_num_components(addr_format), + nir_address_format_bit_size(addr_format), + index, + .desc_type = desc_type); + + return nir_channel(b, desc, 0); +} + +nir_shader * +dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type) +{ + const char *type_str[] = { + "draw", + "indexed_draw", + "draw_triangle_fan", + "indexed_draw_triangle_fan", + }; + + assert(type < ARRAY_SIZE(type_str)); + + bool indexed = type == DZN_INDIRECT_INDEXED_DRAW || + type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN; + bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN || + type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN; + nir_builder b = + nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, + dxil_get_nir_compiler_options(), + "dzn_meta_indirect_%s()", + type_str[type]); + b.shader->info.internal = true; + + struct glsl_struct_field field = { + .type = glsl_uint_type(), + .name = "dummy_int", + }; + const struct glsl_type *dummy_type = + glsl_struct_type(&field, 1, "dummy_type", false); + + nir_ssa_def *params_desc = + dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); + nir_ssa_def *draw_buf_desc = + dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE); + nir_ssa_def *exec_buf_desc = + dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE); + + unsigned params_size = + triangle_fan ? + sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) : + sizeof(struct dzn_indirect_draw_rewrite_params); + + nir_ssa_def *params = + nir_load_ubo(&b, params_size / 4, 32, + params_desc, nir_imm_int(&b, 0), + .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); + + nir_ssa_def *draw_stride = nir_channel(&b, params, 0); + nir_ssa_def *exec_stride = nir_imm_int(&b, sizeof(struct dzn_indirect_draw_exec_params)); + nir_ssa_def *index = + nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); + + nir_ssa_def *draw_offset = nir_imul(&b, draw_stride, index); + nir_ssa_def *exec_offset = nir_imul(&b, exec_stride, index); + + nir_ssa_def *draw_info1 = + nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4); + nir_ssa_def *draw_info2 = + indexed ? + nir_load_ssbo(&b, 1, 32, draw_buf_desc, + nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) : + nir_imm_int(&b, 0); + + nir_ssa_def *first_vertex = nir_channel(&b, draw_info1, indexed ? 3 : 2); + nir_ssa_def *base_instance = + indexed ? draw_info2 : nir_channel(&b, draw_info1, 3); + + nir_ssa_def *exec_vals[7] = { + first_vertex, + base_instance, + }; + + if (triangle_fan) { + /* Patch {vertex,index}_count and first_index */ + nir_ssa_def *triangle_count = + nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2)); + exec_vals[2] = nir_imul_imm(&b, triangle_count, 3); + exec_vals[3] = nir_channel(&b, draw_info1, 1); + exec_vals[4] = nir_imm_int(&b, 0); + exec_vals[5] = indexed ? nir_channel(&b, draw_info1, 3) : nir_imm_int(&b, 0); + exec_vals[6] = indexed ? 
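/* firstInstance: dword 4 of VkDrawIndexedIndirectCommand, dword 3 of VkDrawIndirectCommand */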
draw_info2 : nir_channel(&b, draw_info1, 3);
+
+      nir_ssa_def *triangle_fan_exec_buf_desc =
+         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
+                                "triangle_fan_exec_buf",
+                                ACCESS_NON_READABLE);
+      nir_ssa_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1);
+      nir_ssa_def *triangle_fan_index_buf_addr_lo =
+         nir_iadd(&b, nir_channel(&b, params, 2),
+                  nir_imul(&b, triangle_fan_index_buf_stride, index));
+      nir_ssa_def *addr_lo_overflow =
+         nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
+      nir_ssa_def *triangle_fan_index_buf_addr_hi =
+         nir_iadd(&b, nir_channel(&b, params, 3),
+                  nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
+
+      nir_ssa_def *triangle_fan_exec_vals[] = {
+         triangle_fan_index_buf_addr_lo,
+         triangle_fan_index_buf_addr_hi,
+         nir_channel(&b, draw_info1, 2),
+         triangle_count,
+         nir_imm_int(&b, 1),
+         nir_imm_int(&b, 1),
+      };
+
+      assert(sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params) == (ARRAY_SIZE(triangle_fan_exec_vals) * 4));
+
+      nir_ssa_def *triangle_fan_exec_stride =
+         nir_imm_int(&b, sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params));
+      nir_ssa_def *triangle_fan_exec_offset =
+         nir_imul(&b, triangle_fan_exec_stride, index);
+
+      nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[0], 4),
+                     triangle_fan_exec_buf_desc, triangle_fan_exec_offset,
+                     .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 4);
+      nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[4], 2),
+                     triangle_fan_exec_buf_desc,
+                     nir_iadd_imm(&b, triangle_fan_exec_offset, 16),
+                     .write_mask = 0x3, .access = ACCESS_NON_READABLE, .align_mul = 4);
+
+      nir_ssa_def *ibview_vals[] = {
+         triangle_fan_index_buf_addr_lo,
+         triangle_fan_index_buf_addr_hi,
+         triangle_fan_index_buf_stride,
+         nir_imm_int(&b, DXGI_FORMAT_R32_UINT),
+      };
+
+      /* All four D3D12_INDEX_BUFFER_VIEW dwords (address lo/hi, size,
+       * format) are produced here and nowhere else, so the full vec4 must
+       * be written.
+       */
+      nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)),
+                     exec_buf_desc, exec_offset,
+                     .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 4);
+      exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4);
+   } else {
+      exec_vals[2] = nir_channel(&b, draw_info1, 0);
+      exec_vals[3] = nir_channel(&b, draw_info1, 1);
+      exec_vals[4] = nir_channel(&b, draw_info1, 2);
+      exec_vals[5] = nir_channel(&b, draw_info1, 3);
+      exec_vals[6] = draw_info2;
+   }
+
+   nir_store_ssbo(&b, nir_vec(&b, exec_vals, 4),
+                  exec_buf_desc, exec_offset,
+                  .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 4);
+   nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], 3),
+                  exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16),
+                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
+
+   return b.shader;
+}
+
+nir_shader *
+dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
+{
+   assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4);
+
+   nir_builder b =
+      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
+                                     dxil_get_nir_compiler_options(),
+                                     "dzn_meta_triangle_rewrite_index(old_index_size=%d)",
+                                     old_index_size);
+   b.shader->info.internal = true;
+
+   nir_ssa_def *params_desc =
+      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
+   nir_ssa_def *new_index_buf_desc =
+      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
+                             "new_index_buf", ACCESS_NON_READABLE);
+
+   nir_ssa_def *old_index_buf_desc = NULL;
+   if (old_index_size > 0) {
+      old_index_buf_desc =
+         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
+                                "old_index_buf", ACCESS_NON_WRITEABLE);
+   }
+
+   nir_ssa_def *params =
+      nir_load_ubo(&b, sizeof(struct
dzn_triangle_fan_rewrite_index_params) / 4, 32, + params_desc, nir_imm_int(&b, 0), + .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); + + nir_ssa_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0); + nir_ssa_def *new_indices; + + if (old_index_size > 0) { + nir_ssa_def *old_first_index = nir_channel(&b, params, 0); + nir_ssa_def *old_index0_offset = + nir_imul_imm(&b, old_first_index, old_index_size); + nir_ssa_def *old_index1_offset = + nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index), + old_index_size); + + nir_ssa_def *old_index0 = + nir_load_ssbo(&b, 1, 32, old_index_buf_desc, + old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset, + .align_mul = 4); + + if (old_index_size == 2) { + old_index0 = + nir_bcsel(&b, + nir_ieq_imm(&b, nir_iand_imm(&b, old_index0_offset, 0x2), 0), + nir_iand_imm(&b, old_index0, 0xffff), + nir_ushr_imm(&b, old_index0, 16)); + } + + nir_ssa_def *old_index12 = + nir_load_ssbo(&b, 2, 32, old_index_buf_desc, + old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset, + .align_mul = 4); + if (old_index_size == 2) { + nir_ssa_def *indices[] = { + nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff), + nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16), + nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff), + }; + + old_index12 = + nir_bcsel(&b, + nir_ieq_imm(&b, nir_iand_imm(&b, old_index1_offset, 0x2), 0), + nir_vec2(&b, indices[0], indices[1]), + nir_vec2(&b, indices[1], indices[2])); + } + + /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */ + new_indices = + nir_vec3(&b, nir_channel(&b, old_index12, 0), + nir_channel(&b, old_index12, 1), old_index0); + } else { + nir_ssa_def *first_vertex = nir_channel(&b, params, 0); + + new_indices = + nir_vec3(&b, + nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), first_vertex), + nir_iadd(&b, nir_iadd_imm(&b, triangle, 2), first_vertex), + first_vertex); + } + + nir_ssa_def *new_index_offset = + nir_imul_imm(&b, triangle, 4 * 3); + + nir_store_ssbo(&b, new_indices, new_index_buf_desc, + new_index_offset, + .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4); + + return b.shader; +} + +nir_shader * +dzn_nir_blit_vs(void) +{ + nir_builder b = + nir_builder_init_simple_shader(MESA_SHADER_VERTEX, + dxil_get_nir_compiler_options(), + "dzn_meta_blit_vs()"); + b.shader->info.internal = true; + + nir_ssa_def *params_desc = + dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0); + + nir_variable *out_pos = + nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), + "gl_Position"); + out_pos->data.location = VARYING_SLOT_POS; + out_pos->data.driver_location = 0; + + nir_variable *out_coords = + nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3), + "coords"); + out_coords->data.location = VARYING_SLOT_TEX0; + out_coords->data.driver_location = 1; + + nir_ssa_def *vertex = nir_load_vertex_id(&b); + nir_ssa_def *base = nir_imul_imm(&b, vertex, 4 * sizeof(float)); + nir_ssa_def *coords = + nir_load_ubo(&b, 4, 32, params_desc, base, + .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0); + nir_ssa_def *pos = + nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1), + nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0)); + nir_ssa_def *z_coord = + nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)), + .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0); + coords = nir_vec3(&b, nir_channel(&b, 
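+   /* The UBO layout the blit VS above implies on the CPU side (a sketch,
+    * not a struct the driver actually declares): four (x, y, u, v) corners
+    * indexed by gl_VertexID, followed by a constant z/layer coordinate at
+    * byte offset 64.
+    *
+    *    struct blit_vs_params {
+    *       float coords[4][4]; // (x, y, u, v) per quad corner
+    *       float z_coord;      // third texture coordinate
+    *    };
+    */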
coords, 2), nir_channel(&b, coords, 3), z_coord); + + nir_store_var(&b, out_pos, pos, 0xf); + nir_store_var(&b, out_coords, coords, 0x7); + return b.shader; +} + +nir_shader * +dzn_nir_blit_fs(const struct dzn_nir_blit_info *info) +{ + bool ms = info->src_samples > 1; + nir_alu_type nir_out_type = + nir_get_nir_type_for_glsl_base_type(info->out_type); + uint32_t coord_comps = + glsl_get_sampler_dim_coordinate_components(info->sampler_dim) + + info->src_is_array; + + nir_builder b = + nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, + dxil_get_nir_compiler_options(), + "dzn_meta_blit_fs()"); + b.shader->info.internal = true; + + const struct glsl_type *tex_type = + glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type); + nir_variable *tex_var = + nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture"); + nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var); + + nir_variable *pos_var = + nir_variable_create(b.shader, nir_var_shader_in, + glsl_vector_type(GLSL_TYPE_FLOAT, 4), + "gl_FragCoord"); + pos_var->data.location = VARYING_SLOT_POS; + pos_var->data.driver_location = 0; + + nir_variable *coord_var = + nir_variable_create(b.shader, nir_var_shader_in, + glsl_vector_type(GLSL_TYPE_FLOAT, 3), + "coord"); + coord_var->data.location = VARYING_SLOT_TEX0; + coord_var->data.driver_location = 1; + nir_ssa_def *coord = + nir_channels(&b, nir_load_var(&b, coord_var), (1 << coord_comps) - 1); + + uint32_t out_comps = + (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4; + nir_variable *out = + nir_variable_create(b.shader, nir_var_shader_out, + glsl_vector_type(info->out_type, out_comps), + "out"); + out->data.location = info->loc; + + nir_ssa_def *res = NULL; + + if (info->resolve) { + /* When resolving a float type, we need to calculate the average of all + * samples. For integer resolve, Vulkan says that one sample should be + * chosen without telling which. Let's just pick the first one in that + * case. + */ + + unsigned nsamples = info->out_type == GLSL_TYPE_FLOAT ? + info->src_samples : 1; + for (unsigned s = 0; s < nsamples; s++) { + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4); + + tex->op = nir_texop_txf_ms; + tex->dest_type = nir_out_type; + tex->texture_index = 0; + tex->is_array = info->src_is_array; + tex->sampler_dim = info->sampler_dim; + + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); + tex->coord_components = coord_comps; + + tex->src[1].src_type = nir_tex_src_ms_index; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s)); + + tex->src[2].src_type = nir_tex_src_lod; + tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + + tex->src[3].src_type = nir_tex_src_texture_deref; + tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); + + nir_builder_instr_insert(&b, &tex->instr); + res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa; + } + + if (nsamples > 1) { + unsigned type_sz = nir_alu_type_get_type_size(nir_out_type); + res = nir_fmul(&b, res, nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz)); + } + } else { + nir_tex_instr *tex = + nir_tex_instr_create(b.shader, ms ? 
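+   /* The resolve loop above computes, for an N-sample float source,
+    *
+    *    out = (s_0 + s_1 + ... + s_{N-1}) * (1.0 / N)
+    *
+    * in full precision before the final conversion. For integer formats
+    * Vulkan permits returning any single sample, so nsamples is forced to
+    * 1 and only sample 0 is fetched.
+    */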
4 : 3); + + tex->dest_type = nir_out_type; + tex->is_array = info->src_is_array; + tex->sampler_dim = info->sampler_dim; + + if (ms) { + tex->op = nir_texop_txf_ms; + + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord)); + tex->coord_components = coord_comps; + + tex->src[1].src_type = nir_tex_src_ms_index; + tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b)); + + tex->src[2].src_type = nir_tex_src_lod; + tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + + tex->src[3].src_type = nir_tex_src_texture_deref; + tex->src[3].src = nir_src_for_ssa(&tex_deref->dest.ssa); + } else { + nir_variable *sampler_var = + nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler"); + nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var); + + tex->op = nir_texop_tex; + tex->sampler_index = 0; + + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(coord); + tex->coord_components = coord_comps; + + tex->src[1].src_type = nir_tex_src_texture_deref; + tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa); + + tex->src[2].src_type = nir_tex_src_sampler_deref; + tex->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa); + } + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); + nir_builder_instr_insert(&b, &tex->instr); + res = &tex->dest.ssa; + } + + nir_store_var(&b, out, nir_channels(&b, res, (1 << out_comps) - 1), 0xf); + + return b.shader; +} diff --git a/src/microsoft/vulkan/dzn_nir.h b/src/microsoft/vulkan/dzn_nir.h new file mode 100644 index 00000000000..f3aced31a71 --- /dev/null +++ b/src/microsoft/vulkan/dzn_nir.h @@ -0,0 +1,138 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#ifndef DZN_NIR_H
+#define DZN_NIR_H
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <directx/d3d12.h>
+
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct dzn_indirect_draw_params {
+   uint32_t vertex_count;
+   uint32_t instance_count;
+   uint32_t first_vertex;
+   uint32_t first_instance;
+};
+
+struct dzn_indirect_indexed_draw_params {
+   uint32_t index_count;
+   uint32_t instance_count;
+   uint32_t first_index;
+   int32_t vertex_offset;
+   uint32_t first_instance;
+};
+
+struct dzn_indirect_draw_rewrite_params {
+   uint32_t draw_buf_stride;
+};
+
+struct dzn_indirect_draw_triangle_fan_rewrite_params {
+   uint32_t draw_buf_stride;
+   uint32_t triangle_fan_index_buf_stride;
+   uint64_t triangle_fan_index_buf_start;
+};
+
+struct dzn_indirect_draw_exec_params {
+   struct {
+      uint32_t first_vertex;
+      uint32_t base_instance;
+   } sysvals;
+   union {
+      struct dzn_indirect_draw_params draw;
+      struct dzn_indirect_indexed_draw_params indexed_draw;
+   };
+};
+
+struct dzn_indirect_triangle_fan_draw_exec_params {
+   D3D12_INDEX_BUFFER_VIEW ibview;
+   struct {
+      uint32_t first_vertex;
+      uint32_t base_instance;
+   } sysvals;
+   union {
+      struct dzn_indirect_draw_params draw;
+      struct dzn_indirect_indexed_draw_params indexed_draw;
+   };
+};
+
+struct dzn_triangle_fan_rewrite_index_params {
+   union {
+      uint32_t first_index;
+      uint32_t first_vertex;
+   };
+};
+
+struct dzn_indirect_triangle_fan_rewrite_index_exec_params {
+   uint64_t new_index_buf;
+   struct dzn_triangle_fan_rewrite_index_params params;
+   struct {
+      uint32_t x, y, z;
+   } group_count;
+};
+
+enum dzn_indirect_draw_type {
+   DZN_INDIRECT_DRAW,
+   DZN_INDIRECT_INDEXED_DRAW,
+   DZN_INDIRECT_DRAW_TRIANGLE_FAN,
+   DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN,
+   DZN_NUM_INDIRECT_DRAW_TYPES,
+};
+
+nir_shader *
+dzn_nir_indirect_draw_shader(enum dzn_indirect_draw_type type);
+
+nir_shader *
+dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size);
+
+struct dzn_nir_blit_info {
+   union {
+      struct {
+         uint32_t src_samples : 6;
+         uint32_t loc : 4;
+         uint32_t out_type : 4;
+         uint32_t sampler_dim : 4;
+         uint32_t src_is_array : 1;
+         uint32_t resolve : 1;
+         uint32_t padding : 12;
+      };
+      const uint32_t hash_key;
+   };
+};
+
+nir_shader *
+dzn_nir_blit_vs(void);
+
+nir_shader *
+dzn_nir_blit_fs(const struct dzn_nir_blit_info *info);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/microsoft/vulkan/dzn_pass.cpp b/src/microsoft/vulkan/dzn_pass.cpp
new file mode 100644
index 00000000000..590d5137420
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_pass.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_format.h" + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreateRenderPass2(VkDevice dev, + const VkRenderPassCreateInfo2KHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, dzn_render_pass, pass, 1); + VK_MULTIALLOC_DECL(&ma, dzn_subpass, subpasses, + pCreateInfo->subpassCount); + VK_MULTIALLOC_DECL(&ma, dzn_attachment, attachments, + pCreateInfo->attachmentCount); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS); + pass->subpasses = subpasses; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = attachments; + pass->attachment_count = pCreateInfo->attachmentCount; + + assert(!pass->attachment_count || pass->attachments); + for (uint32_t i = 0; i < pass->attachment_count; i++) { + const VkAttachmentDescription2 *attachment = &pCreateInfo->pAttachments[i]; + + attachments[i].idx = i; + attachments[i].format = attachment->format; + assert(attachments[i].format); + if (vk_format_is_depth_or_stencil(attachment->format)) { + attachments[i].clear.depth = + attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; + attachments[i].clear.stencil = + attachment->stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; + } else { + attachments[i].clear.color = + attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR; + } + attachments[i].samples = attachment->samples; + attachments[i].before = dzn_image_layout_to_state(attachment->initialLayout); + attachments[i].after = dzn_image_layout_to_state(attachment->finalLayout); + attachments[i].last = attachments[i].before; + } + + assert(subpasses); + for (uint32_t i = 0; i < pass->subpass_count; i++) { + const VkSubpassDescription2 *subpass = &pCreateInfo->pSubpasses[i]; + const VkSubpassDescription2 *subpass_after = NULL; + + if (i + 1 < pass->subpass_count) + subpass_after = &pCreateInfo->pSubpasses[i + 1]; + + for (uint32_t j = 0; j < subpass->colorAttachmentCount; j++) { + uint32_t idx = subpass->pColorAttachments[j].attachment; + subpasses[i].colors[j].idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpasses[i].colors[j].before = attachments[idx].last; + subpasses[i].colors[j].during = + dzn_image_layout_to_state(subpass->pColorAttachments[j].layout); + attachments[idx].last = subpasses[i].colors[j].during; + subpasses[i].color_count = j + 1; + } + + idx = subpass->pResolveAttachments ? 
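+         /* The `last` field threads each attachment's D3D12 resource state
+          * through the pass: it starts at the `before` state derived from
+          * initialLayout and is advanced every time a subpass references the
+          * attachment, so for an attachment used by two subpasses the chain
+          * of transitions the command buffer has to emit is (sketch):
+          *
+          *    before -> subpass[0].during -> subpass[1].during -> after
+          */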
+ subpass->pResolveAttachments[j].attachment : + VK_ATTACHMENT_UNUSED; + subpasses[i].resolve[j].idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpasses[i].resolve[j].before = attachments[idx].last; + subpasses[i].resolve[j].during = + dzn_image_layout_to_state(subpass->pResolveAttachments[j].layout); + attachments[idx].last = subpasses[i].resolve[j].during; + } + } + + subpasses[i].zs.idx = VK_ATTACHMENT_UNUSED; + if (subpass->pDepthStencilAttachment) { + uint32_t idx = subpass->pDepthStencilAttachment->attachment; + subpasses[i].zs.idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpasses[i].zs.before = attachments[idx].last; + subpasses[i].zs.during = + dzn_image_layout_to_state(subpass->pDepthStencilAttachment->layout); + attachments[idx].last = subpasses[i].zs.during; + } + } + + subpasses[i].input_count = subpass->inputAttachmentCount; + for (uint32_t j = 0; j < subpasses[i].input_count; j++) { + uint32_t idx = subpass->pInputAttachments[j].attachment; + subpasses[i].inputs[j].idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpasses[i].inputs[j].before = attachments[idx].last; + subpasses[i].inputs[j].during = + dzn_image_layout_to_state(subpass->pInputAttachments[j].layout); + attachments[idx].last = subpasses[i].inputs[j].during; + } + } + } + + *pRenderPass = dzn_render_pass_to_handle(pass); + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyRenderPass(VkDevice dev, + VkRenderPass p, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(dzn_device, device, dev); + VK_FROM_HANDLE(dzn_render_pass, pass, p); + + if (!pass) + return; + + vk_object_base_finish(&pass->base); + vk_free2(&device->vk.alloc, pAllocator, pass); +} + + +VKAPI_ATTR void VKAPI_CALL +dzn_GetRenderAreaGranularity(VkDevice device, + VkRenderPass pass, + VkExtent2D *pGranularity) +{ + // FIXME: query the actual optimal granularity + pGranularity->width = pGranularity->height = 1; +} diff --git a/src/microsoft/vulkan/dzn_pipeline.cpp b/src/microsoft/vulkan/dzn_pipeline.cpp new file mode 100644 index 00000000000..f21ff8d00ec --- /dev/null +++ b/src/microsoft/vulkan/dzn_pipeline.cpp @@ -0,0 +1,1184 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include "dzn_private.h"
+
+#include "spirv_to_dxil.h"
+
+#include "vk_alloc.h"
+#include "vk_util.h"
+#include "vk_format.h"
+
+#include <directx/d3d12.h>
+#include <dxguids/dxguids.h>
+
+#include <dxcapi.h>
+#include <wrl/client.h>
+
+#include "util/u_debug.h"
+
+using Microsoft::WRL::ComPtr;
+
+static dxil_spirv_shader_stage
+to_dxil_shader_stage(VkShaderStageFlagBits in)
+{
+   switch (in) {
+   case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
+   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
+   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
+   case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
+   case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
+   case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
+   default: unreachable("Unsupported stage");
+   }
+}
+
+static VkResult
+dzn_pipeline_compile_shader(dzn_device *device,
+                            const VkAllocationCallbacks *alloc,
+                            dzn_pipeline_layout *layout,
+                            const VkPipelineShaderStageCreateInfo *stage_info,
+                            enum dxil_spirv_yz_flip_mode yz_flip_mode,
+                            uint16_t y_flip_mask, uint16_t z_flip_mask,
+                            D3D12_SHADER_BYTECODE *slot)
+{
+   dzn_instance *instance =
+      container_of(device->vk.physical->instance, dzn_instance, vk);
+   IDxcValidator *validator = instance->dxc.validator;
+   IDxcLibrary *library = instance->dxc.library;
+   IDxcCompiler *compiler = instance->dxc.compiler;
+   const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
+   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
+   struct dxil_spirv_object dxil_object;
+
+   /* convert VkSpecializationInfo */
+   struct dxil_spirv_specialization *spec = NULL;
+   uint32_t num_spec = 0;
+
+   if (spec_info && spec_info->mapEntryCount) {
+      spec = (struct dxil_spirv_specialization *)
+         vk_alloc2(&device->vk.alloc, alloc,
+                   spec_info->mapEntryCount * sizeof(*spec), 8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+      if (!spec)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
+         const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
+         const uint8_t *data = (const uint8_t *)spec_info->pData + entry->offset;
+         assert(data + entry->size <= (const uint8_t *)spec_info->pData + spec_info->dataSize);
+         spec[i].id = entry->constantID;
+         switch (entry->size) {
+         case 8:
+            spec[i].value.u64 = *(const uint64_t *)data;
+            break;
+         case 4:
+            spec[i].value.u32 = *(const uint32_t *)data;
+            break;
+         case 2:
+            spec[i].value.u16 = *(const uint16_t *)data;
+            break;
+         case 1:
+            spec[i].value.u8 = *(const uint8_t *)data;
+            break;
+         default:
+            assert(!"Invalid spec constant size");
+            break;
+         }
+
+         spec[i].defined_on_module = false;
+      }
+
+      num_spec = spec_info->mapEntryCount;
+   }
+
+   struct dxil_spirv_runtime_conf conf = {
+      .runtime_data_cbv = {
+         .register_space = DZN_REGISTER_SPACE_SYSVALS,
+         .base_shader_register = 0,
+      },
+      .push_constant_cbv = {
+         .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
+         .base_shader_register = 0,
+      },
+      .descriptor_set_count = layout->set_count,
+      .descriptor_sets = layout->binding_translation,
+      .zero_based_vertex_instance_id = false,
+      .yz_flip = {
+         .mode = yz_flip_mode,
+         .y_mask = y_flip_mask,
+         .z_mask = z_flip_mask,
+      },
+      .read_only_images_as_srvs = true,
+   };
+
+   struct dxil_spirv_debug_options dbg_opts = {
+      .dump_nir = !!(instance->debug_flags & DZN_DEBUG_NIR),
+   };
+
+   /* TODO: Extend spirv_to_dxil() to allow passing a custom allocator */
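+   /* Compilation is a two-step process here: spirv_to_dxil() lowers the
+    * SPIR-V module to NIR and then to a DXIL container, and IDxcValidator
+    * then validates and signs that container in place
+    * (DxcValidatorFlags_InPlaceEdit). An unsigned container is generally
+    * rejected by the D3D12 runtime unless developer mode is enabled, so a
+    * validation failure below is fatal.
+    */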
+   bool success =
+      spirv_to_dxil((uint32_t *)module->data, module->size / sizeof(uint32_t),
+                    spec, num_spec,
+                    to_dxil_shader_stage(stage_info->stage),
+                    stage_info->pName, &dbg_opts, &conf, &dxil_object);
+
+   vk_free2(&device->vk.alloc, alloc, spec);
+
+   if (!success)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   dzn_shader_blob blob(dxil_object.binary.buffer, dxil_object.binary.size);
+   ComPtr<IDxcOperationResult> result;
+   validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit, &result);
+
+   if (instance->debug_flags & DZN_DEBUG_DXIL) {
+      IDxcBlobEncoding *disassembly;
+      compiler->Disassemble(&blob, &disassembly);
+      ComPtr<IDxcBlobEncoding> blobUtf8;
+      library->GetBlobAsUtf8(disassembly, blobUtf8.GetAddressOf());
+      char *disasm = reinterpret_cast<char *>(blobUtf8->GetBufferPointer());
+      disasm[blobUtf8->GetBufferSize() - 1] = 0;
+      fprintf(stderr, "== BEGIN SHADER ============================================\n"
+                      "%s\n"
+                      "== END SHADER ==============================================\n",
+              disasm);
+      disassembly->Release();
+   }
+
+   HRESULT validationStatus;
+   result->GetStatus(&validationStatus);
+   if (FAILED(validationStatus)) {
+      if (instance->debug_flags & DZN_DEBUG_DXIL) {
+         ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
+         result->GetErrorBuffer(&printBlob);
+         library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());
+
+         char *errorString;
+         if (printBlobUtf8) {
+            errorString = reinterpret_cast<char *>(printBlobUtf8->GetBufferPointer());
+
+            errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
+            fprintf(stderr,
+                    "== VALIDATION ERROR =============================================\n"
+                    "%s\n"
+                    "== END ==========================================================\n",
+                    errorString);
+         }
+      }
+
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   slot->pShaderBytecode = dxil_object.binary.buffer;
+   slot->BytecodeLength = dxil_object.binary.size;
+   return VK_SUCCESS;
+}
+
+static D3D12_SHADER_BYTECODE *
+dzn_pipeline_get_gfx_shader_slot(D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc,
+                                 VkShaderStageFlagBits in)
+{
+   switch (in) {
+   case VK_SHADER_STAGE_VERTEX_BIT: return &desc->VS;
+   /* Hull shader == tessellation control, domain shader == tessellation
+    * evaluation.
+    */
+   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return &desc->HS;
+   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return &desc->DS;
+   case VK_SHADER_STAGE_GEOMETRY_BIT: return &desc->GS;
+   case VK_SHADER_STAGE_FRAGMENT_BIT: return &desc->PS;
+   default: unreachable("Unsupported stage");
+   }
+}
+
+static VkResult
+dzn_graphics_pipeline_translate_vi(dzn_graphics_pipeline *pipeline,
+                                   const VkAllocationCallbacks *alloc,
+                                   D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+                                   const VkGraphicsPipelineCreateInfo *in,
+                                   D3D12_INPUT_ELEMENT_DESC **input_elems)
+{
+   dzn_device *device =
+      container_of(pipeline->base.base.device, dzn_device, vk);
+   const VkPipelineVertexInputStateCreateInfo *in_vi =
+      in->pVertexInputState;
+
+   if (!in_vi->vertexAttributeDescriptionCount) {
+      out->InputLayout.pInputElementDescs = NULL;
+      out->InputLayout.NumElements = 0;
+      *input_elems = NULL;
+      return VK_SUCCESS;
+   }
+
+   *input_elems = (D3D12_INPUT_ELEMENT_DESC *)
+      vk_alloc2(&device->vk.alloc, alloc,
+                sizeof(**input_elems) * in_vi->vertexAttributeDescriptionCount, 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!*input_elems)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   D3D12_INPUT_ELEMENT_DESC *inputs = *input_elems;
+   D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];
+
+   pipeline->vb.count = 0;
+   for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
+      const struct VkVertexInputBindingDescription *bdesc =
+         &in_vi->pVertexBindingDescriptions[i];
+
+      pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
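+      /* Taken together, this binding loop and the attribute loop below boil
+       * down to the following mapping for, say, a per-instance vec2
+       * attribute at location 1, binding 0, offset 8 (a sketch):
+       *
+       *    (D3D12_INPUT_ELEMENT_DESC) {
+       *       .SemanticName = "TEXCOORD",   // nir_to_dxil() convention
+       *       .SemanticIndex = 1,           // Vulkan location
+       *       .Format = DXGI_FORMAT_R32G32_FLOAT,
+       *       .InputSlot = 0,               // Vulkan binding
+       *       .AlignedByteOffset = 8,
+       *       .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA,
+       *       .InstanceDataStepRate = 1,
+       *    };
+       */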
+      pipeline->vb.strides[bdesc->binding] = bdesc->stride;
+      if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
+         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
+      } else {
+         assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
+         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
+      }
+   }
+
+   for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *attr =
+         &in_vi->pVertexAttributeDescriptions[i];
+
+      /* nir_to_dxil() names all vertex inputs TEXCOORDx */
+      inputs[i].SemanticName = "TEXCOORD";
+      inputs[i].SemanticIndex = attr->location;
+      inputs[i].Format = dzn_buffer_get_dxgi_format(attr->format);
+      inputs[i].InputSlot = attr->binding;
+      inputs[i].InputSlotClass = slot_class[attr->binding];
+      inputs[i].InstanceDataStepRate =
+         inputs[i].InputSlotClass == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0;
+      inputs[i].AlignedByteOffset = attr->offset;
+   }
+
+   out->InputLayout.pInputElementDescs = inputs;
+   out->InputLayout.NumElements = in_vi->vertexAttributeDescriptionCount;
+   return VK_SUCCESS;
+}
+
+static D3D12_PRIMITIVE_TOPOLOGY_TYPE
+to_prim_topology_type(VkPrimitiveTopology in)
+{
+   switch (in) {
+   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
+   default: unreachable("Invalid primitive topology");
+   }
+}
+
+static D3D12_PRIMITIVE_TOPOLOGY
+to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points)
+{
+   switch (in) {
+   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
+   /* Triangle fans are emulated using an intermediate index buffer.
*/ + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ; + case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: + assert(patch_control_points); + return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1); + default: unreachable("Invalid primitive topology"); + } +} + +static void +dzn_graphics_pipeline_translate_ia(dzn_graphics_pipeline *pipeline, + D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, + const VkGraphicsPipelineCreateInfo *in) +{ + const VkPipelineInputAssemblyStateCreateInfo *in_ia = + in->pInputAssemblyState; + const VkPipelineTessellationStateCreateInfo *in_tes = + (out->DS.pShaderBytecode && out->HS.pShaderBytecode) ? + in->pTessellationState : NULL; + + out->PrimitiveTopologyType = to_prim_topology_type(in_ia->topology); + pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + pipeline->ia.topology = + to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0); + + /* FIXME: does that work for u16 index buffers? */ + if (in_ia->primitiveRestartEnable) + out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF; + else + out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; +} + +static D3D12_FILL_MODE +translate_polygon_mode(VkPolygonMode in) +{ + switch (in) { + case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID; + case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME; + default: unreachable("Unsupported polygon mode"); + } +} + +static D3D12_CULL_MODE +translate_cull_mode(VkCullModeFlags in) +{ + switch (in) { + case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE; + case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT; + case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK; + /* Front+back face culling is equivalent to 'rasterization disabled' */ + case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE; + default: unreachable("Unsupported cull mode"); + } +} + +static void +dzn_graphics_pipeline_translate_rast(dzn_graphics_pipeline *pipeline, + D3D12_GRAPHICS_PIPELINE_STATE_DESC *out, + const VkGraphicsPipelineCreateInfo *in) +{ + const VkPipelineRasterizationStateCreateInfo *in_rast = + in->pRasterizationState; + const VkPipelineViewportStateCreateInfo *in_vp = + in->pViewportState; + + if (in_vp) { + pipeline->vp.count = in_vp->viewportCount; + if (in_vp->pViewports) { + for (uint32_t i = 0; in_vp->pViewports && i < in_vp->viewportCount; i++) + dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]); + } + + pipeline->scissor.count = in_vp->scissorCount; + if (in_vp->pScissors) { + for (uint32_t i = 0; i < in_vp->scissorCount; i++) + dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]); + } + } + + out->RasterizerState.DepthClipEnable = !in_rast->depthClampEnable; + out->RasterizerState.FillMode = translate_polygon_mode(in_rast->polygonMode); + out->RasterizerState.CullMode = translate_cull_mode(in_rast->cullMode); + out->RasterizerState.FrontCounterClockwise = + in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE; + if (in_rast->depthBiasEnable) { + out->RasterizerState.DepthBias = in_rast->depthBiasConstantFactor; + out->RasterizerState.SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor; + out->RasterizerState.DepthBiasClamp = in_rast->depthBiasClamp; + } + + 
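+   /* D3D12 exposes no depth-clamp toggle: clamping is the behavior you get
+    * when depth clipping is disabled, hence DepthClipEnable =
+    * !depthClampEnable above. Wide lines are not supported either, which is
+    * why only lineWidth == 1.0 passes the assertion below.
+    */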
assert(in_rast->lineWidth == 1.0f);
+}
+
+static void
+dzn_graphics_pipeline_translate_ms(dzn_graphics_pipeline *pipeline,
+                                   D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+                                   const VkGraphicsPipelineCreateInfo *in)
+{
+   const VkPipelineMultisampleStateCreateInfo *in_ms =
+      in->pMultisampleState;
+
+   /* TODO: sampleShadingEnable, minSampleShading,
+    * alphaToOneEnable
+    */
+   out->SampleDesc.Count = in_ms ? in_ms->rasterizationSamples : 1;
+   out->SampleDesc.Quality = 0;
+   out->SampleMask = in_ms && in_ms->pSampleMask ?
+                     *in_ms->pSampleMask :
+                     (1 << out->SampleDesc.Count) - 1;
+}
+
+static D3D12_STENCIL_OP
+translate_stencil_op(VkStencilOp in)
+{
+   switch (in) {
+   case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
+   case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
+   case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
+   case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
+   case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
+   case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
+   case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
+   case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
+   default: unreachable("Invalid stencil op");
+   }
+}
+
+static void
+translate_stencil_test(dzn_graphics_pipeline *pipeline,
+                       D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+                       const VkGraphicsPipelineCreateInfo *in)
+{
+   const VkPipelineDepthStencilStateCreateInfo *in_zsa =
+      in->pDepthStencilState;
+
+   bool front_test_uses_ref =
+      !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
+      in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
+      in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
+      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
+       in_zsa->front.compareMask != 0);
+   bool back_test_uses_ref =
+      !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
+      in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
+      in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
+      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
+       in_zsa->back.compareMask != 0);
+
+   if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
+      pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
+   else if (front_test_uses_ref)
+      pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
+   else
+      pipeline->zsa.stencil_test.front.compare_mask = 0;
+
+   if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
+      pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
+   else if (back_test_uses_ref)
+      pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
+   else
+      pipeline->zsa.stencil_test.back.compare_mask = 0;
+
+   bool diff_wr_mask =
+      in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
+      (pipeline->zsa.stencil_test.dynamic_write_mask ||
+       in_zsa->back.writeMask != in_zsa->front.writeMask);
+   bool diff_ref =
+      in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
+      (pipeline->zsa.stencil_test.dynamic_ref ||
+       in_zsa->back.reference != in_zsa->front.reference);
+   bool diff_cmp_mask =
+      back_test_uses_ref && front_test_uses_ref &&
+      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
+       pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask);
+
+   if (diff_cmp_mask || diff_wr_mask)
+      pipeline->zsa.stencil_test.independent_front_back = true;
+
+   /* The cull-mode gate must apply to all three REPLACE cases, hence the
+    * extra parentheses (&& binds tighter than ||).
+    */
+   bool back_wr_uses_ref =
+      !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
+      ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
+        in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
+       (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
+        (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
+        in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
+       (in_zsa->depthTestEnable &&
+        in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
+        in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
+   bool front_wr_uses_ref =
+      !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
+      ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
+        in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
+       (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
+        (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
+        in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
+       (in_zsa->depthTestEnable &&
+        in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
+        in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));
+
+   pipeline->zsa.stencil_test.front.write_mask =
+      (pipeline->zsa.stencil_test.dynamic_write_mask ||
+       (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
+      0 : in_zsa->front.writeMask;
+   pipeline->zsa.stencil_test.back.write_mask =
+      (pipeline->zsa.stencil_test.dynamic_write_mask ||
+       (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
+      0 : in_zsa->back.writeMask;
+
+   pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
+   pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;
+
+   if (diff_ref &&
+       pipeline->zsa.stencil_test.front.uses_ref &&
+       pipeline->zsa.stencil_test.back.uses_ref)
+      pipeline->zsa.stencil_test.independent_front_back = true;
+
+   pipeline->zsa.stencil_test.front.ref =
+      pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
+   pipeline->zsa.stencil_test.back.ref =
+      pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;
+
+   /* FIXME: We don't support independent {compare,write}_mask and stencil
+    * reference. Until we have proper support for independent front/back
+    * stencil test, let's prioritize the front setup when both are active.
+    */
+   out->DepthStencilState.StencilReadMask =
+      front_test_uses_ref ?
+      pipeline->zsa.stencil_test.front.compare_mask :
+      back_test_uses_ref ?
+      pipeline->zsa.stencil_test.back.compare_mask : 0;
+   out->DepthStencilState.StencilWriteMask =
+      pipeline->zsa.stencil_test.front.write_mask ?
+      pipeline->zsa.stencil_test.front.write_mask :
+      pipeline->zsa.stencil_test.back.write_mask;
+
+   assert(!pipeline->zsa.stencil_test.independent_front_back);
+}
+
+static void
+dzn_graphics_pipeline_translate_zsa(dzn_graphics_pipeline *pipeline,
+                                    D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+                                    const VkGraphicsPipelineCreateInfo *in)
+{
+   const VkPipelineDepthStencilStateCreateInfo *in_zsa =
+      in->pDepthStencilState;
+
+   if (!in_zsa)
+      return;
+
+   /* TODO: depthBoundsTestEnable */
+
+   out->DepthStencilState.DepthEnable = in_zsa->depthTestEnable;
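+   /* Note on translate_stencil_test() above: baseline D3D12 has a single
+    * StencilReadMask/StencilWriteMask pair and one stencil reference shared
+    * by both faces, whereas Vulkan lets front and back differ. The
+    * translation therefore tracks which face actually consumes each value,
+    * prioritizes the front face when both do, and asserts on the truly
+    * independent case it cannot express yet.
+    */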
+   out->DepthStencilState.DepthWriteMask =
+      in_zsa->depthWriteEnable ?
+      D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
+   out->DepthStencilState.DepthFunc =
+      dzn_translate_compare_op(in_zsa->depthCompareOp);
+   out->DepthStencilState.StencilEnable = in_zsa->stencilTestEnable;
+   if (in_zsa->stencilTestEnable) {
+      out->DepthStencilState.FrontFace.StencilFailOp =
+         translate_stencil_op(in_zsa->front.failOp);
+      out->DepthStencilState.FrontFace.StencilDepthFailOp =
+         translate_stencil_op(in_zsa->front.depthFailOp);
+      out->DepthStencilState.FrontFace.StencilPassOp =
+         translate_stencil_op(in_zsa->front.passOp);
+      out->DepthStencilState.FrontFace.StencilFunc =
+         dzn_translate_compare_op(in_zsa->front.compareOp);
+      out->DepthStencilState.BackFace.StencilFailOp =
+         translate_stencil_op(in_zsa->back.failOp);
+      out->DepthStencilState.BackFace.StencilDepthFailOp =
+         translate_stencil_op(in_zsa->back.depthFailOp);
+      out->DepthStencilState.BackFace.StencilPassOp =
+         translate_stencil_op(in_zsa->back.passOp);
+      out->DepthStencilState.BackFace.StencilFunc =
+         dzn_translate_compare_op(in_zsa->back.compareOp);
+
+      pipeline->zsa.stencil_test.enable = true;
+
+      translate_stencil_test(pipeline, out, in);
+   }
+}
+
+static D3D12_BLEND
+translate_blend_factor(VkBlendFactor in)
+{
+   switch (in) {
+   case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
+   case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
+   case VK_BLEND_FACTOR_SRC_COLOR: return D3D12_BLEND_SRC_COLOR;
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR;
+   case VK_BLEND_FACTOR_DST_COLOR: return D3D12_BLEND_DEST_COLOR;
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
+   case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
+   case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
+   /* FIXME: no way to isolate the alpha and color constants */
+   case VK_BLEND_FACTOR_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+      return D3D12_BLEND_BLEND_FACTOR;
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+      return D3D12_BLEND_INV_BLEND_FACTOR;
+   case VK_BLEND_FACTOR_SRC1_COLOR: return D3D12_BLEND_SRC1_COLOR;
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: return D3D12_BLEND_INV_SRC1_COLOR;
+   case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
+   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
+   default: unreachable("Invalid blend factor");
+   }
+}
+
+static D3D12_BLEND_OP
+translate_blend_op(VkBlendOp in)
+{
+   switch (in) {
+   case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
+   case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
+   case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
+   case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
+   case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
+   default: unreachable("Invalid blend op");
+   }
+}
+
+static D3D12_LOGIC_OP
+translate_logic_op(VkLogicOp in)
+{
+   switch (in) {
+   case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
+   case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
+   case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
+   case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
+   case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
+   case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
+   case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
+   case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
+   case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
+   case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
+   case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
+   case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
+   case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
+   case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
+   case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
+   case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
+   default: unreachable("Invalid logic op");
+   }
+}
+
+static void
+dzn_graphics_pipeline_translate_blend(dzn_graphics_pipeline *pipeline,
+                                      D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
+                                      const VkGraphicsPipelineCreateInfo *in)
+{
+   const VkPipelineColorBlendStateCreateInfo *in_blend =
+      in->pColorBlendState;
+   const VkPipelineMultisampleStateCreateInfo *in_ms =
+      in->pMultisampleState;
+
+   if (!in_blend || !in_ms)
+      return;
+
+   D3D12_LOGIC_OP logicop =
+      in_blend->logicOpEnable ?
+      translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
+   out->BlendState.AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
+   for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
+      /* Independent blend is required as soon as two attachment states
+       * differ.
+       */
+      if (i > 0 &&
+          memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
+                 sizeof(*in_blend->pAttachments)) != 0)
+         out->BlendState.IndependentBlendEnable = true;
+
+      out->BlendState.RenderTarget[i].BlendEnable =
+         in_blend->pAttachments[i].blendEnable;
+      out->BlendState.RenderTarget[i].RenderTargetWriteMask =
+         in_blend->pAttachments[i].colorWriteMask;
+      if (in_blend->logicOpEnable) {
+         out->BlendState.RenderTarget[i].LogicOpEnable = true;
+         out->BlendState.RenderTarget[i].LogicOp = logicop;
+      } else {
+         out->BlendState.RenderTarget[i].SrcBlend =
+            translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor);
+         out->BlendState.RenderTarget[i].DestBlend =
+            translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor);
+         out->BlendState.RenderTarget[i].BlendOp =
+            translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
+         out->BlendState.RenderTarget[i].SrcBlendAlpha =
+            translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor);
+         out->BlendState.RenderTarget[i].DestBlendAlpha =
+            translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor);
+         out->BlendState.RenderTarget[i].BlendOpAlpha =
+            translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
+      }
+   }
+}
+
+
+static void
+dzn_pipeline_init(dzn_pipeline *pipeline,
+                  dzn_device *device,
+                  VkPipelineBindPoint type,
+                  dzn_pipeline_layout *layout)
+{
+   pipeline->type = type;
+   pipeline->root.sets_param_count = layout->root.sets_param_count;
+   pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
+   pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
+   STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
+   memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
+   pipeline->root.sig = layout->root.sig;
+   pipeline->root.sig->AddRef();
+
+   STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
+   memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));
+
+   STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
+   memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
+   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+}
+
+static void
+dzn_pipeline_finish(dzn_pipeline *pipeline)
+{
+   if (pipeline->state)
pipeline->state->Release(); + if (pipeline->root.sig) + pipeline->root.sig->Release(); + + vk_object_base_finish(&pipeline->base); +} + +static void +dzn_graphics_pipeline_destroy(dzn_graphics_pipeline *pipeline, + const VkAllocationCallbacks *alloc) +{ + if (!pipeline) + return; + + for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) { + if (pipeline->indirect_cmd_sigs[i]) + pipeline->indirect_cmd_sigs[i]->Release(); + } + + dzn_pipeline_finish(&pipeline->base); + vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline); +} + +static VkResult +dzn_graphics_pipeline_create(dzn_device *device, + VkPipelineCache cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *out) +{ + VK_FROM_HANDLE(dzn_render_pass, pass, pCreateInfo->renderPass); + VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout); + const dzn_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + uint32_t stage_mask = 0; + VkResult ret; + HRESULT hres = 0; + + dzn_graphics_pipeline *pipeline = (dzn_graphics_pipeline *) + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pipeline) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + dzn_pipeline_init(&pipeline->base, device, + VK_PIPELINE_BIND_POINT_GRAPHICS, + layout); + D3D12_INPUT_ELEMENT_DESC *inputs = NULL; + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { + .pRootSignature = pipeline->base.root.sig, + .Flags = D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + ret = dzn_graphics_pipeline_translate_vi(pipeline, pAllocator, &desc, pCreateInfo, &inputs); + if (ret != VK_SUCCESS) + goto out; + + if (pCreateInfo->pDynamicState) { + for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) { + switch (pCreateInfo->pDynamicState->pDynamicStates[i]) { + case VK_DYNAMIC_STATE_VIEWPORT: + pipeline->vp.dynamic = true; + break; + case VK_DYNAMIC_STATE_SCISSOR: + pipeline->scissor.dynamic = true; + break; + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: + pipeline->zsa.stencil_test.dynamic_ref = true; + break; + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: + pipeline->zsa.stencil_test.dynamic_compare_mask = true; + break; + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: + pipeline->zsa.stencil_test.dynamic_write_mask = true; + break; + default: unreachable("Unsupported dynamic state"); + } + } + } + + dzn_graphics_pipeline_translate_ia(pipeline, &desc, pCreateInfo); + dzn_graphics_pipeline_translate_rast(pipeline, &desc, pCreateInfo); + dzn_graphics_pipeline_translate_ms(pipeline, &desc, pCreateInfo); + dzn_graphics_pipeline_translate_zsa(pipeline, &desc, pCreateInfo); + dzn_graphics_pipeline_translate_blend(pipeline, &desc, pCreateInfo); + + desc.NumRenderTargets = subpass->color_count; + for (uint32_t i = 0; i < subpass->color_count; i++) { + uint32_t idx = subpass->colors[i].idx; + + if (idx == VK_ATTACHMENT_UNUSED) continue; + + const struct dzn_attachment *attachment = &pass->attachments[idx]; + + desc.RTVFormats[i] = + dzn_image_get_dxgi_format(attachment->format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT); + } + + if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) { + const struct dzn_attachment *attachment = + &pass->attachments[subpass->zs.idx]; + + desc.DSVFormat = + dzn_image_get_dxgi_format(attachment->format, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT); + } + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) + stage_mask |= 
pCreateInfo->pStages[i].stage; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT && + pCreateInfo->pRasterizationState && + (pCreateInfo->pRasterizationState->rasterizerDiscardEnable || + pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) { + /* Disable rasterization (AKA leave fragment shader NULL) when + * front+back culling or discard is set. + */ + continue; + } + + D3D12_SHADER_BYTECODE *slot = + dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage); + enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE; + uint16_t y_flip_mask = 0, z_flip_mask = 0; + + if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT || + (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_VERTEX_BIT && + !(stage_mask & VK_SHADER_STAGE_GEOMETRY_BIT))) { + if (pipeline->vp.dynamic) { + yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL; + } else if (pCreateInfo->pViewportState) { + const VkPipelineViewportStateCreateInfo *vp_info = + pCreateInfo->pViewportState; + + for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) { + if (vp_info->pViewports[i].height > 0) + y_flip_mask |= BITFIELD_BIT(i); + + if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth) + z_flip_mask |= BITFIELD_BIT(i); + } + + if (y_flip_mask && z_flip_mask) + yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL; + else if (z_flip_mask) + yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL; + else if (y_flip_mask) + yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL; + } + } + + ret = dzn_pipeline_compile_shader(device, pAllocator, + layout, &pCreateInfo->pStages[i], + yz_flip_mode, y_flip_mask, z_flip_mask, slot); + if (ret != VK_SUCCESS) + goto out; + } + + + hres = device->dev->CreateGraphicsPipelineState(&desc, + IID_PPV_ARGS(&pipeline->base.state)); + if (FAILED(hres)) { + ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto out; + } + + ret = VK_SUCCESS; + +out: + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + D3D12_SHADER_BYTECODE *slot = + dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage); + free((void *)slot->pShaderBytecode); + } + + vk_free2(&device->vk.alloc, pAllocator, inputs); + if (ret != VK_SUCCESS) + dzn_graphics_pipeline_destroy(pipeline, pAllocator); + else + *out = dzn_graphics_pipeline_to_handle(pipeline); + + return ret; +} + +#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 3 + +ID3D12CommandSignature * +dzn_graphics_pipeline_get_indirect_cmd_sig(dzn_graphics_pipeline *pipeline, + enum dzn_indirect_draw_cmd_sig_type type) +{ + assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS); + + dzn_device *device = + container_of(pipeline->base.base.device, dzn_device, vk); + ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type]; + + if (cmdsig) + return cmdsig; + + bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG; + bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan; + + uint32_t cmd_arg_count = 0; + D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS]; + + if (triangle_fan) { + cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW, + }; + } + + cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC { + .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, + .Constant = { + .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx, + .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, 
first_vertex) / 4,
+         .Num32BitValuesToSet = 2,
+      },
+   };
+
+   cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
+      .Type = indexed ?
+              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
+              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
+   };
+
+   assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
+   assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);
+
+   D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
+      .ByteStride =
+         triangle_fan ?
+         sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
+         sizeof(struct dzn_indirect_draw_exec_params),
+      .NumArgumentDescs = cmd_arg_count,
+      .pArgumentDescs = cmd_args,
+   };
+   HRESULT hres =
+      device->dev->CreateCommandSignature(&cmd_sig_desc,
+                                          pipeline->base.root.sig,
+                                          IID_PPV_ARGS(&cmdsig));
+   if (FAILED(hres))
+      return NULL;
+
+   pipeline->indirect_cmd_sigs[type] = cmdsig;
+   return cmdsig;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateGraphicsPipelines(VkDevice dev,
+                            VkPipelineCache pipelineCache,
+                            uint32_t count,
+                            const VkGraphicsPipelineCreateInfo *pCreateInfos,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkPipeline *pPipelines)
+{
+   VK_FROM_HANDLE(dzn_device, device, dev);
+   VkResult result = VK_SUCCESS;
+
+   unsigned i;
+   for (i = 0; i < count; i++) {
+      result = dzn_graphics_pipeline_create(device,
+                                            pipelineCache,
+                                            &pCreateInfos[i],
+                                            pAllocator,
+                                            &pPipelines[i]);
+      if (result != VK_SUCCESS) {
+         pPipelines[i] = VK_NULL_HANDLE;
+
+         /* Bail out on the first error other than
+          * VK_PIPELINE_COMPILE_REQUIRED_EXT, as it is not obvious which
+          * error should be reported when two different pipelines fail for
+          * different reasons.
+          */
+         if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT)
+            break;
+
+         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+            break;
+      }
+   }
+
+   for (; i < count; i++)
+      pPipelines[i] = VK_NULL_HANDLE;
+
+   return result;
+}
+
+static void
+dzn_compute_pipeline_destroy(dzn_compute_pipeline *pipeline,
+                             const VkAllocationCallbacks *alloc)
+{
+   if (!pipeline)
+      return;
+
+   if (pipeline->indirect_cmd_sig)
+      pipeline->indirect_cmd_sig->Release();
+
+   dzn_pipeline_finish(&pipeline->base);
+   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
+}
+
+static VkResult
+dzn_compute_pipeline_create(dzn_device *device,
+                            VkPipelineCache cache,
+                            const VkComputePipelineCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkPipeline *out)
+{
+   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
+
+   dzn_compute_pipeline *pipeline = (dzn_compute_pipeline *)
+      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!pipeline)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   dzn_pipeline_init(&pipeline->base, device,
+                     VK_PIPELINE_BIND_POINT_COMPUTE,
+                     layout);
+
+   D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
+      .pRootSignature = pipeline->base.root.sig,
+      .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
+   };
+
+   VkResult ret =
+      dzn_pipeline_compile_shader(device, pAllocator, layout,
+                                  &pCreateInfo->stage,
+                                  DXIL_SPIRV_YZ_FLIP_NONE, 0, 0,
+                                  &desc.CS);
+   if (ret != VK_SUCCESS)
+      goto out;
+
+   if (FAILED(device->dev->CreateComputePipelineState(&desc,
+                                                      IID_PPV_ARGS(&pipeline->base.state)))) {
+      ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto out;
+   }
+
+out:
+   free((void *)desc.CS.pShaderBytecode);
+   if (ret != VK_SUCCESS)
+      dzn_compute_pipeline_destroy(pipeline, pAllocator);
+   else
+      *out = dzn_compute_pipeline_to_handle(pipeline);
+
+   return ret;
+}
+
+ID3D12CommandSignature *
+dzn_compute_pipeline_get_indirect_cmd_sig(dzn_compute_pipeline *pipeline)
+{
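+   /* The signature is created lazily and cached on the pipeline. Each
+    * ExecuteIndirect() record is expected to be laid out as (illustrative
+    * struct, not a name from this patch):
+    *
+    *    struct indirect_dispatch_params {
+    *       uint32_t group_count[3];       // copied into the sysval CBV
+    *       D3D12_DISPATCH_ARGUMENTS args; // consumed by the DISPATCH arg
+    *    };
+    *
+    * hence the 2 * sizeof(D3D12_DISPATCH_ARGUMENTS) stride below.
+    */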
+   if (pipeline->indirect_cmd_sig)
+      return pipeline->indirect_cmd_sig;
+
+   dzn_device *device =
+      container_of(pipeline->base.base.device, dzn_device, vk);
+
+   D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
+      {
+         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
+         .Constant = {
+            .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
+            .DestOffsetIn32BitValues = 0,
+            .Num32BitValuesToSet = 3,
+         },
+      },
+      {
+         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
+      },
+   };
+
+   D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
+      .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
+      .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
+      .pArgumentDescs = indirect_dispatch_args,
+   };
+
+   HRESULT hres =
+      device->dev->CreateCommandSignature(&indirect_dispatch_desc,
+                                          pipeline->base.root.sig,
+                                          IID_PPV_ARGS(&pipeline->indirect_cmd_sig));
+   if (FAILED(hres))
+      return NULL;
+
+   return pipeline->indirect_cmd_sig;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateComputePipelines(VkDevice dev,
+                           VkPipelineCache pipelineCache,
+                           uint32_t count,
+                           const VkComputePipelineCreateInfo *pCreateInfos,
+                           const VkAllocationCallbacks *pAllocator,
+                           VkPipeline *pPipelines)
+{
+   VK_FROM_HANDLE(dzn_device, device, dev);
+   VkResult result = VK_SUCCESS;
+
+   unsigned i;
+   for (i = 0; i < count; i++) {
+      result = dzn_compute_pipeline_create(device,
+                                           pipelineCache,
+                                           &pCreateInfos[i],
+                                           pAllocator,
+                                           &pPipelines[i]);
+      if (result != VK_SUCCESS) {
+         pPipelines[i] = VK_NULL_HANDLE;
+
+         /* Bail out on the first error other than
+          * VK_PIPELINE_COMPILE_REQUIRED_EXT, as it is not obvious which
+          * error should be reported when two different pipelines fail for
+          * different reasons.
+          */
+         if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT)
+            break;
+
+         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+            break;
+      }
+   }
+
+   for (; i < count; i++)
+      pPipelines[i] = VK_NULL_HANDLE;
+
+   return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyPipeline(VkDevice device,
+                    VkPipeline pipeline,
+                    const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);
+
+   if (!pipe)
+      return;
+
+   if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      dzn_graphics_pipeline *gfx = container_of(pipe, dzn_graphics_pipeline, base);
+      dzn_graphics_pipeline_destroy(gfx, pAllocator);
+   } else {
+      assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
+      dzn_compute_pipeline *compute = container_of(pipe, dzn_compute_pipeline, base);
+      dzn_compute_pipeline_destroy(compute, pAllocator);
+   }
+}
diff --git a/src/microsoft/vulkan/dzn_pipeline_cache.cpp b/src/microsoft/vulkan/dzn_pipeline_cache.cpp
new file mode 100644
index 00000000000..321ff840553
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_pipeline_cache.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" + +static void +dzn_pipeline_cache_destroy(dzn_pipeline_cache *pcache, + const VkAllocationCallbacks *pAllocator) +{ + if (!pcache) + return; + + dzn_device *device = container_of(pcache->base.device, dzn_device, vk); + + vk_object_base_finish(&pcache->base); + vk_free2(&device->vk.alloc, pAllocator, pcache); +} + +static VkResult +dzn_pipeline_cache_create(dzn_device *device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *out) +{ + dzn_pipeline_cache *pcache = (dzn_pipeline_cache *) + vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pcache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pcache) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &pcache->base, VK_OBJECT_TYPE_PIPELINE_CACHE); + + /* TODO: cache-ism! */ + + *out = dzn_pipeline_cache_to_handle(pcache); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_CreatePipelineCache(VkDevice device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *pPipelineCache) +{ + return dzn_pipeline_cache_create(dzn_device_from_handle(device), + pCreateInfo, pAllocator, pPipelineCache); +} + +VKAPI_ATTR void VKAPI_CALL +dzn_DestroyPipelineCache(VkDevice device, + VkPipelineCache pipelineCache, + const VkAllocationCallbacks *pAllocator) +{ + return dzn_pipeline_cache_destroy(dzn_pipeline_cache_from_handle(pipelineCache), + pAllocator); +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_GetPipelineCacheData(VkDevice device, + VkPipelineCache pipelineCache, + size_t *pDataSize, + void *pData) +{ + // FIXME + *pDataSize = 0; + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +dzn_MergePipelineCaches(VkDevice device, + VkPipelineCache dstCache, + uint32_t srcCacheCount, + const VkPipelineCache *pSrcCaches) +{ + // FIXME + return VK_SUCCESS; +} diff --git a/src/microsoft/vulkan/dzn_private.h b/src/microsoft/vulkan/dzn_private.h new file mode 100644 index 00000000000..2b4ccfe0f01 --- /dev/null +++ b/src/microsoft/vulkan/dzn_private.h @@ -0,0 +1,1060 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef DZN_PRIVATE_H
+#define DZN_PRIVATE_H
+
+#include "vk_command_pool.h"
+#include "vk_command_buffer.h"
+#include "vk_cmd_queue.h"
+#include "vk_debug_report.h"
+#include "vk_device.h"
+#include "vk_image.h"
+#include "vk_log.h"
+#include "vk_physical_device.h"
+#include "vk_sync.h"
+#include "vk_queue.h"
+#include "vk_shader_module.h"
+#include "wsi_common.h"
+
+#include "util/bitset.h"
+#include "util/blob.h"
+#include "util/hash_table.h"
+#include "util/u_dynarray.h"
+#include "util/log.h"
+
+#include "shader_enums.h"
+
+#include "dzn_entrypoints.h"
+#include "dzn_nir.h"
+
+/* Assumption: the bracketed system-header names below were lost in
+ * extraction and are reconstructed from the types this header uses. */
+#include <vulkan/vulkan.h>
+#include <vulkan/vk_icd.h>
+
+#include <wrl/client.h>
+
+#define D3D12_IGNORE_SDK_LAYERS
+#include <directx/d3d12.h>
+#include <dxgi1_4.h>
+#include <dxcapi.h>
+
+#include "spirv_to_dxil.h"
+
+using Microsoft::WRL::ComPtr;
+
+#define DZN_SWAP(a, b) \
+   do { \
+      auto __tmp = a; \
+      a = b; \
+      b = __tmp; \
+   } while (0)
+
+#define dzn_stub() unreachable("Unsupported feature")
+
+struct dzn_instance;
+struct dzn_device;
+
+struct dzn_meta {
+   static void
+   compile_shader(struct dzn_device *pdev,
+                  nir_shader *nir,
+                  D3D12_SHADER_BYTECODE *slot);
+
+   struct dzn_device *device;
+};
+
+struct dzn_meta_indirect_draw {
+   ID3D12RootSignature *root_sig;
+   ID3D12PipelineState *pipeline_state;
+};
+
+enum dzn_index_type {
+   DZN_NO_INDEX,
+   DZN_INDEX_2B,
+   DZN_INDEX_4B,
+   DZN_NUM_INDEX_TYPE,
+};
+
+static inline enum dzn_index_type
+dzn_index_type_from_size(uint8_t index_size)
+{
+   switch (index_size) {
+   case 0: return DZN_NO_INDEX;
+   case 2: return DZN_INDEX_2B;
+   case 4: return DZN_INDEX_4B;
+   default: unreachable("Invalid index size");
+   }
+}
+
+static inline enum dzn_index_type
+dzn_index_type_from_dxgi_format(DXGI_FORMAT format)
+{
+   switch (format) {
+   case DXGI_FORMAT_UNKNOWN: return DZN_NO_INDEX;
+   case DXGI_FORMAT_R16_UINT: return DZN_INDEX_2B;
+   case DXGI_FORMAT_R32_UINT: return DZN_INDEX_4B;
+   default: unreachable("Invalid index format");
+   }
+}
+
+static inline uint8_t
+dzn_index_size(enum dzn_index_type type)
+{
+   switch (type) {
+   case DZN_NO_INDEX: return 0;
+   case DZN_INDEX_2B: return 2;
+   case DZN_INDEX_4B: return 4;
+   default: unreachable("Invalid index type");
+   }
+}
+
+struct dzn_meta_triangle_fan_rewrite_index {
+   ID3D12RootSignature *root_sig;
+   ID3D12PipelineState *pipeline_state;
+   ID3D12CommandSignature *cmd_sig;
+};
+
+struct dzn_meta_blit_key {
+   union {
+      struct {
+         DXGI_FORMAT out_format;
+         uint32_t samples : 6;
+         uint32_t loc : 4;
+         uint32_t out_type : 4;
+         uint32_t sampler_dim : 4;
+         uint32_t src_is_array : 1;
+         uint32_t resolve : 1;
+         uint32_t linear_filter : 1;
+         uint32_t padding : 11;
+      };
+      const uint64_t u64;
+   };
+};
+
+struct dzn_meta_blit {
+   ID3D12RootSignature *root_sig;
+   ID3D12PipelineState *pipeline_state;
+};
+
+struct dzn_meta_blits {
+   mtx_t shaders_lock;
+   D3D12_SHADER_BYTECODE vs;
+   struct hash_table *fs;
+   mtx_t contexts_lock;
+   struct hash_table_u64 *contexts;
+};
+
+const dzn_meta_blit *
+dzn_meta_blits_get_context(dzn_device *device, const dzn_meta_blit_key *key);
+
+#define MAX_SYNC_TYPES 2
+#define MAX_QUEUE_FAMILIES 3
+
+struct dzn_physical_device {
+   struct vk_physical_device vk;
+   struct list_head link;
+
+   struct vk_device_extension_table supported_extensions;
+   struct vk_physical_device_dispatch_table dispatch;
+
+   IDXGIAdapter1 *adapter;
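+   /* Cached DXGI description of the adapter above, filled once at
+    * enumeration time so later property queries don't go back to DXGI. */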
DXGI_ADAPTER_DESC1 adapter_desc; + + uint32_t queue_family_count; + struct { + VkQueueFamilyProperties props; + D3D12_COMMAND_QUEUE_DESC desc; + } queue_families[MAX_QUEUE_FAMILIES]; + + uint8_t pipeline_cache_uuid[VK_UUID_SIZE]; + uint8_t device_uuid[VK_UUID_SIZE]; + uint8_t driver_uuid[VK_UUID_SIZE]; + + struct wsi_device wsi_device; + + mtx_t dev_lock; + ID3D12Device1 *dev; + D3D_FEATURE_LEVEL feature_level; + D3D12_FEATURE_DATA_ARCHITECTURE1 architecture; + D3D12_FEATURE_DATA_D3D12_OPTIONS options; + VkPhysicalDeviceMemoryProperties memory; + D3D12_HEAP_FLAGS heap_flags_for_mem_type[VK_MAX_MEMORY_TYPES]; + const struct vk_sync_type *sync_types[MAX_SYNC_TYPES + 1]; + float timestamp_period; +}; + +D3D12_FEATURE_DATA_FORMAT_SUPPORT +dzn_physical_device_get_format_support(dzn_physical_device *pdev, + VkFormat format); + +uint32_t +dzn_physical_device_get_mem_type_mask_for_resource(const dzn_physical_device *pdev, + const D3D12_RESOURCE_DESC *desc); + +#define dzn_debug_ignored_stype(sType) \ + mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType)) + +IDXGIFactory4 * +dxgi_get_factory(bool debug); + +IDxcValidator * +dxil_get_validator(void); + +IDxcLibrary * +dxc_get_library(void); + +IDxcCompiler * +dxc_get_compiler(void); + +PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE +d3d12_get_serialize_root_sig(void); + +void +d3d12_enable_debug_layer(); + +void +d3d12_enable_gpu_validation(); + +ID3D12Device1 * +d3d12_create_device(IUnknown *adapter, bool experimental_features); + +struct dzn_queue { + struct vk_queue vk; + + ID3D12CommandQueue *cmdqueue; + ID3D12Fence *fence; + uint64_t fence_point = 0; +}; + +struct dzn_device { + struct vk_device vk; + struct vk_device_extension_table enabled_extensions; + struct vk_device_dispatch_table cmd_dispatch; + + ID3D12Device1 *dev; + + struct dzn_meta_indirect_draw indirect_draws[DZN_NUM_INDIRECT_DRAW_TYPES]; + struct dzn_meta_triangle_fan_rewrite_index triangle_fan[DZN_NUM_INDEX_TYPE]; + struct dzn_meta_blits blits; + + struct { +#define DZN_QUERY_REFS_SECTION_SIZE 4096 +#define DZN_QUERY_REFS_ALL_ONES_OFFSET 0 +#define DZN_QUERY_REFS_ALL_ZEROS_OFFSET (DZN_QUERY_REFS_ALL_ONES_OFFSET + DZN_QUERY_REFS_SECTION_SIZE) +#define DZN_QUERY_REFS_RES_SIZE (DZN_QUERY_REFS_ALL_ZEROS_OFFSET + DZN_QUERY_REFS_SECTION_SIZE) + ID3D12Resource *refs; + } queries; +}; + +void dzn_meta_finish(dzn_device *device); + +VkResult dzn_meta_init(dzn_device *device); + +const dzn_meta_blit * +dzn_meta_blits_get_context(dzn_device *device, + const dzn_meta_blit_key *key); + +ID3D12RootSignature * +dzn_device_create_root_sig(dzn_device *device, + const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc); + +struct dzn_device_memory { + struct vk_object_base base; + + struct list_head link; + + ID3D12Heap *heap; + VkDeviceSize size; + D3D12_RESOURCE_STATES initial_state; /* initial state for this memory type */ + + /* A buffer-resource spanning the entire heap, used for mapping memory */ + ID3D12Resource *map_res; + + VkDeviceSize map_size; + void *map; +}; + +enum dzn_cmd_bindpoint_dirty { + DZN_CMD_BINDPOINT_DIRTY_PIPELINE = 1 << 0, + DZN_CMD_BINDPOINT_DIRTY_HEAPS = 1 << 1, + DZN_CMD_BINDPOINT_DIRTY_SYSVALS = 1 << 2, +}; + +enum dzn_cmd_dirty { + DZN_CMD_DIRTY_VIEWPORTS = 1 << 0, + DZN_CMD_DIRTY_SCISSORS = 1 << 1, + DZN_CMD_DIRTY_IB = 1 << 2, + DZN_CMD_DIRTY_STENCIL_REF = 1 << 3, + DZN_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 4, + DZN_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 5, +}; + +#define MAX_VBS 16 +#define MAX_VP 16 +#define MAX_SCISSOR 16 +#define MAX_SETS 4 +#define 
MAX_DYNAMIC_UNIFORM_BUFFERS 8 +#define MAX_DYNAMIC_STORAGE_BUFFERS 4 +#define MAX_DYNAMIC_BUFFERS \ + (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) +#define MAX_PUSH_CONSTANT_DWORDS 32 + +#define NUM_BIND_POINT VK_PIPELINE_BIND_POINT_COMPUTE + 1 +#define NUM_POOL_TYPES D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + 1 + +#define dzn_foreach_pool_type(type) \ + for (D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; \ + type <= D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; \ + type = (D3D12_DESCRIPTOR_HEAP_TYPE)(type + 1)) + +struct dzn_cmd_event_signal { + struct dzn_event *event; + bool value; +}; + +struct dzn_cmd_buffer; + +struct dzn_attachment { + uint32_t idx; + VkFormat format; + uint32_t samples; + union { + bool color; + struct { + bool depth; + bool stencil; + }; + } clear; + D3D12_RESOURCE_STATES before, last, after; +}; + +struct dzn_attachment_ref { + uint32_t idx; + D3D12_RESOURCE_STATES before, during; +}; + +struct dzn_descriptor_state { + struct { + const struct dzn_descriptor_set *set; + uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS]; + } sets[MAX_SETS]; + struct dzn_descriptor_heap *heaps[NUM_POOL_TYPES]; +}; + +struct dzn_sampler; +struct dzn_image_view; + +struct dzn_buffer_desc { + VkDescriptorType type; + const struct dzn_buffer *buffer; + VkDeviceSize range; + VkDeviceSize offset; +}; + +struct dzn_descriptor_heap { + ID3D12Device *dev; + ID3D12DescriptorHeap *heap; + D3D12_DESCRIPTOR_HEAP_TYPE type; + SIZE_T cpu_base; + uint64_t gpu_base; + uint32_t desc_count; + uint32_t desc_sz; +}; + +bool +dzn_descriptor_type_depends_on_shader_usage(VkDescriptorType type); + +D3D12_CPU_DESCRIPTOR_HANDLE +dzn_descriptor_heap_get_cpu_handle(const dzn_descriptor_heap *heap, uint32_t slot); + +D3D12_GPU_DESCRIPTOR_HANDLE +dzn_descriptor_heap_get_gpu_handle(const dzn_descriptor_heap *heap, uint32_t slot); + +void +dzn_descriptor_heap_write_image_view_desc(dzn_descriptor_heap *heap, + uint32_t heap_offset, + bool writeable, + bool cube_as_2darray, + const dzn_image_view *iview); + +void +dzn_descriptor_heap_write_buffer_desc(dzn_descriptor_heap *heap, + uint32_t heap_offset, + bool writeable, + const dzn_buffer_desc *bdesc); + +void +dzn_descriptor_heap_copy(dzn_descriptor_heap *dst_heap, uint32_t dst_heap_offset, + const dzn_descriptor_heap *src_heap, uint32_t src_heap_offset, + uint32_t desc_count); + +VkResult +dzn_descriptor_heap_init(dzn_descriptor_heap *heap, dzn_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE type, uint32_t count, + bool shader_visible); + +void +dzn_descriptor_heap_finish(dzn_descriptor_heap *heap); + +struct dzn_descriptor_heap_pool_entry { + struct list_head link; + dzn_descriptor_heap heap; +}; + +struct dzn_descriptor_heap_pool { + const VkAllocationCallbacks *alloc; + D3D12_DESCRIPTOR_HEAP_TYPE type; + bool shader_visible; + struct list_head active_heaps, free_heaps; + uint32_t offset; + uint32_t desc_sz; +}; + +void +dzn_descriptor_heap_pool_init(dzn_descriptor_heap_pool *pool, + dzn_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE type, + bool shader_visible, + const VkAllocationCallbacks *alloc); + +void +dzn_descriptor_heap_pool_finish(dzn_descriptor_heap_pool *pool); + +void +dzn_descriptor_heap_pool_reset(dzn_descriptor_heap_pool *pool); + +VkResult +dzn_descriptor_heap_pool_alloc_slots(dzn_descriptor_heap_pool *pool, + dzn_device *device, + uint32_t num_slots, + dzn_descriptor_heap **heap, + uint32_t *first_slot); + +struct dzn_cmd_buffer_query_range { + struct dzn_query_pool *qpool; + uint32_t start, count; +}; + +struct 
dzn_cmd_buffer_query_pool_state { + struct util_dynarray reset, collect, wait, signal; +}; + +struct dzn_internal_resource { + struct list_head link; + ID3D12Resource *res; +}; + +enum dzn_event_state { + DZN_EVENT_STATE_EXTERNAL_WAIT = -1, + DZN_EVENT_STATE_RESET = 0, + DZN_EVENT_STATE_SET = 1, +}; + +struct dzn_cmd_buffer_push_constant_state { + uint32_t offset; + uint32_t end; + uint32_t values[MAX_PUSH_CONSTANT_DWORDS]; +}; + +struct dzn_cmd_buffer_state { + struct dzn_framebuffer *framebuffer; + D3D12_RECT render_area; + const struct dzn_pipeline *pipeline; + dzn_descriptor_heap *heaps[NUM_POOL_TYPES]; + struct dzn_render_pass *pass; + struct { + BITSET_DECLARE(dirty, MAX_VBS); + D3D12_VERTEX_BUFFER_VIEW views[MAX_VBS]; + } vb; + struct { + D3D12_INDEX_BUFFER_VIEW view; + } ib; + struct { + struct { + struct { + uint32_t ref, compare_mask, write_mask; + } front, back; + } stencil_test; + } zsa; + D3D12_VIEWPORT viewports[MAX_VP]; + D3D12_RECT scissors[MAX_SCISSOR]; + struct { + struct dzn_cmd_buffer_push_constant_state gfx, compute; + } push_constant; + uint32_t dirty; + uint32_t subpass; + struct { + struct dzn_pipeline *pipeline; + struct dzn_descriptor_state desc_state; + uint32_t dirty; + } bindpoint[NUM_BIND_POINT]; + union { + struct dxil_spirv_vertex_runtime_data gfx; + struct dxil_spirv_compute_runtime_data compute; + } sysvals; +}; + +struct dzn_cmd_buffer_rtv_key { + const struct dzn_image *image; + D3D12_RENDER_TARGET_VIEW_DESC desc; +}; + +struct dzn_cmd_buffer_rtv_entry { + struct dzn_cmd_buffer_rtv_key key; + D3D12_CPU_DESCRIPTOR_HANDLE handle; +}; + +struct dzn_cmd_buffer_dsv_key { + const struct dzn_image *image; + D3D12_DEPTH_STENCIL_VIEW_DESC desc; +}; + +struct dzn_cmd_buffer_dsv_entry { + struct dzn_cmd_buffer_dsv_key key; + D3D12_CPU_DESCRIPTOR_HANDLE handle; +}; + +struct dzn_cmd_buffer { + struct vk_command_buffer vk; + VkResult error; + struct dzn_cmd_buffer_state state; + + struct { + struct hash_table *ht; + struct util_dynarray reset; + struct util_dynarray wait; + struct util_dynarray signal; + } queries; + + struct { + struct hash_table *ht; + struct util_dynarray wait; + struct util_dynarray signal; + } events; + + struct { + struct hash_table *ht; + dzn_descriptor_heap_pool pool; + } rtvs, dsvs; + + dzn_descriptor_heap_pool cbv_srv_uav_pool, sampler_pool; + + struct list_head internal_bufs; + + ID3D12CommandAllocator *cmdalloc; + ID3D12GraphicsCommandList1 *cmdlist; +}; + +struct dzn_descriptor_pool { + struct vk_object_base base; + VkAllocationCallbacks alloc; + + uint32_t set_count; + uint32_t used_set_count; + dzn_descriptor_set *sets; + dzn_descriptor_heap heaps[NUM_POOL_TYPES]; + uint32_t desc_count[NUM_POOL_TYPES]; + uint32_t used_desc_count[NUM_POOL_TYPES]; + uint32_t free_offset[NUM_POOL_TYPES]; + mtx_t defragment_lock; +}; + +#define MAX_SHADER_VISIBILITIES (D3D12_SHADER_VISIBILITY_PIXEL + 1) + +struct dzn_descriptor_set_layout_binding { + VkDescriptorType type; + D3D12_SHADER_VISIBILITY visibility; + uint32_t base_shader_register; + uint32_t range_idx[NUM_POOL_TYPES]; + union { + struct { + uint32_t static_sampler_idx; + uint32_t immutable_sampler_idx; + }; + uint32_t dynamic_buffer_idx; + }; +}; + +struct dzn_descriptor_set_layout { + struct vk_object_base base; + uint32_t range_count[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES]; + const D3D12_DESCRIPTOR_RANGE1 *ranges[MAX_SHADER_VISIBILITIES][NUM_POOL_TYPES]; + uint32_t range_desc_count[NUM_POOL_TYPES]; + uint32_t static_sampler_count; + const D3D12_STATIC_SAMPLER_DESC *static_samplers; + 
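+   /* Immutable samplers that could not be promoted to static samplers in
+    * the root signature; these are presumably written into the sampler
+    * heap when descriptor sets using this layout are populated. */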
uint32_t immutable_sampler_count; + const dzn_sampler **immutable_samplers; + struct { + uint32_t bindings[MAX_DYNAMIC_BUFFERS]; + uint32_t count; + uint32_t desc_count; + uint32_t range_offset; + } dynamic_buffers; + uint32_t binding_count; + const struct dzn_descriptor_set_layout_binding *bindings; +}; + +struct dzn_descriptor_set { + struct vk_object_base base; + struct dzn_buffer_desc dynamic_buffers[MAX_DYNAMIC_BUFFERS]; + dzn_descriptor_pool *pool; + uint32_t heap_offsets[NUM_POOL_TYPES]; + uint32_t heap_sizes[NUM_POOL_TYPES]; + const struct dzn_descriptor_set_layout *layout; +}; + +struct dzn_pipeline_layout { + struct vk_object_base base; + int32_t refcount; + struct { + uint32_t heap_offsets[NUM_POOL_TYPES]; + struct { + uint32_t srv, uav; + } dynamic_buffer_heap_offsets[MAX_DYNAMIC_BUFFERS]; + uint32_t dynamic_buffer_count; + uint32_t range_desc_count[NUM_POOL_TYPES]; + } sets[MAX_SETS]; + dxil_spirv_vulkan_descriptor_set binding_translation[MAX_SETS]; + uint32_t set_count; + uint32_t desc_count[NUM_POOL_TYPES]; + struct { + uint32_t param_count; + uint32_t sets_param_count; + uint32_t sysval_cbv_param_idx; + uint32_t push_constant_cbv_param_idx; + D3D12_DESCRIPTOR_HEAP_TYPE type[MAX_SHADER_VISIBILITIES]; + ID3D12RootSignature *sig; + } root; +}; + +dzn_pipeline_layout * +dzn_pipeline_layout_ref(dzn_pipeline_layout *layout); + +void +dzn_pipeline_layout_unref(dzn_pipeline_layout *layout); + +#define MAX_RTS 8 +#define MAX_INPUT_ATTACHMENTS 4 + +struct dzn_subpass { + uint32_t color_count; + struct dzn_attachment_ref colors[MAX_RTS]; + struct dzn_attachment_ref resolve[MAX_RTS]; + struct dzn_attachment_ref zs; + uint32_t input_count; + struct dzn_attachment_ref inputs[MAX_INPUT_ATTACHMENTS]; +}; + +struct dzn_render_pass { + struct vk_object_base base; + uint32_t attachment_count; + struct dzn_attachment *attachments; + uint32_t subpass_count; + struct dzn_subpass *subpasses; +}; + +struct dzn_pipeline_cache { + struct vk_object_base base; +}; + +enum dzn_register_space { + DZN_REGISTER_SPACE_SYSVALS = MAX_SETS, + DZN_REGISTER_SPACE_PUSH_CONSTANT, +}; + +class dzn_shader_blob : public IDxcBlob { +public: + dzn_shader_blob(void *buf, size_t sz) : data(buf), size(sz) {} + + LPVOID STDMETHODCALLTYPE GetBufferPointer(void) override { return data; } + + SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return size; } + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void**) override { return E_NOINTERFACE; } + + ULONG STDMETHODCALLTYPE AddRef() override { return 1; } + + ULONG STDMETHODCALLTYPE Release() override { return 0; } + + void *data; + size_t size; +}; + +struct dzn_pipeline { + struct vk_object_base base; + VkPipelineBindPoint type; + dzn_device *device; + struct { + uint32_t sets_param_count; + uint32_t sysval_cbv_param_idx; + uint32_t push_constant_cbv_param_idx; + D3D12_DESCRIPTOR_HEAP_TYPE type[MAX_SHADER_VISIBILITIES]; + ID3D12RootSignature *sig; + } root; + struct { + uint32_t heap_offsets[NUM_POOL_TYPES]; + struct { + uint32_t srv, uav; + } dynamic_buffer_heap_offsets[MAX_DYNAMIC_BUFFERS]; + uint32_t dynamic_buffer_count; + uint32_t range_desc_count[NUM_POOL_TYPES]; + } sets[MAX_SETS]; + uint32_t desc_count[NUM_POOL_TYPES]; + ID3D12PipelineState *state; +}; + +enum dzn_indirect_draw_cmd_sig_type { + DZN_INDIRECT_DRAW_CMD_SIG, + DZN_INDIRECT_INDEXED_DRAW_CMD_SIG, + DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG, + DZN_NUM_INDIRECT_DRAW_CMD_SIGS, +}; + +struct dzn_graphics_pipeline { + dzn_pipeline base; + struct { + unsigned count; + uint32_t strides[MAX_VBS]; + } vb; + + 
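+   /* Input-assembly state. D3D12 has no triangle-fan topology, so when
+    * triangle_fan is set, draws go through the triangle-fan index-rewrite
+    * meta pass declared above. */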
struct { + bool triangle_fan; + D3D_PRIMITIVE_TOPOLOGY topology; + } ia; + + struct { + unsigned count; + bool dynamic; + D3D12_VIEWPORT desc[MAX_VP]; + } vp; + + struct { + unsigned count; + bool dynamic; + D3D12_RECT desc[MAX_SCISSOR]; + } scissor; + + struct { + struct { + bool enable; + bool independent_front_back; + bool dynamic_ref; + bool dynamic_write_mask; + bool dynamic_compare_mask; + struct { + uint32_t ref; + uint32_t write_mask; + uint32_t compare_mask; + bool uses_ref; + } front, back; + } stencil_test; + } zsa; + + ID3D12CommandSignature *indirect_cmd_sigs[DZN_NUM_INDIRECT_DRAW_CMD_SIGS]; +}; + +ID3D12CommandSignature * +dzn_graphics_pipeline_get_indirect_cmd_sig(dzn_graphics_pipeline *pipeline, + enum dzn_indirect_draw_cmd_sig_type cmd_sig_type); + +struct dzn_compute_pipeline { + dzn_pipeline base; + struct { + uint32_t x, y, z; + } local_size; + + ID3D12CommandSignature *indirect_cmd_sig; +}; + +ID3D12CommandSignature * +dzn_compute_pipeline_get_indirect_cmd_sig(dzn_compute_pipeline *pipeline); + +#define MAX_MIP_LEVELS 14 + +struct dzn_image { + struct vk_image vk; + + struct { + uint32_t row_stride = 0; + uint32_t size = 0; + } linear; + D3D12_RESOURCE_DESC desc; + ID3D12Resource *res; + dzn_device_memory *mem; + VkDeviceSize mem_offset; +}; + +DXGI_FORMAT +dzn_image_get_dxgi_format(VkFormat format, + VkImageUsageFlags usage, + VkImageAspectFlags aspects); + +VkFormat +dzn_image_get_plane_format(VkFormat fmt, VkImageAspectFlags aspect); + +DXGI_FORMAT +dzn_image_get_placed_footprint_format(VkFormat fmt, VkImageAspectFlags aspect); + +D3D12_DEPTH_STENCIL_VIEW_DESC +dzn_image_get_dsv_desc(const dzn_image *image, + const VkImageSubresourceRange *range, + uint32_t level); + +D3D12_RENDER_TARGET_VIEW_DESC +dzn_image_get_rtv_desc(const dzn_image *image, + const VkImageSubresourceRange *range, + uint32_t level); + +D3D12_RESOURCE_STATES +dzn_image_layout_to_state(VkImageLayout layout); + +uint32_t +dzn_image_layers_get_subresource_index(const dzn_image *image, + const VkImageSubresourceLayers *subres, + VkImageAspectFlagBits aspect, + uint32_t layer); +uint32_t +dzn_image_range_get_subresource_index(const dzn_image *image, + const VkImageSubresourceRange *range, + VkImageAspectFlagBits aspect, + uint32_t level, uint32_t layer); + +uint32_t +dzn_image_get_subresource_index(const dzn_image *image, + const VkImageSubresource *subres, + VkImageAspectFlagBits aspect); + +D3D12_TEXTURE_COPY_LOCATION +dzn_image_get_copy_loc(const dzn_image *image, + const VkImageSubresourceLayers *layers, + VkImageAspectFlagBits aspect, + uint32_t layer); + +struct dzn_image_view { + struct vk_image_view vk; + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc; + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc; + D3D12_RENDER_TARGET_VIEW_DESC rtv_desc; + D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc; +}; + +void +dzn_image_view_init(dzn_device *device, + dzn_image_view *iview, + const VkImageViewCreateInfo *info); + +void +dzn_image_view_finish(dzn_image_view *iview); + +struct dzn_buffer { + struct vk_object_base base; + + VkDeviceSize size; + + D3D12_RESOURCE_DESC desc; + ID3D12Resource *res; + + VkBufferCreateFlags create_flags; + VkBufferUsageFlags usage; +}; + +DXGI_FORMAT +dzn_buffer_get_dxgi_format(VkFormat format); + +D3D12_TEXTURE_COPY_LOCATION +dzn_buffer_get_copy_loc(const dzn_buffer *buf, VkFormat format, + const VkBufferImageCopy2KHR *info, + VkImageAspectFlagBits aspect, + uint32_t layer); + +D3D12_TEXTURE_COPY_LOCATION +dzn_buffer_get_line_copy_loc(const dzn_buffer *buf, VkFormat format, + const 
VkBufferImageCopy2KHR *region, + const D3D12_TEXTURE_COPY_LOCATION *loc, + uint32_t y, uint32_t z, uint32_t *start_x); + +bool +dzn_buffer_supports_region_copy(const D3D12_TEXTURE_COPY_LOCATION *loc); + +struct dzn_buffer_view { + struct vk_object_base base; + + const dzn_buffer *buffer; + + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc; + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc; +}; + +struct dzn_framebuffer { + struct vk_object_base base; + + uint32_t width, height, layers; + + uint32_t attachment_count; + struct dzn_image_view **attachments; +}; + +struct dzn_sampler { + struct vk_object_base base; + D3D12_SAMPLER_DESC desc; + D3D12_STATIC_BORDER_COLOR static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK; +}; + +/* This is defined as a macro so that it works for both + * VkImageSubresourceRange and VkImageSubresourceLayers + */ +#define dzn_get_layer_count(_image, _range) \ + ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \ + (_image)->vk.array_layers - (_range)->baseArrayLayer : (_range)->layerCount) + +#define dzn_get_level_count(_image, _range) \ + ((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \ + (_image)->vk.mip_levels - (_range)->baseMipLevel : (_range)->levelCount) + +#ifdef __cplusplus +extern "C" { +#endif +DXGI_FORMAT dzn_pipe_to_dxgi_format(enum pipe_format in); +D3D12_FILTER dzn_translate_sampler_filter(const VkSamplerCreateInfo *create_info); +D3D12_COMPARISON_FUNC dzn_translate_compare_op(VkCompareOp in); +void dzn_translate_viewport(D3D12_VIEWPORT *out, const VkViewport *in); +void dzn_translate_rect(D3D12_RECT *out, const VkRect2D *in); +#ifdef __cplusplus +} +#endif + +#define dzn_foreach_aspect(aspect, mask) \ + for (VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT; \ + aspect <= VK_IMAGE_ASPECT_STENCIL_BIT; \ + aspect = (VkImageAspectFlagBits)(aspect << 1)) \ + if (mask & aspect) + +VkResult dzn_wsi_init(struct dzn_physical_device *physical_device); +void dzn_wsi_finish(struct dzn_physical_device *physical_device); + +struct dzn_app_info { + const char *app_name; + uint32_t app_version; + const char *engine_name; + uint32_t engine_version; + uint32_t api_version; +}; + +enum dzn_debug_flags { + DZN_DEBUG_SYNC = 1 << 0, + DZN_DEBUG_NIR = 1 << 1, + DZN_DEBUG_DXIL = 1 << 2, + DZN_DEBUG_WARP = 1 << 3, + DZN_DEBUG_INTERNAL = 1 << 4, + DZN_DEBUG_SIG = 1 << 5, + DZN_DEBUG_GBV = 1 << 6, +}; + +struct dzn_instance { + struct vk_instance vk; + + struct { + IDxcValidator *validator; + IDxcLibrary *library; + IDxcCompiler *compiler; + } dxc; + struct { + PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE serialize_root_sig; + } d3d12; + bool physical_devices_enumerated; + uint32_t debug_flags; + + struct list_head physical_devices; +}; + +struct dzn_event { + struct vk_object_base base; + ID3D12Fence *fence; +}; + +struct dzn_sync { + struct vk_sync vk; + ID3D12Fence *fence; +}; + +extern const struct vk_sync_type dzn_sync_type; + +struct dzn_query { + D3D12_QUERY_TYPE type; + ID3D12Fence *fence; + uint64_t fence_value; +}; + +struct dzn_query_pool { + struct vk_object_base base; + + D3D12_QUERY_HEAP_TYPE heap_type; + ID3D12QueryHeap *heap; + uint32_t query_count; + struct dzn_query *queries; + mtx_t queries_lock; + ID3D12Resource *resolve_buffer; + ID3D12Resource *collect_buffer; + VkQueryPipelineStatisticFlags pipeline_statistics; + uint32_t query_size; + uint64_t *collect_map; +}; + +D3D12_QUERY_TYPE +dzn_query_pool_get_query_type(const dzn_query_pool *qpool, VkQueryControlFlags flag); + +uint32_t +dzn_query_pool_get_result_offset(const dzn_query_pool *qpool, 
uint32_t query); + +uint32_t +dzn_query_pool_get_availability_offset(const dzn_query_pool *qpool, uint32_t query); + +uint32_t +dzn_query_pool_get_result_size(const dzn_query_pool *qpool, uint32_t count); + +VK_DEFINE_HANDLE_CASTS(dzn_cmd_buffer, vk.base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER) +VK_DEFINE_HANDLE_CASTS(dzn_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) +VK_DEFINE_HANDLE_CASTS(dzn_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) +VK_DEFINE_HANDLE_CASTS(dzn_physical_device, vk.base, VkPhysicalDevice, VK_OBJECT_TYPE_PHYSICAL_DEVICE) +VK_DEFINE_HANDLE_CASTS(dzn_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE) + +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_buffer, base, VkBuffer, VK_OBJECT_TYPE_BUFFER) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_buffer_view, base, VkBufferView, VK_OBJECT_TYPE_BUFFER_VIEW) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_device_memory, base, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_descriptor_set, base, VkDescriptorSet, VK_OBJECT_TYPE_DESCRIPTOR_SET) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_descriptor_set_layout, base, VkDescriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_framebuffer, base, VkFramebuffer, VK_OBJECT_TYPE_FRAMEBUFFER) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_image_view, vk.base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_pipeline, base, VkPipeline, VK_OBJECT_TYPE_PIPELINE) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_graphics_pipeline, base.base, VkPipeline, VK_OBJECT_TYPE_PIPELINE) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_compute_pipeline, base.base, VkPipeline, VK_OBJECT_TYPE_PIPELINE) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_pipeline_cache, base, VkPipelineCache, VK_OBJECT_TYPE_PIPELINE_CACHE) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_query_pool, base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_render_pass, base, VkRenderPass, VK_OBJECT_TYPE_RENDER_PASS) +VK_DEFINE_NONDISP_HANDLE_CASTS(dzn_sampler, base, VkSampler, VK_OBJECT_TYPE_SAMPLER) + +#endif /* DZN_PRIVATE_H */ diff --git a/src/microsoft/vulkan/dzn_query.cpp b/src/microsoft/vulkan/dzn_query.cpp new file mode 100644 index 00000000000..c16ae0abe39 --- /dev/null +++ b/src/microsoft/vulkan/dzn_query.cpp @@ -0,0 +1,327 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_debug_report.h" +#include "vk_util.h" + +static D3D12_QUERY_HEAP_TYPE +dzn_query_pool_get_heap_type(VkQueryType in) +{ + switch (in) { + case VK_QUERY_TYPE_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS; + case VK_QUERY_TYPE_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + default: unreachable("Unsupported query type"); + } +} + +D3D12_QUERY_TYPE +dzn_query_pool_get_query_type(const dzn_query_pool *qpool, + VkQueryControlFlags flags) +{ + switch (qpool->heap_type) { + case D3D12_QUERY_HEAP_TYPE_OCCLUSION: + return flags & VK_QUERY_CONTROL_PRECISE_BIT ? + D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION; + case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS; + case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP; + default: unreachable("Unsupported query type"); + } +} + +static void +dzn_query_pool_destroy(dzn_query_pool *qpool, + const VkAllocationCallbacks *alloc) +{ + if (!qpool) + return; + + dzn_device *device = container_of(qpool->base.device, dzn_device, vk); + + if (qpool->collect_map) + qpool->collect_buffer->Unmap(0, NULL); + + if (qpool->collect_buffer) + qpool->collect_buffer->Release(); + + if (qpool->resolve_buffer) + qpool->resolve_buffer->Release(); + + if (qpool->heap) + qpool->heap->Release(); + + for (uint32_t q = 0; q < qpool->query_count; q++) { + if (qpool->queries[q].fence) + qpool->queries[q].fence->Release(); + } + + mtx_destroy(&qpool->queries_lock); + vk_object_base_finish(&qpool->base); + vk_free2(&device->vk.alloc, alloc, qpool); +} + +static VkResult +dzn_query_pool_create(dzn_device *device, + const VkQueryPoolCreateInfo *info, + const VkAllocationCallbacks *alloc, + VkQueryPool *out) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, dzn_query_pool, qpool, 1); + VK_MULTIALLOC_DECL(&ma, dzn_query, queries, info->queryCount); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &qpool->base, VK_OBJECT_TYPE_QUERY_POOL); + + mtx_init(&qpool->queries_lock, mtx_plain); + qpool->query_count = info->queryCount; + qpool->queries = queries; + + D3D12_QUERY_HEAP_DESC desc = { 0 }; + qpool->heap_type = desc.Type = dzn_query_pool_get_heap_type(info->queryType); + desc.Count = info->queryCount; + desc.NodeMask = 0; + + HRESULT hres = + device->dev->CreateQueryHeap(&desc, IID_PPV_ARGS(&qpool->heap)); + if (FAILED(hres)) { + dzn_query_pool_destroy(qpool, alloc); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + switch (info->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + qpool->query_size = sizeof(uint64_t); + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + qpool->pipeline_statistics = info->pipelineStatistics; + qpool->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS); + break; + default: unreachable("Unsupported query type"); + } + + D3D12_HEAP_PROPERTIES hprops = + device->dev->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_DEFAULT); + D3D12_RESOURCE_DESC rdesc = { + 
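+      /* Plain buffer with one qpool->query_size slot per query: this backs
+       * resolve_buffer, and the same desc is reused with a larger Width
+       * for the CPU-readable collect_buffer below. */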
+      .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
+      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+      .Width = info->queryCount * qpool->query_size,
+      .Height = 1,
+      .DepthOrArraySize = 1,
+      .MipLevels = 1,
+      .Format = DXGI_FORMAT_UNKNOWN,
+      .SampleDesc = { .Count = 1, .Quality = 0 },
+      .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+      .Flags = D3D12_RESOURCE_FLAG_NONE,
+   };
+
+   hres = device->dev->CreateCommittedResource(&hprops,
+                                               D3D12_HEAP_FLAG_NONE,
+                                               &rdesc,
+                                               D3D12_RESOURCE_STATE_COPY_DEST,
+                                               NULL, IID_PPV_ARGS(&qpool->resolve_buffer));
+   if (FAILED(hres)) {
+      dzn_query_pool_destroy(qpool, alloc);
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   hprops = device->dev->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_READBACK);
+   rdesc.Width = info->queryCount * (qpool->query_size + sizeof(uint64_t));
+   hres = device->dev->CreateCommittedResource(&hprops,
+                                               D3D12_HEAP_FLAG_NONE,
+                                               &rdesc,
+                                               D3D12_RESOURCE_STATE_COPY_DEST,
+                                               NULL, IID_PPV_ARGS(&qpool->collect_buffer));
+   if (FAILED(hres)) {
+      dzn_query_pool_destroy(qpool, alloc);
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   hres = qpool->collect_buffer->Map(0, NULL, (void **)&qpool->collect_map);
+   if (FAILED(hres)) {
+      dzn_query_pool_destroy(qpool, alloc);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   memset(qpool->collect_map, 0, rdesc.Width);
+
+   *out = dzn_query_pool_to_handle(qpool);
+   return VK_SUCCESS;
+}
+
+uint32_t
+dzn_query_pool_get_result_offset(const dzn_query_pool *qpool, uint32_t query)
+{
+   return query * qpool->query_size;
+}
+
+uint32_t
+dzn_query_pool_get_result_size(const dzn_query_pool *qpool, uint32_t query_count)
+{
+   return query_count * qpool->query_size;
+}
+
+uint32_t
+dzn_query_pool_get_availability_offset(const dzn_query_pool *qpool, uint32_t query)
+{
+   return (qpool->query_count * qpool->query_size) + (sizeof(uint64_t) * query);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_CreateQueryPool(VkDevice device,
+                    const VkQueryPoolCreateInfo *pCreateInfo,
+                    const VkAllocationCallbacks *pAllocator,
+                    VkQueryPool *pQueryPool)
+{
+   return dzn_query_pool_create(dzn_device_from_handle(device),
+                                pCreateInfo, pAllocator, pQueryPool);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_DestroyQueryPool(VkDevice device,
+                     VkQueryPool queryPool,
+                     const VkAllocationCallbacks *pAllocator)
+{
+   dzn_query_pool_destroy(dzn_query_pool_from_handle(queryPool), pAllocator);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+dzn_ResetQueryPool(VkDevice device,
+                   VkQueryPool queryPool,
+                   uint32_t firstQuery,
+                   uint32_t queryCount)
+{
+   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+   mtx_lock(&qpool->queries_lock);
+   for (uint32_t q = 0; q < queryCount; q++) {
+      dzn_query *query = &qpool->queries[firstQuery + q];
+
+      query->fence_value = 0;
+      if (query->fence) {
+         query->fence->Release();
+         query->fence = NULL;
+      }
+   }
+   mtx_unlock(&qpool->queries_lock);
+
+   memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_result_offset(qpool, firstQuery),
+          0, queryCount * qpool->query_size);
+   memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_availability_offset(qpool, firstQuery),
+          0, queryCount * sizeof(uint64_t));
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+dzn_GetQueryPoolResults(VkDevice device,
+                        VkQueryPool queryPool,
+                        uint32_t firstQuery,
+                        uint32_t queryCount,
+                        size_t dataSize,
+                        void *pData,
+                        VkDeviceSize stride,
+                        VkQueryResultFlags flags)
+{
+   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
+
+   uint32_t step = (flags & VK_QUERY_RESULT_64_BIT) ?
+                   sizeof(uint64_t) : sizeof(uint32_t);
+   VkResult result = VK_SUCCESS;
+
+   for (uint32_t q = 0; q < queryCount; q++) {
+      dzn_query *query = &qpool->queries[q + firstQuery];
+
+      uint8_t *dst_ptr = (uint8_t *)pData + (stride * q);
+      uint8_t *src_ptr =
+         (uint8_t *)qpool->collect_map +
+         dzn_query_pool_get_result_offset(qpool, firstQuery + q);
+      uint64_t available = 0;
+
+      if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+         ComPtr<ID3D12Fence> query_fence(NULL);
+         uint64_t query_fence_val = 0;
+
+         while (true) {
+            mtx_lock(&qpool->queries_lock);
+            query_fence = ComPtr<ID3D12Fence>(query->fence);
+            query_fence_val = query->fence_value;
+            mtx_unlock(&qpool->queries_lock);
+
+            if (query_fence.Get())
+               break;
+
+            /* Check again in 10ms.
+             * FIXME: decrease the polling period if it happens to hurt latency.
+             */
+            Sleep(10);
+         }
+
+         query_fence->SetEventOnCompletion(query_fence_val, NULL);
+         available = UINT64_MAX;
+      } else {
+         mtx_lock(&qpool->queries_lock);
+         ComPtr<ID3D12Fence> query_fence(query->fence);
+         uint64_t query_fence_val = query->fence_value;
+         mtx_unlock(&qpool->queries_lock);
+
+         if (query_fence.Get() &&
+             query_fence->GetCompletedValue() >= query_fence_val)
+            available = UINT64_MAX;
+      }
+
+      if (qpool->heap_type != D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
+         if (available)
+            memcpy(dst_ptr, src_ptr, step);
+         else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
+            memset(dst_ptr, 0, step);
+
+         dst_ptr += step;
+      } else {
+         for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
+            if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
+               continue;
+
+            if (available)
+               memcpy(dst_ptr, src_ptr + (c * sizeof(uint64_t)), step);
+            else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
+               memset(dst_ptr, 0, step);
+
+            dst_ptr += step;
+         }
+      }
+
+      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
+         memcpy(dst_ptr, &available, step);
+
+      if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+         result = VK_NOT_READY;
+   }
+
+   return result;
+}
diff --git a/src/microsoft/vulkan/dzn_sync.cpp b/src/microsoft/vulkan/dzn_sync.cpp
new file mode 100644
index 00000000000..583543f72d7
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_sync.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ + +#include "dzn_private.h" + +#include "vk_alloc.h" +#include "vk_debug_report.h" +#include "vk_util.h" + +#include "util/macros.h" +#include "util/os_time.h" + +static VkResult +dzn_sync_init(struct vk_device *device, + struct vk_sync *sync, + uint64_t initial_value) +{ + dzn_sync *dsync = container_of(sync, dzn_sync, vk); + dzn_device *ddev = container_of(device, dzn_device, vk); + + assert(!(sync->flags & VK_SYNC_IS_SHAREABLE)); + + if (FAILED(ddev->dev->CreateFence(initial_value, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&dsync->fence)))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +static void +dzn_sync_finish(struct vk_device *device, + struct vk_sync *sync) +{ + dzn_sync *dsync = container_of(sync, dzn_sync, vk); + + dsync->fence->Release(); +} + +static VkResult +dzn_sync_signal(struct vk_device *device, + struct vk_sync *sync, + uint64_t value) +{ + dzn_sync *dsync = container_of(sync, dzn_sync, vk); + + if (!(sync->flags & VK_SYNC_IS_TIMELINE)) + value = 1; + + if (FAILED(dsync->fence->Signal(value))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +static VkResult +dzn_sync_get_value(struct vk_device *device, + struct vk_sync *sync, + uint64_t *value) +{ + dzn_sync *dsync = container_of(sync, dzn_sync, vk); + + *value = dsync->fence->GetCompletedValue(); + return VK_SUCCESS; +} + +static VkResult +dzn_sync_reset(struct vk_device *device, + struct vk_sync *sync) +{ + dzn_sync *dsync = container_of(sync, dzn_sync, vk); + + if (FAILED(dsync->fence->Signal(0))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return VK_SUCCESS; +} + +static VkResult +dzn_sync_move(struct vk_device *device, + struct vk_sync *dst, + struct vk_sync *src) +{ + dzn_device *ddev = container_of(device, dzn_device, vk); + dzn_sync *ddst = container_of(dst, dzn_sync, vk); + dzn_sync *dsrc = container_of(src, dzn_sync, vk); + ID3D12Fence *new_fence; + + if (FAILED(ddev->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&new_fence)))) + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + ddst->fence->Release(); + ddst->fence = dsrc->fence; + dsrc->fence = new_fence; + return VK_SUCCESS; +} + +static VkResult +dzn_sync_wait(struct vk_device *device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + dzn_device *ddev = container_of(device, dzn_device, vk); + + HANDLE event = CreateEventA(NULL, FALSE, FALSE, NULL); + if (event == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + STACK_ARRAY(ID3D12Fence *, fences, wait_count); + STACK_ARRAY(uint64_t, values, wait_count); + + for (uint32_t i = 0; i < wait_count; i++) { + dzn_sync *sync = container_of(waits[i].sync, dzn_sync, vk); + + fences[i] = sync->fence; + values[i] = (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? waits[i].wait_value : 1; + } + + D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags = + (wait_flags & VK_SYNC_WAIT_ANY) ? + D3D12_MULTIPLE_FENCE_WAIT_FLAG_ANY : + D3D12_MULTIPLE_FENCE_WAIT_FLAG_ALL; + + if (FAILED(ddev->dev->SetEventOnMultipleFenceCompletion(fences, values, + wait_count, flags, + event))) { + STACK_ARRAY_FINISH(fences); + STACK_ARRAY_FINISH(values); + CloseHandle(event); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + DWORD timeout_ms; + + if (abs_timeout_ns == OS_TIMEOUT_INFINITE) { + timeout_ms = INFINITE; + } else { + uint64_t cur_time = os_time_get_nano(); + uint64_t rel_timeout_ns = + abs_timeout_ns > cur_time ? 
abs_timeout_ns - cur_time : 0; + + timeout_ms = (rel_timeout_ns / 1000000) + (rel_timeout_ns % 1000000 ? 1 : 0); + } + + DWORD res = + WaitForSingleObject(event, timeout_ms); + + CloseHandle(event); + + STACK_ARRAY_FINISH(fences); + STACK_ARRAY_FINISH(values); + + if (res == WAIT_TIMEOUT) + return VK_TIMEOUT; + else if (res != WAIT_OBJECT_0) + return vk_error(device, VK_ERROR_UNKNOWN); + + return VK_SUCCESS; +} + +const struct vk_sync_type dzn_sync_type = { + .size = sizeof(dzn_sync), + .features = (enum vk_sync_features) + (VK_SYNC_FEATURE_BINARY | + VK_SYNC_FEATURE_TIMELINE | + VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_GPU_MULTI_WAIT | + VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_CPU_RESET | + VK_SYNC_FEATURE_CPU_SIGNAL | + VK_SYNC_FEATURE_WAIT_ANY | + VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL), + + .init = dzn_sync_init, + .finish = dzn_sync_finish, + .signal = dzn_sync_signal, + .get_value = dzn_sync_get_value, + .reset = dzn_sync_reset, + .move = dzn_sync_move, + .wait_many = dzn_sync_wait, +}; diff --git a/src/microsoft/vulkan/dzn_util.c b/src/microsoft/vulkan/dzn_util.c new file mode 100644 index 00000000000..dd811396f91 --- /dev/null +++ b/src/microsoft/vulkan/dzn_util.c @@ -0,0 +1,234 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+/* Assumption: the bracketed header names below were reconstructed from the
+ * types this file uses; the exact originals were lost. */
+#include <vulkan/vulkan.h>
+#include <directx/d3d12.h>
+#include <math.h>
+
+#include "util/format/u_format.h"
+
+static const DXGI_FORMAT formats[PIPE_FORMAT_COUNT] = {
+#define MAP_FORMAT_NORM(FMT) \
+   [PIPE_FORMAT_ ## FMT ## _UNORM] = DXGI_FORMAT_ ## FMT ## _UNORM, \
+   [PIPE_FORMAT_ ## FMT ## _SNORM] = DXGI_FORMAT_ ## FMT ## _SNORM,
+
+#define MAP_FORMAT_INT(FMT) \
+   [PIPE_FORMAT_ ## FMT ## _UINT] = DXGI_FORMAT_ ## FMT ## _UINT, \
+   [PIPE_FORMAT_ ## FMT ## _SINT] = DXGI_FORMAT_ ## FMT ## _SINT,
+
+#define MAP_FORMAT_SRGB(FMT) \
+   [PIPE_FORMAT_ ## FMT ## _SRGB] = DXGI_FORMAT_ ## FMT ## _UNORM_SRGB,
+
+#define MAP_FORMAT_FLOAT(FMT) \
+   [PIPE_FORMAT_ ## FMT ## _FLOAT] = DXGI_FORMAT_ ## FMT ## _FLOAT,
+
+#define MAP_EMU_FORMAT_NO_ALPHA(BITS, TYPE) \
+   [PIPE_FORMAT_L ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+   [PIPE_FORMAT_I ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+   [PIPE_FORMAT_L ## BITS ## A ## BITS ## _ ## TYPE] = \
+      DXGI_FORMAT_R ## BITS ## G ## BITS ## _ ## TYPE,
+
+#define MAP_EMU_FORMAT(BITS, TYPE) \
+   [PIPE_FORMAT_A ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \
+   MAP_EMU_FORMAT_NO_ALPHA(BITS, TYPE)
+
+   MAP_FORMAT_NORM(R8)
+   MAP_FORMAT_INT(R8)
+
+   MAP_FORMAT_NORM(R8G8)
+   MAP_FORMAT_INT(R8G8)
+
+   MAP_FORMAT_NORM(R8G8B8A8)
+   MAP_FORMAT_INT(R8G8B8A8)
+   MAP_FORMAT_SRGB(R8G8B8A8)
+
+   [PIPE_FORMAT_B8G8R8X8_UNORM] = DXGI_FORMAT_B8G8R8X8_UNORM,
+   [PIPE_FORMAT_B8G8R8A8_UNORM] = DXGI_FORMAT_B8G8R8A8_UNORM,
+   [PIPE_FORMAT_B4G4R4A4_UNORM] = DXGI_FORMAT_B4G4R4A4_UNORM,
+   [PIPE_FORMAT_A4R4G4B4_UNORM] = DXGI_FORMAT_B4G4R4A4_UNORM,
+   [PIPE_FORMAT_B5G6R5_UNORM] = DXGI_FORMAT_B5G6R5_UNORM,
+   [PIPE_FORMAT_B5G5R5A1_UNORM] = DXGI_FORMAT_B5G5R5A1_UNORM,
+
+   MAP_FORMAT_SRGB(B8G8R8A8)
+
+   MAP_FORMAT_INT(R32)
+   MAP_FORMAT_FLOAT(R32)
+   MAP_FORMAT_INT(R32G32)
+   MAP_FORMAT_FLOAT(R32G32)
+   MAP_FORMAT_INT(R32G32B32)
+   MAP_FORMAT_FLOAT(R32G32B32)
+   MAP_FORMAT_INT(R32G32B32A32)
+   MAP_FORMAT_FLOAT(R32G32B32A32)
+
+   MAP_FORMAT_NORM(R16)
+   MAP_FORMAT_INT(R16)
+   MAP_FORMAT_FLOAT(R16)
+
+   MAP_FORMAT_NORM(R16G16)
+   MAP_FORMAT_INT(R16G16)
+   MAP_FORMAT_FLOAT(R16G16)
+
+   MAP_FORMAT_NORM(R16G16B16A16)
+   MAP_FORMAT_INT(R16G16B16A16)
+   MAP_FORMAT_FLOAT(R16G16B16A16)
+
+   [PIPE_FORMAT_A8_UNORM] = DXGI_FORMAT_A8_UNORM,
+   MAP_EMU_FORMAT_NO_ALPHA(8, UNORM)
+   MAP_EMU_FORMAT(8, SNORM)
+   MAP_EMU_FORMAT(8, SINT)
+   MAP_EMU_FORMAT(8, UINT)
+   MAP_EMU_FORMAT(16, UNORM)
+   MAP_EMU_FORMAT(16, SNORM)
+   MAP_EMU_FORMAT(16, SINT)
+   MAP_EMU_FORMAT(16, UINT)
+   MAP_EMU_FORMAT(16, FLOAT)
+   MAP_EMU_FORMAT(32, SINT)
+   MAP_EMU_FORMAT(32, UINT)
+   MAP_EMU_FORMAT(32, FLOAT)
+
+   [PIPE_FORMAT_R9G9B9E5_FLOAT] = DXGI_FORMAT_R9G9B9E5_SHAREDEXP,
+   [PIPE_FORMAT_R11G11B10_FLOAT] = DXGI_FORMAT_R11G11B10_FLOAT,
+   [PIPE_FORMAT_R10G10B10A2_UINT] = DXGI_FORMAT_R10G10B10A2_UINT,
+   [PIPE_FORMAT_R10G10B10A2_UNORM] = DXGI_FORMAT_R10G10B10A2_UNORM,
+
+   [PIPE_FORMAT_DXT1_RGB] = DXGI_FORMAT_BC1_UNORM,
+   [PIPE_FORMAT_DXT1_RGBA] = DXGI_FORMAT_BC1_UNORM,
+   [PIPE_FORMAT_DXT3_RGBA] = DXGI_FORMAT_BC2_UNORM,
+   [PIPE_FORMAT_DXT5_RGBA] = DXGI_FORMAT_BC3_UNORM,
+
+   [PIPE_FORMAT_DXT1_SRGB] = DXGI_FORMAT_BC1_UNORM_SRGB,
+   [PIPE_FORMAT_DXT1_SRGBA] = DXGI_FORMAT_BC1_UNORM_SRGB,
+   [PIPE_FORMAT_DXT3_SRGBA] = DXGI_FORMAT_BC2_UNORM_SRGB,
+   [PIPE_FORMAT_DXT5_SRGBA] = DXGI_FORMAT_BC3_UNORM_SRGB,
+
+   [PIPE_FORMAT_RGTC1_UNORM] = DXGI_FORMAT_BC4_UNORM,
+   [PIPE_FORMAT_RGTC1_SNORM] = DXGI_FORMAT_BC4_SNORM,
+   [PIPE_FORMAT_RGTC2_UNORM] = DXGI_FORMAT_BC5_UNORM,
+   [PIPE_FORMAT_RGTC2_SNORM] = DXGI_FORMAT_BC5_SNORM,
+
+   [PIPE_FORMAT_BPTC_RGB_UFLOAT] = DXGI_FORMAT_BC6H_UF16,
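+   /* BPTC block formats: BC6H covers the unsigned/signed RGB half-float
+    * variants, BC7 the (s)RGBA unorm variants below. */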
+   [PIPE_FORMAT_BPTC_RGB_FLOAT] = DXGI_FORMAT_BC6H_SF16,
+   [PIPE_FORMAT_BPTC_RGBA_UNORM] = DXGI_FORMAT_BC7_UNORM,
+   [PIPE_FORMAT_BPTC_SRGBA] = DXGI_FORMAT_BC7_UNORM_SRGB,
+
+   [PIPE_FORMAT_Z32_FLOAT] = DXGI_FORMAT_R32_TYPELESS,
+   [PIPE_FORMAT_Z16_UNORM] = DXGI_FORMAT_R16_TYPELESS,
+   [PIPE_FORMAT_Z24X8_UNORM] = DXGI_FORMAT_R24G8_TYPELESS,
+   [PIPE_FORMAT_X24S8_UINT] = DXGI_FORMAT_R24G8_TYPELESS,
+
+   [PIPE_FORMAT_Z24_UNORM_S8_UINT] = DXGI_FORMAT_R24G8_TYPELESS,
+   [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = DXGI_FORMAT_R32G8X24_TYPELESS,
+   [PIPE_FORMAT_X32_S8X24_UINT] = DXGI_FORMAT_R32G8X24_TYPELESS,
+};
+
+DXGI_FORMAT
+dzn_pipe_to_dxgi_format(enum pipe_format in)
+{
+   return formats[in];
+}
+
+struct dzn_sampler_filter_info {
+   VkFilter min, mag;
+   VkSamplerMipmapMode mipmap;
+};
+
+#define FILTER(__min, __mag, __mipmap) \
+{ \
+   .min = VK_FILTER_ ## __min, \
+   .mag = VK_FILTER_ ## __mag, \
+   .mipmap = VK_SAMPLER_MIPMAP_MODE_ ## __mipmap, \
+}
+
+static const struct dzn_sampler_filter_info filter_table[] = {
+   [D3D12_FILTER_MIN_MAG_MIP_POINT] = FILTER(NEAREST, NEAREST, NEAREST),
+   [D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR] = FILTER(NEAREST, NEAREST, LINEAR),
+   [D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT] = FILTER(NEAREST, LINEAR, NEAREST),
+   [D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR] = FILTER(NEAREST, LINEAR, LINEAR),
+   [D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT] = FILTER(LINEAR, NEAREST, NEAREST),
+   [D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR] = FILTER(LINEAR, NEAREST, LINEAR),
+   [D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT] = FILTER(LINEAR, LINEAR, NEAREST),
+   [D3D12_FILTER_MIN_MAG_MIP_LINEAR] = FILTER(LINEAR, LINEAR, LINEAR),
+};
+
+D3D12_FILTER
+dzn_translate_sampler_filter(const VkSamplerCreateInfo *create_info)
+{
+   D3D12_FILTER filter;
+
+   if (!create_info->anisotropyEnable) {
+      unsigned i;
+      for (i = 0; i < ARRAY_SIZE(filter_table); i++) {
+         if (create_info->minFilter == filter_table[i].min &&
+             create_info->magFilter == filter_table[i].mag &&
+             create_info->mipmapMode == filter_table[i].mipmap) {
+            filter = (D3D12_FILTER)i;
+            break;
+         }
+      }
+
+      assert(i < ARRAY_SIZE(filter_table));
+   } else {
+      filter = D3D12_FILTER_ANISOTROPIC;
+   }
+
+   /* D3D12 comparison filters are the regular filters with the comparison
+    * bit set, so we can simply add D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT.
+    */
+   if (create_info->compareEnable)
+      filter = (D3D12_FILTER)(filter + D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT);
+
+   return filter;
+}
+
+D3D12_COMPARISON_FUNC
+dzn_translate_compare_op(VkCompareOp in)
+{
+   switch (in) {
+   case VK_COMPARE_OP_NEVER: return D3D12_COMPARISON_FUNC_NEVER;
+   case VK_COMPARE_OP_LESS: return D3D12_COMPARISON_FUNC_LESS;
+   case VK_COMPARE_OP_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL;
+   case VK_COMPARE_OP_LESS_OR_EQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL;
+   case VK_COMPARE_OP_GREATER: return D3D12_COMPARISON_FUNC_GREATER;
+   case VK_COMPARE_OP_NOT_EQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL;
+   case VK_COMPARE_OP_GREATER_OR_EQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
+   case VK_COMPARE_OP_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS;
+   default: unreachable("Invalid compare op");
+   }
+}
+
+void
+dzn_translate_viewport(D3D12_VIEWPORT *out,
+                       const VkViewport *in)
+{
+   out->TopLeftX = in->x;
+   out->TopLeftY = in->y;
+   out->Width = in->width;
+   /* D3D12 doesn't accept negative viewport heights, take the absolute value. */
+   out->Height = fabsf(in->height);
+   /* Vulkan allows minDepth > maxDepth (reversed depth ranges), D3D12 doesn't. */
+   out->MinDepth = MIN2(in->minDepth, in->maxDepth);
+   out->MaxDepth = MAX2(in->maxDepth, in->minDepth);
+}
+
+void
+dzn_translate_rect(D3D12_RECT *out,
+                   const VkRect2D *in)
+{
+   out->left = in->offset.x;
+   out->top = in->offset.y;
+   out->right = in->offset.x + in->extent.width;
+   out->bottom = in->offset.y + in->extent.height;
+}
diff --git a/src/microsoft/vulkan/dzn_util.cpp b/src/microsoft/vulkan/dzn_util.cpp
new file mode 100644
index 00000000000..9c1740d8b97
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_util.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+
+#include "vk_enum_to_str.h"
+
+#include <directx/d3d12sdklayers.h>
+#include <dxgi1_4.h>
+
+#include <dxcapi.h>
+
+IDXGIFactory4 *
+dxgi_get_factory(bool debug)
+{
+   /* IID_IDXGIFactory4, spelled out locally so we don't need dxguid. */
+   static const GUID IID_IDXGIFactory4 = {
+      0x1bc6ea02, 0xef36, 0x464f,
+      { 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
+   };
+
+   HMODULE dxgi_mod = LoadLibraryA("DXGI.DLL");
+   if (!dxgi_mod) {
+      mesa_loge("failed to load DXGI.DLL\n");
+      return NULL;
+   }
+
+   typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY2)(UINT flags, REFIID riid, void **ppFactory);
+   PFN_CREATE_DXGI_FACTORY2 CreateDXGIFactory2;
+
+   CreateDXGIFactory2 = (PFN_CREATE_DXGI_FACTORY2)GetProcAddress(dxgi_mod, "CreateDXGIFactory2");
+   if (!CreateDXGIFactory2) {
+      mesa_loge("failed to load CreateDXGIFactory2 from DXGI.DLL\n");
+      return NULL;
+   }
+
+   UINT flags = 0;
+   if (debug)
+      flags |= DXGI_CREATE_FACTORY_DEBUG;
+
+   IDXGIFactory4 *factory;
+   HRESULT hr = CreateDXGIFactory2(flags, IID_IDXGIFactory4, (void **)&factory);
+   if (FAILED(hr)) {
+      mesa_loge("CreateDXGIFactory2 failed: %08x\n", hr);
+      return NULL;
+   }
+
+   return factory;
+}
+
+static ComPtr<ID3D12Debug>
+get_debug_interface()
+{
+   typedef HRESULT(WINAPI *PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid, void **ppFactory);
+   PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface;
+
+   HMODULE d3d12_mod = LoadLibraryA("D3D12.DLL");
+   if (!d3d12_mod) {
+      mesa_loge("failed to load D3D12.DLL\n");
+      return NULL;
+   }
+
+   D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(d3d12_mod, "D3D12GetDebugInterface");
+   if (!D3D12GetDebugInterface) {
+      mesa_loge("failed to load D3D12GetDebugInterface from D3D12.DLL\n");
+      return NULL;
+   }
+
+   ComPtr<ID3D12Debug> debug;
+   if (FAILED(D3D12GetDebugInterface(IID_PPV_ARGS(&debug)))) {
+      mesa_loge("D3D12GetDebugInterface failed\n");
+      return NULL;
+   }
+
+   return debug;
+}
+
+void
+d3d12_enable_debug_layer()
+{
+   ComPtr<ID3D12Debug> debug = get_debug_interface();
+   if (debug)
+      debug->EnableDebugLayer();
+}
+
+void
+d3d12_enable_gpu_validation()
+{
+   ComPtr<ID3D12Debug> debug = get_debug_interface();
+   ComPtr<ID3D12Debug3> debug3;
+   if (debug &&
+       SUCCEEDED(debug->QueryInterface(IID_PPV_ARGS(&debug3))))
+      debug3->SetEnableGPUBasedValidation(true);
+}
+
+ID3D12Device1 *
+d3d12_create_device(IUnknown *adapter, bool experimental_features)
+{
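+   /* D3D12.DLL is loaded at runtime rather than linked against directly.
+    * When requested, experimental shader models are enabled first, since
+    * D3D12EnableExperimentalFeatures() must be called before the device
+    * is created.
+    */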
+   typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown*, D3D_FEATURE_LEVEL, REFIID, void**);
+   PFN_D3D12CREATEDEVICE D3D12CreateDevice;
+
+   HMODULE d3d12_mod = LoadLibraryA("D3D12.DLL");
+   if (!d3d12_mod) {
+      mesa_loge("failed to load D3D12.DLL\n");
+      return NULL;
+   }
+
+#ifdef _WIN32
+   if (experimental_features)
+#endif
+   {
+      typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID*, void*, UINT*);
+      PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures =
+         (PFN_D3D12ENABLEEXPERIMENTALFEATURES)GetProcAddress(d3d12_mod, "D3D12EnableExperimentalFeatures");
+      if (FAILED(D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL))) {
+         mesa_loge("failed to enable experimental shader models\n");
+         return nullptr;
+      }
+   }
+
+   D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(d3d12_mod, "D3D12CreateDevice");
+   if (!D3D12CreateDevice) {
+      mesa_loge("failed to load D3D12CreateDevice from D3D12.DLL\n");
+      return NULL;
+   }
+
+   ID3D12Device1 *dev;
+   if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0,
+                                   IID_PPV_ARGS(&dev))))
+      return dev;
+
+   mesa_loge("D3D12CreateDevice failed\n");
+   return NULL;
+}
+
+IDxcValidator *
+dxil_get_validator(void)
+{
+   IDxcValidator *ret = NULL;
+
+   HMODULE dxil_mod = LoadLibraryA("dxil.dll");
+   if (!dxil_mod) {
+      mesa_loge("failed to load dxil.dll\n");
+      return ret;
+   }
+
+   DxcCreateInstanceProc CreateInstance = (DxcCreateInstanceProc)
+      GetProcAddress(dxil_mod, "DxcCreateInstance");
+   HRESULT hr = CreateInstance(CLSID_DxcValidator, IID_PPV_ARGS(&ret));
+   if (FAILED(hr))
+      mesa_loge("DxcCreateInstance failed: %08x\n", hr);
+
+   return ret;
+}
+
+PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE
+d3d12_get_serialize_root_sig(void)
+{
+   HMODULE d3d12_mod = LoadLibraryA("d3d12.dll");
+   if (!d3d12_mod) {
+      mesa_loge("failed to load d3d12.dll\n");
+      return NULL;
+   }
+
+   return (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)
+      GetProcAddress(d3d12_mod, "D3D12SerializeVersionedRootSignature");
+}
+
+IDxcLibrary *
+dxc_get_library(void)
+{
+   IDxcLibrary *ret = NULL;
+
+   HMODULE dxil_mod = LoadLibraryA("dxcompiler.dll");
+   if (!dxil_mod) {
+      mesa_loge("failed to load dxcompiler.dll\n");
+      return ret;
+   }
+
+   DxcCreateInstanceProc CreateInstance = (DxcCreateInstanceProc)
+      GetProcAddress(dxil_mod, "DxcCreateInstance");
+   HRESULT hr = CreateInstance(CLSID_DxcLibrary, IID_PPV_ARGS(&ret));
+   if (FAILED(hr))
+      mesa_loge("DxcCreateInstance failed: %08x\n", hr);
+
+   return ret;
+}
+
+IDxcCompiler *
+dxc_get_compiler(void)
+{
+   IDxcCompiler *ret = NULL;
+
+   HMODULE dxil_mod = LoadLibraryA("dxcompiler.dll");
+   if (!dxil_mod) {
+      mesa_loge("failed to load dxcompiler.dll\n");
+      return ret;
+   }
+
+   DxcCreateInstanceProc CreateInstance = (DxcCreateInstanceProc)
+      GetProcAddress(dxil_mod, "DxcCreateInstance");
+   HRESULT hr = CreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&ret));
+   if (FAILED(hr))
+      mesa_loge("DxcCreateInstance failed: %08x\n", hr);
+
+   return ret;
+}
diff --git a/src/microsoft/vulkan/dzn_wsi.cpp b/src/microsoft/vulkan/dzn_wsi.cpp
new file mode 100644
index 00000000000..1cb95ef0a00
--- /dev/null
+++ b/src/microsoft/vulkan/dzn_wsi.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "dzn_private.h"
+#include "vk_util.h"
+
+static PFN_vkVoidFunction VKAPI_PTR
+dzn_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
+{
+   VK_FROM_HANDLE(dzn_physical_device, pdevice, physicalDevice);
+   return vk_instance_get_proc_addr_unchecked(pdevice->vk.instance, pName);
+}
+
+void
+dzn_wsi_finish(struct dzn_physical_device *physical_device)
+{
+   wsi_device_finish(&physical_device->wsi_device,
+                     &physical_device->vk.instance->alloc);
+}
+
+VkResult
+dzn_wsi_init(struct dzn_physical_device *physical_device)
+{
+   VkResult result;
+
+   /* TODO: implement a proper, non-sw winsys for D3D12 */
+   bool sw_device = true;
+
+   result = wsi_device_init(&physical_device->wsi_device,
+                            dzn_physical_device_to_handle(physical_device),
+                            dzn_wsi_proc_addr,
+                            &physical_device->vk.instance->alloc,
+                            -1, NULL, sw_device);
+
+   if (result != VK_SUCCESS)
+      return result;
+
+   physical_device->wsi_device.supports_modifiers = false;
+   physical_device->vk.wsi_device = &physical_device->wsi_device;
+   physical_device->wsi_device.signal_semaphore_with_memory = true;
+   physical_device->wsi_device.signal_fence_with_memory = true;
+
+   return VK_SUCCESS;
+}
diff --git a/src/microsoft/vulkan/meson.build b/src/microsoft/vulkan/meson.build
new file mode 100644
index 00000000000..fb859d97a9d
--- /dev/null
+++ b/src/microsoft/vulkan/meson.build
@@ -0,0 +1,123 @@
+# Copyright © Microsoft Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
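+
+# Generate the dzn_entrypoints.{h,c} dispatch tables from the Vulkan
+# registry XML, as the other Mesa Vulkan drivers do.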
+dzn_entrypoints = custom_target(
+  'dzn_entrypoints',
+  input : [vk_entrypoints_gen, vk_api_xml],
+  output : ['dzn_entrypoints.h', 'dzn_entrypoints.c'],
+  command : [
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
+    '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'dzn'
+  ],
+  depend_files : vk_entrypoints_gen_depend_files,
+)
+
+libdzn_files = files(
+  'dzn_cmd_buffer.cpp',
+  'dzn_descriptor_set.cpp',
+  'dzn_device.cpp',
+  'dzn_image.cpp',
+  'dzn_meta.cpp',
+  'dzn_nir.c',
+  'dzn_pass.cpp',
+  'dzn_pipeline_cache.cpp',
+  'dzn_pipeline.cpp',
+  'dzn_query.cpp',
+  'dzn_sync.cpp',
+  'dzn_util.cpp',
+  'dzn_util.c',
+  'dzn_wsi.cpp',
+)
+
+dzn_deps = [
+  idep_libdxil_compiler,
+  idep_libspirv_to_dxil,
+  idep_nir,
+  idep_nir_headers,
+  idep_vulkan_util,
+  idep_vulkan_runtime,
+  idep_vulkan_wsi,
+  dep_dxheaders,
+]
+
+dzn_flags = [ ]
+
+if with_platform_windows
+  dzn_flags += '-DVK_USE_PLATFORM_WIN32_KHR'
+endif
+
+libvulkan_dzn = shared_library(
+  'vulkan_dzn',
+  [libdzn_files, dzn_entrypoints, sha1_h],
+  vs_module_defs : 'vulkan_dzn.def',
+  include_directories : [
+    inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux,
+    inc_compiler, inc_util
+  ],
+  dependencies : [dzn_deps, idep_vulkan_wsi],
+  c_args : dzn_flags,
+  cpp_args : dzn_flags,
+  gnu_symbol_visibility : 'hidden',
+  link_args : [ld_args_bsymbolic, ld_args_gc_sections],
+  name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
+  install : true,
+  override_options: ['cpp_std=c++latest']
+)
+
+icd_file_name = 'libvulkan_dzn.so'
+module_dir = join_paths(get_option('prefix'), get_option('libdir'))
+if with_platform_windows
+  module_dir = join_paths(get_option('prefix'), get_option('bindir'))
+  icd_file_name = 'vulkan_dzn.dll'
+endif
+
+dzn_icd = custom_target(
+  'dzn_icd',
+  input : [vk_icd_gen, vk_api_xml],
+  output : 'dzn_icd.@0@.json'.format(host_machine.cpu()),
+  command : [
+    prog_python, '@INPUT0@',
+    '--api-version', '1.2', '--xml', '@INPUT1@',
+    '--lib-path', join_paths(module_dir, icd_file_name),
+    '--out', '@OUTPUT@',
+  ],
+  build_by_default : true,
+  install_dir : with_vulkan_icd_dir,
+  install : true,
+)
+
+if meson.version().version_compare('>= 0.58')
+  _dev_icdname = 'dzn_devenv_icd.@0@.json'.format(host_machine.cpu())
+  custom_target(
+    'dzn_devenv_icd',
+    input : [vk_icd_gen, vk_api_xml],
+    output : _dev_icdname,
+    command : [
+      prog_python, '@INPUT0@',
+      '--api-version', '1.1', '--xml', '@INPUT1@',
+      '--lib-path', meson.current_build_dir() / icd_file_name,
+      '--out', '@OUTPUT@',
+    ],
+    build_by_default : true,
+  )
+
+  devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
+endif
diff --git a/src/microsoft/vulkan/vulkan_dzn.def b/src/microsoft/vulkan/vulkan_dzn.def
new file mode 100644
index 00000000000..64a9caae593
--- /dev/null
+++ b/src/microsoft/vulkan/vulkan_dzn.def
@@ -0,0 +1,4 @@
+EXPORTS
+vk_icdNegotiateLoaderICDInterfaceVersion
+vk_icdGetInstanceProcAddr
+vk_icdGetPhysicalDeviceProcAddr