Compare commits

...

7 Commits

Author SHA1 Message Date
Hans-Kristian Arntzen 985c906d3a Hack up test for repro. 2021-09-24 12:47:00 +02:00
Hans-Kristian Arntzen 768ccee59e renderdoc: Add global capture support.
Useful for test suite since a test can be comprised of several smaller
submissions, and it's easier to debug if we have one trace.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-24 12:47:00 +02:00
Hans-Kristian Arntzen 19e6d1502c tests: Remove obsolete check for lack of wait-before-signal support.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-23 22:07:43 +02:00
Hans-Kristian Arntzen 87759d882c vkd3d: Ignore DiscardResource for committed resources.
Not required for correctness.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-23 18:01:08 +02:00
Hans-Kristian Arntzen bb89b4d0fa vkd3d: Don't transition from UNDEFINED in ClearAttachment unless forced.
The transition from UNDEFINED requires a full barrier and is significantly
slower than just doing the CLEAR.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-23 17:58:45 +02:00
Hans-Kristian Arntzen 955b2dbf3b tests: Reduce test image to 512x512.
Otherwise the test seems to hit some weird thrashing effect on RDNA2 with
128MB infinity cache. Batch size of 64 is significantly slower than 32
for some reason.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-23 17:56:03 +02:00
Hans-Kristian Arntzen cc519d4492 tests: Add microbenchmark for ClearRTV perf.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-23 16:47:19 +02:00
8 changed files with 390 additions and 43 deletions

View File

@ -203,6 +203,9 @@ pass `-Denable_renderdoc=true` to Meson.
vkd3d-proton will automatically make a capture when a specific shader is encountered.
- `VKD3D_AUTO_CAPTURE_COUNTS` - A comma-separated list of indices. This can be used to control which queue submissions to capture.
E.g., use `VKD3D_AUTO_CAPTURE_COUNTS=0,4,10` to capture the 0th (first submission), 4th and 10th submissions which are candidates for capturing.
If `VKD3D_AUTO_CAPTURE_COUNTS` is `-1`, the entire app runtime (from device creation to device destruction) is recorded as one big capture.
This is only intended to be used when capturing something like the test suite,
or tiny applications with a finite runtime, to make it easier to debug cross-submission work.
If only `VKD3D_AUTO_CAPTURE_COUNTS` is set, any queue submission is considered for capturing.
If only `VKD3D_AUTO_CAPTURE_SHADER` is set, `VKD3D_AUTO_CAPTURE_COUNTS` is considered to be equal to `"0"`, i.e. a capture is only

View File

@ -2438,6 +2438,8 @@ static VkImageLayout vk_separate_stencil_layout(VkImageLayout combined_layout)
VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL;
}
static bool d3d12_resource_may_alias_other_resources(struct d3d12_resource *resource);
static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *list, struct d3d12_resource *resource,
struct vkd3d_view *view, VkImageAspectFlags clear_aspects, const VkClearValue *clear_value, UINT rect_count,
const D3D12_RECT *rects, bool is_bound)
@ -2453,6 +2455,8 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
VkSubpassEndInfoKHR subpass_end_info;
VkRenderPassCreateInfo2KHR pass_info;
VkRenderPassBeginInfo begin_info;
bool require_full_src_barrier;
bool require_full_dst_barrier;
VkFramebuffer vk_framebuffer;
VkRenderPass vk_render_pass;
VkPipelineStageFlags stages;
@ -2461,8 +2465,16 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
VkAccessFlags access;
VkExtent3D extent;
bool clear_op;
bool discard;
VkResult vr;
/* There is no reason to introduce a full barrier for a clear unless we are forced by
* the API to inject a true discard.
* For aliased resources in D3D12, a full clear can complete the alias ownership transfer,
* and we must be conservative and use UNDEFINED oldLayout. For committed resources (common case),
* this can never happen and we rely on the initial transition mechanism instead. */
discard = d3d12_resource_may_alias_other_resources(resource);
attachment_desc.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
attachment_desc.pNext = NULL;
attachment_desc.flags = 0;
@ -2562,7 +2574,7 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
/* Ignore 3D images as re-initializing those may cause us to
* discard the entire image, not just the layers to clear. */
if (resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D)
if (discard && resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D)
{
if (separate_ds_layouts)
{
@ -2598,28 +2610,60 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
subpass_desc.pColorAttachments = &attachment_ref;
}
require_full_src_barrier = (attachment_desc.initialLayout != attachment_ref.layout) ||
(separate_ds_layouts && (stencil_attachment_desc.stencilInitialLayout != stencil_attachment_ref.stencilLayout));
require_full_dst_barrier = (attachment_desc.finalLayout != attachment_ref.layout) ||
(separate_ds_layouts && (stencil_attachment_desc.stencilFinalLayout != stencil_attachment_ref.stencilLayout));
dependencies[0].sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
dependencies[0].pNext = NULL;
dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
dependencies[0].dstSubpass = 0;
dependencies[0].srcStageMask = stages;
dependencies[0].dstStageMask = stages;
dependencies[0].srcAccessMask = clear_op ? access : 0;
dependencies[0].dstAccessMask = access;
dependencies[0].dependencyFlags = 0;
dependencies[0].viewOffset = 0;
if (require_full_src_barrier)
{
dependencies[0].srcAccessMask = clear_op ? access : 0;
dependencies[0].dstAccessMask = access;
dependencies[0].dependencyFlags = 0;
}
else
{
/* Same setup as normal render passes.
* The 0 access mask is technically a hack.
* See vkd3d_render_pass_cache_create_pass_locked(). */
dependencies[0].srcAccessMask = 0;
dependencies[0].dstAccessMask = 0;
dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
}
dependencies[1].sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
dependencies[1].pNext = NULL;
dependencies[1].srcSubpass = 0;
dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
dependencies[1].srcStageMask = stages;
dependencies[1].dstStageMask = stages;
dependencies[1].srcAccessMask = access;
dependencies[1].dstAccessMask = 0;
dependencies[1].dependencyFlags = 0;
dependencies[1].viewOffset = 0;
if (require_full_dst_barrier)
{
dependencies[1].srcAccessMask = access;
dependencies[1].dstAccessMask = 0;
dependencies[1].dependencyFlags = 0;
}
else
{
/* Same setup as normal render passes.
* The 0 access mask is technically a hack.
* See vkd3d_render_pass_cache_create_pass_locked(). */
dependencies[1].srcAccessMask = 0;
dependencies[1].dstAccessMask = 0;
dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
}
pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
pass_info.pNext = NULL;
pass_info.flags = 0;
@ -8239,6 +8283,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(d3d12_command_l
return;
}
/* Only care about resources which could alias memory with something else.
* We only need to transition committed resources once,
* and render targets tend to be that.
* There are no particular performance benefits in using DiscardResource as far as I know ... */
if (!d3d12_resource_may_alias_other_resources(texture))
return;
/* Assume that pRegion == NULL means that we should discard
* the entire resource. This does not seem to be documented. */
resource_subresource_count = d3d12_resource_get_sub_resource_count(texture);

View File

@ -2538,6 +2538,12 @@ static void d3d12_device_destroy(struct d3d12_device *device)
vkd3d_memory_allocator_cleanup(&device->memory_allocator, device);
/* Tear down descriptor global info late, so we catch last minute faults after we drain the queues. */
vkd3d_descriptor_debug_free_global_info(device->descriptor_qa_global_info, device);
#ifdef VKD3D_ENABLE_RENDERDOC
if (vkd3d_renderdoc_active() && vkd3d_renderdoc_global_capture_enabled())
vkd3d_renderdoc_end_capture(device->vkd3d_instance->vk_instance);
#endif
VK_CALL(vkDestroyDevice(device->vk_device, NULL));
pthread_mutex_destroy(&device->mutex);
if (device->parent)
@ -5242,6 +5248,12 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
IUnknown_AddRef(device->parent);
d3d12_device_caps_init(device);
#ifdef VKD3D_ENABLE_RENDERDOC
if (vkd3d_renderdoc_active() && vkd3d_renderdoc_global_capture_enabled())
vkd3d_renderdoc_begin_capture(device->vkd3d_instance->vk_instance);
#endif
return S_OK;
out_cleanup_global_pipeline_cache:

View File

@ -42,6 +42,7 @@ static vkd3d_shader_hash_t renderdoc_capture_shader_hash;
static uint32_t *renderdoc_capture_counts;
static size_t renderdoc_capture_counts_count;
static bool vkd3d_renderdoc_is_active;
static bool vkd3d_renderdoc_global_capture;
static void vkd3d_renderdoc_init_capture_count_list(const char *env)
{
@ -49,6 +50,13 @@ static void vkd3d_renderdoc_init_capture_count_list(const char *env)
uint32_t count;
char *endp;
if (strcmp(env, "-1") == 0)
{
INFO("Doing one big capture of the entire lifetime of a device.\n");
vkd3d_renderdoc_global_capture = true;
return;
}
while (*env != '\0')
{
errno = 0;
@ -180,6 +188,11 @@ bool vkd3d_renderdoc_active(void)
return vkd3d_renderdoc_is_active;
}
/* Reports whether "global capture" mode is on. The underlying flag is set by
 * vkd3d_renderdoc_init_capture_count_list() when the capture-count string is exactly "-1". */
bool vkd3d_renderdoc_global_capture_enabled(void)
{
return vkd3d_renderdoc_global_capture;
}
bool vkd3d_renderdoc_should_capture_shader_hash(vkd3d_shader_hash_t hash)
{
return (renderdoc_capture_shader_hash == hash) || (renderdoc_capture_shader_hash == 0);
@ -190,9 +203,12 @@ bool vkd3d_renderdoc_begin_capture(void *instance)
static uint32_t overall_counter;
uint32_t counter;
counter = vkd3d_atomic_uint32_increment(&overall_counter, vkd3d_memory_order_relaxed) - 1;
if (!vkd3d_renderdoc_enable_submit_counter(counter))
return false;
if (!vkd3d_renderdoc_global_capture)
{
counter = vkd3d_atomic_uint32_increment(&overall_counter, vkd3d_memory_order_relaxed) - 1;
if (!vkd3d_renderdoc_enable_submit_counter(counter))
return false;
}
if (renderdoc_api)
renderdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), NULL);
@ -215,6 +231,9 @@ void vkd3d_renderdoc_command_list_check_capture(struct d3d12_command_list *list,
{
unsigned int i;
if (vkd3d_renderdoc_global_capture_enabled())
return;
if (vkd3d_renderdoc_active() && state)
{
if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE)
@ -246,6 +265,9 @@ bool vkd3d_renderdoc_command_queue_begin_capture(struct d3d12_command_queue *com
VkDebugUtilsLabelEXT capture_label;
bool debug_capture;
if (vkd3d_renderdoc_global_capture_enabled())
return false;
debug_capture = vkd3d_renderdoc_begin_capture(command_queue->device->vkd3d_instance->vk_instance);
if (debug_capture && !vkd3d_renderdoc_loaded_api())
{
@ -273,6 +295,9 @@ void vkd3d_renderdoc_command_queue_end_capture(struct d3d12_command_queue *comma
const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
VkDebugUtilsLabelEXT capture_label;
if (vkd3d_renderdoc_global_capture_enabled())
return;
if (!vkd3d_renderdoc_loaded_api())
{
/* Magic fallback which lets us bridge the Wine barrier over to Linux RenderDoc. */

View File

@ -26,6 +26,7 @@
bool vkd3d_renderdoc_active(void);
bool vkd3d_renderdoc_loaded_api(void);
bool vkd3d_renderdoc_should_capture_shader_hash(vkd3d_shader_hash_t hash);
bool vkd3d_renderdoc_global_capture_enabled(void);
bool vkd3d_renderdoc_begin_capture(void *instance);
void vkd3d_renderdoc_end_capture(void *instance);

View File

@ -87,7 +87,7 @@ void test_queue_wait(void)
context.pipeline_state = create_pipeline_state(context.device,
context.root_signature, context.render_target_desc.Format, NULL, &ps, NULL);
cb = create_upload_buffer(device, sizeof(color), NULL);
cb = create_upload_buffer(device, 512, NULL);
resource_desc = ID3D12Resource_GetDesc(context.render_target);
@ -95,20 +95,6 @@ void test_queue_wait(void)
buffer_size = row_pitch * resource_desc.Height;
readback_buffer = create_readback_buffer(device, buffer_size);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0,
ID3D12Resource_GetGPUVirtualAddress(cb));
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
dst_location.pResource = readback_buffer;
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst_location.PlacedFootprint.Offset = 0;
@ -120,14 +106,47 @@ void test_queue_wait(void)
src_location.pResource = context.render_target;
src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
src_location.SubresourceIndex = 0;
ID3D12GraphicsCommandList_CopyTextureRegion(command_list, &dst_location, 0, 0, 0, &src_location, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0,
ID3D12Resource_GetGPUVirtualAddress(cb));
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
ID3D12GraphicsCommandList_CopyTextureRegion(command_list, &dst_location, 0, 0, 0, &src_location, NULL);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
hr = ID3D12GraphicsCommandList_Close(command_list);
ok(hr == S_OK, "Failed to close command list, hr %#x.\n", hr);
ID3D12GraphicsCommandList *command_list2;
ID3D12Device_CreateCommandList(device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT,
context.allocator, NULL, &IID_ID3D12GraphicsCommandList, (void**)&command_list2);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list2, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list2, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list2, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list2, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list2, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list2, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list2, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list2, 0,
ID3D12Resource_GetGPUVirtualAddress(cb) + 0);
ID3D12GraphicsCommandList_DrawInstanced(command_list2, 3, 1, 0, 0);
transition_resource_state(command_list2, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
ID3D12GraphicsCommandList_CopyTextureRegion(command_list2, &dst_location, 0, 0, 0, &src_location, NULL);
transition_resource_state(command_list2, context.render_target,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
hr = ID3D12GraphicsCommandList_Close(command_list2);
ok(hr == S_OK, "Failed to close command list, hr %#x.\n", hr);
/* Wait() with signaled fence */
update_buffer_data(cb, 0, sizeof(green), &green);
queue_wait(queue, fence, 1);
@ -137,28 +156,23 @@ void test_queue_wait(void)
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
release_resource_readback(&rb);
if (!vkd3d_test_platform_is_windows())
{
skip("Wait() is not implemented yet.\n"); /* FIXME */
goto skip_tests;
}
/* Wait() before CPU signal */
update_buffer_data(cb, 0, sizeof(blue), &blue);
queue_wait(queue, fence, 2);
queue_wait(queue, fence, 1000);
exec_command_list(queue, command_list);
queue_signal(queue, fence2, 1);
hr = ID3D12Fence_SetEventOnCompletion(fence2, 1, event);
ok(hr == S_OK, "Failed to set event on completion, hr %#x.\n", hr);
ret = wait_event(event, 0);
ok(ret == WAIT_TIMEOUT, "Got unexpected return value %#x.\n", ret);
init_readback(&rb, readback_buffer, buffer_size, resource_desc.Width, resource_desc.Height, 1, row_pitch);
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
release_resource_readback(&rb);
value = ID3D12Fence_GetCompletedValue(fence2);
ok(value == 0, "Got unexpected value %"PRIu64".\n", value);
hr = ID3D12Fence_Signal(fence, 2);
//ret = wait_event(event, 0);
//ok(ret == WAIT_TIMEOUT, "Got unexpected return value %#x.\n", ret);
//init_readback(&rb, readback_buffer, buffer_size, resource_desc.Width, resource_desc.Height, 1, row_pitch);
//check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
//release_resource_readback(&rb);
//value = ID3D12Fence_GetCompletedValue(fence2);
//ok(value == 0, "Got unexpected value %"PRIu64".\n", value);
hr = ID3D12Fence_Signal(fence, 1000);
ok(hr == S_OK, "Failed to signal fence, hr %#x.\n", hr);
ret = wait_event(event, INFINITE);
ok(ret == WAIT_OBJECT_0, "Got unexpected return value %#x.\n", ret);
@ -170,6 +184,7 @@ void test_queue_wait(void)
value = ID3D12Fence_GetCompletedValue(fence2);
ok(value == 1, "Got unexpected value %"PRIu64".\n", value);
#if 0
/* Wait() before GPU signal */
update_buffer_data(cb, 0, sizeof(green), &green);
queue_wait(queue, fence, 3);
@ -218,7 +233,6 @@ void test_queue_wait(void)
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
release_resource_readback(&rb);
skip_tests:
/* Signal() and Wait() in the same command queue */
update_buffer_data(cb, 0, sizeof(blue), &blue);
queue_signal(queue, fence, 7);
@ -231,6 +245,7 @@ skip_tests:
value = ID3D12Fence_GetCompletedValue(fence);
ok(value == 7, "Got unexpected value %"PRIu64".\n", value);
#endif
destroy_event(event);
ID3D12Fence_Release(fence);
@ -238,6 +253,7 @@ skip_tests:
ID3D12Resource_Release(cb);
ID3D12CommandQueue_Release(queue2);
ID3D12Resource_Release(readback_buffer);
ID3D12GraphicsCommandList_Release(command_list2);
destroy_test_context(&context);
}

View File

@ -61,3 +61,11 @@ executable('descriptor-performance', 'descriptor_performance.c',
c_args : vkd3d_test_flags,
override_options : [ 'c_std='+vkd3d_c_std ],
link_with : [ d3d12_test_utils_lib ])
# ClearRenderTargetView microbenchmark built from renderpass_performance.c; not installed.
executable('renderpass-performance', 'renderpass_performance.c',
dependencies : vkd3d_test_deps,
include_directories : vkd3d_private_includes,
install : false,
c_args : vkd3d_test_flags,
override_options : [ 'c_std='+vkd3d_c_std ],
link_with : [ d3d12_test_utils_lib ])

View File

@ -0,0 +1,231 @@
/*
* Copyright 2021 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#define INITGUID
#define VKD3D_TEST_DECLARE_MAIN
#include "d3d12_crosstest.h"
/* Objects shared by every benchmark run. Populated by init_context() and
 * released by destroy_context(). */
struct context
{
/* Device returned by create_device(). */
ID3D12Device *device;
/* Direct queue the recorded list is executed on. */
ID3D12CommandQueue *queue;
/* Allocator backing the command list; reset at the end of each run. */
ID3D12CommandAllocator *allocator;
/* Single direct command list, reset and re-recorded for each run. */
ID3D12GraphicsCommandList *list;
/* 512x512 R8G8B8A8_UNORM render targets that the clears cycle through. */
ID3D12Resource *resources[64];
/* CPU-side RTV descriptor heap; descriptor i views resources[i]. */
ID3D12DescriptorHeap *heap;
};
/* Process-level setup, run once before init_context(): stores the D3D12 entry points in
 * the pfn_* variables (declared outside this function) and hands the command line to the
 * shared test helpers.
 * NOTE(review): the helpers are opaque from here; the call order is kept exactly as written
 * because dependencies between them cannot be ruled out. */
static void setup(int argc, char **argv)
{
/* Entry points for device creation and the debug interface. */
pfn_D3D12CreateDevice = get_d3d12_pfn(D3D12CreateDevice);
pfn_D3D12GetDebugInterface = get_d3d12_pfn(D3D12GetDebugInterface);
/* Presumably parses test options and enables the debug layer on request; confirm in the helpers. */
parse_args(argc, argv);
enable_d3d12_debug_layer(argc, argv);
init_adapter_info();
/* Versioned root signature (de)serialization entry points. */
pfn_D3D12CreateVersionedRootSignatureDeserializer = get_d3d12_pfn(D3D12CreateVersionedRootSignatureDeserializer);
pfn_D3D12SerializeVersionedRootSignature = get_d3d12_pfn(D3D12SerializeVersionedRootSignature);
}
/* Returns a timestamp in seconds as a double. Only differences between two
 * calls are meaningful; the origin of the clock is unspecified. */
static double get_time(void)
{
#ifndef _WIN32
    struct timespec now;

    /* Raw monotonic clock, converted from seconds + nanoseconds. */
    clock_gettime(CLOCK_MONOTONIC_RAW, &now);
    return now.tv_sec + 1e-9 * now.tv_nsec;
#else
    LARGE_INTEGER ticks, ticks_per_second;

    /* Performance counter value divided by its frequency gives seconds. */
    QueryPerformanceCounter(&ticks);
    QueryPerformanceFrequency(&ticks_per_second);
    return (double)ticks.QuadPart / (double)ticks_per_second.QuadPart;
#endif
}
/* Creates the device and every object the benchmark needs, storing them in *ctx.
 *
 * ctx is zeroed first. Returns false only when no device could be created;
 * in that case nothing else has been allocated. On success the caller owns
 * all objects and must release them with destroy_context().
 *
 * The number of render targets and RTV descriptors is derived from the size of
 * ctx->resources, so changing the array size is the only edit needed to scale
 * the benchmark. */
static bool init_context(struct context *ctx)
{
    D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
    unsigned int rtv_increment;
    unsigned int i;

    memset(ctx, 0, sizeof(*ctx));

    if (!(ctx->device = create_device()))
        return false;

    /* One RTV descriptor per render target. */
    ctx->heap = create_cpu_descriptor_heap(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
            ARRAY_SIZE(ctx->resources));

    for (i = 0; i < ARRAY_SIZE(ctx->resources); i++)
    {
        ctx->resources[i] = create_default_texture2d(ctx->device, 512, 512, 1, 1,
                DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
                D3D12_RESOURCE_STATE_RENDER_TARGET);
    }

    ID3D12Device_CreateCommandAllocator(ctx->device, D3D12_COMMAND_LIST_TYPE_DIRECT,
            &IID_ID3D12CommandAllocator, (void**)&ctx->allocator);
    ctx->queue = create_command_queue(ctx->device, D3D12_COMMAND_LIST_TYPE_DIRECT, 0);
    ID3D12Device_CreateCommandList(ctx->device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, ctx->allocator,
            NULL, &IID_ID3D12GraphicsCommandList, (void**)&ctx->list);
    /* Close the freshly created list right away; do_benchmark_run() starts every run with Reset(). */
    ID3D12GraphicsCommandList_Close(ctx->list);

    /* The heap start and the descriptor stride do not change between iterations:
     * query them once and step the handle forward by one descriptor per resource. */
    rtv_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(ctx->heap);
    rtv_increment = ID3D12Device_GetDescriptorHandleIncrementSize(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
    for (i = 0; i < ARRAY_SIZE(ctx->resources); i++)
    {
        ID3D12Device_CreateRenderTargetView(ctx->device, ctx->resources[i], NULL, rtv_handle);
        rtv_handle.ptr += rtv_increment;
    }

    return true;
}
/* Drops the references taken by init_context(): descriptor heap first, then
 * every render target, then list, allocator, queue and finally the device. */
static void destroy_context(struct context *ctx)
{
    unsigned int index = 0;

    ID3D12DescriptorHeap_Release(ctx->heap);

    while (index < ARRAY_SIZE(ctx->resources))
    {
        ID3D12Resource_Release(ctx->resources[index]);
        index++;
    }

    ID3D12GraphicsCommandList_Release(ctx->list);
    ID3D12CommandAllocator_Release(ctx->allocator);
    ID3D12CommandQueue_Release(ctx->queue);
    ID3D12Device_Release(ctx->device);
}
/* Selects which rectangle list do_benchmark_run() passes to ClearRenderTargetView. */
enum clear_mode
{
    /* No rectangles (count 0, NULL pointer). */
    CLEAR_MODE_NULL = 0,
    /* One rectangle covering the full 512x512 target. */
    CLEAR_MODE_FULL_RECT = 1,
    /* One rectangle covering only part of the target. */
    CLEAR_MODE_PARTIAL = 2
};
/* Records clear_iterations ClearRenderTargetView calls into ctx->list, submits
 * the list and returns the wall-clock time in seconds from submission until
 * the GPU has signalled completion. Recording time is not included.
 *
 * mode           which rectangle list each clear uses (see enum clear_mode).
 * pre_discard    when true, DiscardResource() on subresource 0 is recorded
 *                before every clear.
 * num_resources  how many of ctx->resources the clears cycle through.
 *                Clamped to [1, number of resources]; any count in that range
 *                works, not only powers of two.
 *
 * The command list and allocator are left reset/closed so the function can be
 * called repeatedly with the same context. */
static double do_benchmark_run(struct context *ctx, unsigned int clear_iterations,
        enum clear_mode mode, bool pre_discard, unsigned int num_resources)
{
    const unsigned int max_resources = sizeof(ctx->resources) / sizeof(ctx->resources[0]);
    const D3D12_RECT partial_rect = { 257, 0, 509, 259 };
    const D3D12_RECT full_rect = { 0, 0, 512, 512 };
    D3D12_CPU_DESCRIPTOR_HANDLE base_h, h;
    D3D12_DISCARD_REGION discard_region;
    double start_time, end_time;
    unsigned int resource_index;
    UINT descriptor_size;
    FLOAT clear_color[4];
    ID3D12Fence *fence;
    unsigned int i;

    /* Keep the resource index inside ctx->resources whatever the caller passes:
     * 0 would otherwise make every iteration index the array with i itself. */
    if (num_resources == 0)
        num_resources = 1;
    else if (num_resources > max_resources)
        num_resources = max_resources;

    base_h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(ctx->heap);
    ID3D12GraphicsCommandList_Reset(ctx->list, ctx->allocator, NULL);
    descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);

    /* Whole-subresource discard: no rectangles. */
    discard_region.FirstSubresource = 0;
    discard_region.NumSubresources = 1;
    discard_region.NumRects = 0;
    discard_region.pRects = NULL;

    /* Spam clear on different RTVs and see what happens to perf. */
    for (i = 0; i < clear_iterations; i++)
    {
        /* Same result as the mask i & (num_resources - 1) for power-of-two counts. */
        resource_index = i % num_resources;
        h = base_h;
        h.ptr += resource_index * descriptor_size;

        /* Vary the color per iteration so consecutive clears are not identical. */
        clear_color[0] = (float)(i & 255) / 255.0f;
        clear_color[1] = (float)((i + 1) & 255) / 255.0f;
        clear_color[2] = (float)((i + 2) & 255) / 255.0f;
        clear_color[3] = (float)((i + 3) & 255) / 255.0f;

        if (pre_discard)
            ID3D12GraphicsCommandList_DiscardResource(ctx->list, ctx->resources[resource_index], &discard_region);

        switch (mode)
        {
            case CLEAR_MODE_FULL_RECT:
                ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 1, &full_rect);
                break;

            case CLEAR_MODE_PARTIAL:
                ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 1, &partial_rect);
                break;

            case CLEAR_MODE_NULL:
            default:
                ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 0, NULL);
                break;
        }
    }

    ID3D12GraphicsCommandList_Close(ctx->list);
    ID3D12Device_CreateFence(ctx->device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void**)&fence);

    /* Timed region: submission plus a blocking wait for the fence. The wait relies on
     * SetEventOnCompletion() with a NULL event not returning until the value is reached. */
    start_time = get_time();
    ID3D12CommandQueue_ExecuteCommandLists(ctx->queue, 1, (ID3D12CommandList * const *)&ctx->list);
    ID3D12CommandQueue_Signal(ctx->queue, fence, 1);
    ID3D12Fence_SetEventOnCompletion(fence, 1, NULL);
    end_time = get_time();

    ID3D12Fence_Release(fence);
    ID3D12CommandAllocator_Reset(ctx->allocator);
    return end_time - start_time;
}
/* Benchmark entry point: runs every configuration in the table below once and
 * prints the measured submit-to-completion time in milliseconds.
 * NOTE(review): argc/argv are presumably provided by the START_TEST macro; confirm. */
START_TEST(renderpass_performance)
{
    /* One benchmark configuration. Field order matters: the table uses positional initializers. */
    struct test
    {
        unsigned int iteration_count;
        const char *desc;
        enum clear_mode mode;
        bool pre_discard;
        unsigned int num_resources;
    };

    static const struct test tests[] =
    {
        /* Clear count scaling, for each rectangle mode. */
        { 8 * 1024, "8k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },
        { 32 * 1024, "32k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },
        { 128 * 1024, "128k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },
        { 8 * 1024, "8k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },
        { 32 * 1024, "32k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },
        { 128 * 1024, "128k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },
        { 8 * 1024, "8k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },
        { 32 * 1024, "32k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },
        { 128 * 1024, "128k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },

        /* Same clears, each preceded by DiscardResource. */
        { 128 * 1024, "128k clear, NULL rect, discard", CLEAR_MODE_NULL, true, 64 },
        { 128 * 1024, "128k clear, full rect, discard", CLEAR_MODE_FULL_RECT, true, 64 },
        { 128 * 1024, "128k clear, partial rect, discard", CLEAR_MODE_PARTIAL, true, 64 },

        /* Varying how many distinct render targets the clears cycle through. */
        { 64 * 1024, "64k clear, batch size 1", CLEAR_MODE_NULL, false, 1 },
        { 64 * 1024, "64k clear, batch size 2", CLEAR_MODE_NULL, false, 2 },
        { 64 * 1024, "64k clear, batch size 4", CLEAR_MODE_NULL, false, 4 },
        { 64 * 1024, "64k clear, batch size 8", CLEAR_MODE_NULL, false, 8 },
        { 64 * 1024, "64k clear, batch size 16", CLEAR_MODE_NULL, false, 16 },
        { 64 * 1024, "64k clear, batch size 32", CLEAR_MODE_NULL, false, 32 },
        { 128 * 1024, "128k clear, batch size 1", CLEAR_MODE_NULL, false, 1 },
        { 128 * 1024, "128k clear, batch size 2", CLEAR_MODE_NULL, false, 2 },
        { 128 * 1024, "128k clear, batch size 4", CLEAR_MODE_NULL, false, 4 },
        { 128 * 1024, "128k clear, batch size 8", CLEAR_MODE_NULL, false, 8 },
        { 128 * 1024, "128k clear, batch size 16", CLEAR_MODE_NULL, false, 16 },
        { 128 * 1024, "128k clear, batch size 32", CLEAR_MODE_NULL, false, 32 },
    };

    struct context ctx;
    double elapsed;
    unsigned int i;

    setup(argc, argv);

    /* Nothing to measure without a device. */
    if (!init_context(&ctx))
        return;

    for (i = 0; i < ARRAY_SIZE(tests); i++)
    {
        const struct test *current = &tests[i];

        elapsed = do_benchmark_run(&ctx, current->iteration_count, current->mode,
                current->pre_discard, current->num_resources);
        printf("[%40s] => %8.3f ms.\n", current->desc, 1e3 * elapsed);
    }

    destroy_context(&ctx);
}