tests: Add microbenchmark for ClearRTV perf.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2021-09-23 15:34:18 +02:00
parent bde3ad8e01
commit cc519d4492
2 changed files with 239 additions and 0 deletions

View File

@ -61,3 +61,11 @@ executable('descriptor-performance', 'descriptor_performance.c',
c_args : vkd3d_test_flags,
override_options : [ 'c_std='+vkd3d_c_std ],
link_with : [ d3d12_test_utils_lib ])
executable('renderpass-performance', 'renderpass_performance.c',
dependencies : vkd3d_test_deps,
include_directories : vkd3d_private_includes,
install : false,
c_args : vkd3d_test_flags,
override_options : [ 'c_std='+vkd3d_c_std ],
link_with : [ d3d12_test_utils_lib ])

View File

@ -0,0 +1,231 @@
/*
* Copyright 2021 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#define INITGUID
#define VKD3D_TEST_DECLARE_MAIN
#include "d3d12_crosstest.h"
struct context
{
ID3D12Device *device;
ID3D12CommandQueue *queue;
ID3D12CommandAllocator *allocator;
ID3D12GraphicsCommandList *list;
ID3D12Resource *resources[64];
ID3D12DescriptorHeap *heap;
};
static void setup(int argc, char **argv)
{
pfn_D3D12CreateDevice = get_d3d12_pfn(D3D12CreateDevice);
pfn_D3D12GetDebugInterface = get_d3d12_pfn(D3D12GetDebugInterface);
parse_args(argc, argv);
enable_d3d12_debug_layer(argc, argv);
init_adapter_info();
pfn_D3D12CreateVersionedRootSignatureDeserializer = get_d3d12_pfn(D3D12CreateVersionedRootSignatureDeserializer);
pfn_D3D12SerializeVersionedRootSignature = get_d3d12_pfn(D3D12SerializeVersionedRootSignature);
}
static double get_time(void)
{
#ifdef _WIN32
LARGE_INTEGER lc, lf;
QueryPerformanceCounter(&lc);
QueryPerformanceFrequency(&lf);
return (double)lc.QuadPart / (double)lf.QuadPart;
#else
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
return ts.tv_sec + 1e-9 * ts.tv_nsec;
#endif
}
static bool init_context(struct context *ctx)
{
unsigned int i;
memset(ctx, 0, sizeof(*ctx));
if (!(ctx->device = create_device()))
return false;
ctx->heap = create_cpu_descriptor_heap(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 64);
for (i = 0; i < ARRAY_SIZE(ctx->resources); i++)
{
ctx->resources[i] = create_default_texture2d(ctx->device, 1024, 1024, 1, 1,
DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
D3D12_RESOURCE_STATE_RENDER_TARGET);
}
ID3D12Device_CreateCommandAllocator(ctx->device, D3D12_COMMAND_LIST_TYPE_DIRECT,
&IID_ID3D12CommandAllocator, (void**)&ctx->allocator);
ctx->queue = create_command_queue(ctx->device, D3D12_COMMAND_LIST_TYPE_DIRECT, 0);
ID3D12Device_CreateCommandList(ctx->device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, ctx->allocator,
NULL, &IID_ID3D12GraphicsCommandList, (void**)&ctx->list);
ID3D12GraphicsCommandList_Close(ctx->list);
for (i = 0; i < 64; i++)
{
D3D12_CPU_DESCRIPTOR_HANDLE h;
h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(ctx->heap);
h.ptr += i * ID3D12Device_GetDescriptorHandleIncrementSize(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
ID3D12Device_CreateRenderTargetView(ctx->device, ctx->resources[i], NULL, h);
}
return true;
}
static void destroy_context(struct context *ctx)
{
unsigned int i;
ID3D12DescriptorHeap_Release(ctx->heap);
for (i = 0; i < ARRAY_SIZE(ctx->resources); i++)
ID3D12Resource_Release(ctx->resources[i]);
ID3D12GraphicsCommandList_Release(ctx->list);
ID3D12CommandAllocator_Release(ctx->allocator);
ID3D12CommandQueue_Release(ctx->queue);
ID3D12Device_Release(ctx->device);
}
enum clear_mode
{
CLEAR_MODE_NULL,
CLEAR_MODE_FULL_RECT,
CLEAR_MODE_PARTIAL
};
static double do_benchmark_run(struct context *ctx, unsigned int clear_iterations,
enum clear_mode mode, bool pre_discard, unsigned int num_resources)
{
const D3D12_RECT partial_rect = { 513, 0, 1018, 518 };
const D3D12_RECT full_rect = { 0, 0, 1024, 1024 };
D3D12_CPU_DESCRIPTOR_HANDLE base_h, h;
D3D12_DISCARD_REGION discard_region;
double start_time, end_time;
unsigned int resource_index;
UINT descriptor_size;
FLOAT clear_color[4];
ID3D12Fence *fence;
unsigned int i;
base_h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(ctx->heap);
ID3D12GraphicsCommandList_Reset(ctx->list, ctx->allocator, NULL);
descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
discard_region.FirstSubresource = 0;
discard_region.NumSubresources = 1;
discard_region.NumRects = 0;
discard_region.pRects = NULL;
/* Spam clear on different RTVs and see what happens to perf. */
for (i = 0; i < clear_iterations; i++)
{
resource_index = i & (num_resources - 1);
h = base_h;
h.ptr += resource_index * descriptor_size;
clear_color[0] = (float)(i & 255) / 255.0f;
clear_color[1] = (float)((i + 1) & 255) / 255.0f;
clear_color[2] = (float)((i + 2) & 255) / 255.0f;
clear_color[3] = (float)((i + 3) & 255) / 255.0f;
if (pre_discard)
ID3D12GraphicsCommandList_DiscardResource(ctx->list, ctx->resources[resource_index], &discard_region);
switch (mode)
{
default:
ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 0, NULL);
break;
case CLEAR_MODE_FULL_RECT:
ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 1, &full_rect);
break;
case CLEAR_MODE_PARTIAL:
ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 1, &partial_rect);
break;
}
}
ID3D12GraphicsCommandList_Close(ctx->list);
ID3D12Device_CreateFence(ctx->device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void**)&fence);
start_time = get_time();
ID3D12CommandQueue_ExecuteCommandLists(ctx->queue, 1, (ID3D12CommandList * const *)&ctx->list);
ID3D12CommandQueue_Signal(ctx->queue, fence, 1);
ID3D12Fence_SetEventOnCompletion(fence, 1, NULL);
end_time = get_time();
ID3D12Fence_Release(fence);
ID3D12CommandAllocator_Reset(ctx->allocator);
return end_time - start_time;
}
START_TEST(renderpass_performance)
{
struct test
{
unsigned int iteration_count;
const char *desc;
enum clear_mode mode;
bool pre_discard;
unsigned int num_resources;
};
static const struct test tests[] =
{
{ 8 * 1024, "8k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },
{ 32 * 1024, "32k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },
{ 128 * 1024, "128k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },
{ 8 * 1024, "8k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },
{ 32 * 1024, "32k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },
{ 128 * 1024, "128k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },
{ 8 * 1024, "8k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },
{ 32 * 1024, "32k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },
{ 128 * 1024, "128k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },
{ 128 * 1024, "128k clear, NULL rect, discard", CLEAR_MODE_NULL, true, 64 },
{ 128 * 1024, "128k clear, full rect, discard", CLEAR_MODE_FULL_RECT, true, 64 },
{ 128 * 1024, "128k clear, partial rect, discard", CLEAR_MODE_PARTIAL, true, 64 },
{ 64 * 1024, "64k clear, batch size 1", CLEAR_MODE_NULL, false, 1 },
{ 64 * 1024, "64k clear, batch size 2", CLEAR_MODE_NULL, false, 2 },
{ 64 * 1024, "64k clear, batch size 4", CLEAR_MODE_NULL, false, 4 },
{ 64 * 1024, "64k clear, batch size 8", CLEAR_MODE_NULL, false, 8 },
{ 64 * 1024, "64k clear, batch size 16", CLEAR_MODE_NULL, false, 16 },
{ 64 * 1024, "64k clear, batch size 32", CLEAR_MODE_NULL, false, 32 },
{ 128 * 1024, "128k clear, batch size 1", CLEAR_MODE_NULL, false, 1 },
{ 128 * 1024, "128k clear, batch size 2", CLEAR_MODE_NULL, false, 2 },
{ 128 * 1024, "128k clear, batch size 4", CLEAR_MODE_NULL, false, 4 },
{ 128 * 1024, "128k clear, batch size 8", CLEAR_MODE_NULL, false, 8 },
{ 128 * 1024, "128k clear, batch size 16", CLEAR_MODE_NULL, false, 16 },
{ 128 * 1024, "128k clear, batch size 32", CLEAR_MODE_NULL, false, 32 },
};
struct context ctx;
unsigned int i;
double t;
setup(argc, argv);
if (!init_context(&ctx))
return;
for (i = 0; i < ARRAY_SIZE(tests); i++)
{
t = do_benchmark_run(&ctx, tests[i].iteration_count, tests[i].mode, tests[i].pre_discard, tests[i].num_resources);
printf("[%40s] => %8.3f ms.\n", tests[i].desc, 1e3 * t);
}
destroy_context(&ctx);
}