vkd3d-proton/tests/descriptor_performance.c

247 lines
9.2 KiB
C

/*
* Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#define INITGUID
#define VKD3D_TEST_DECLARE_MAIN
#include "d3d12_crosstest.h"
/* One-time test start-up, called from the test entry point with the process
 * command line. Looks up the D3D12 entry points through get_d3d12_pfn() and
 * stores them in the pfn_* variables, then runs the initialisation helpers.
 * The statement order is kept exactly as written; NOTE(review): whether the
 * helpers depend on that order is not visible from this file. */
static void setup(int argc, char **argv)
{
/* Entry points for device creation, experimental features and the debug interface. */
pfn_D3D12CreateDevice = get_d3d12_pfn(D3D12CreateDevice);
pfn_D3D12EnableExperimentalFeatures = get_d3d12_pfn(D3D12EnableExperimentalFeatures);
pfn_D3D12GetDebugInterface = get_d3d12_pfn(D3D12GetDebugInterface);
/* The same argc/argv pair is handed to the argument parser and to the debug layer helper. */
parse_args(argc, argv);
enable_d3d12_debug_layer(argc, argv);
init_adapter_info();
/* Entry points for versioned root signature serialization and deserialization. */
pfn_D3D12CreateVersionedRootSignatureDeserializer = get_d3d12_pfn(D3D12CreateVersionedRootSignatureDeserializer);
pfn_D3D12SerializeVersionedRootSignature = get_d3d12_pfn(D3D12SerializeVersionedRootSignature);
}
/* Returns the current reading of a high-resolution clock, in seconds.
 *
 * On Windows the performance counter is divided by its frequency; elsewhere
 * CLOCK_MONOTONIC_RAW is read and its seconds/nanoseconds pair is folded into
 * a single double. Callers subtract two readings to obtain an elapsed time. */
static double get_time(void)
{
#ifdef _WIN32
    LARGE_INTEGER ticks, ticks_per_second;

    QueryPerformanceCounter(&ticks);
    QueryPerformanceFrequency(&ticks_per_second);

    return (double)ticks.QuadPart / (double)ticks_per_second.QuadPart;
#else
    struct timespec now;
    double whole_seconds, nanoseconds;

    clock_gettime(CLOCK_MONOTONIC_RAW, &now);

    whole_seconds = now.tv_sec;
    nanoseconds = now.tv_nsec;

    return whole_seconds + 1e-9 * nanoseconds;
#endif
}
/* Creates the same shader resource view `count` times, once in each of the
 * first `count` consecutive CBV/SRV/UAV descriptor slots of `heap`.
 *
 * device:   device used to query the descriptor stride and create the views.
 * heap:     destination heap; writing starts at its first CPU descriptor.
 * resource: resource the views refer to; passed through unchanged (callers
 *           in this file also pass NULL).
 * desc:     view description passed through unchanged to every creation call.
 * count:    number of descriptors to write. */
static void fill_descriptor_heap_srv(ID3D12Device *device, ID3D12DescriptorHeap *heap,
        ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, unsigned int count)
{
    const UINT slot_size = ID3D12Device_GetDescriptorHandleIncrementSize(device,
            D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    D3D12_CPU_DESCRIPTOR_HANDLE slot = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap);
    unsigned int remaining;

    /* Walk the heap one slot at a time, advancing the handle by the device's stride. */
    for (remaining = count; remaining != 0; --remaining, slot.ptr += slot_size)
        ID3D12Device_CreateShaderResourceView(device, resource, desc, slot);
}
/* Copies `count` CBV/SRV/UAV descriptors with a single CopyDescriptorsSimple
 * call. The range starting at the first descriptor of `gpu_heap` is passed as
 * the second handle argument and the range starting at the first descriptor
 * of `cpu_heap` as the third; both are CPU descriptor handles. */
static void copy_descriptor_heap(ID3D12Device *device, ID3D12DescriptorHeap *gpu_heap,
        ID3D12DescriptorHeap *cpu_heap, unsigned int count)
{
    D3D12_CPU_DESCRIPTOR_HANDLE gpu_heap_start, cpu_heap_start;

    gpu_heap_start = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu_heap);
    cpu_heap_start = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu_heap);

    ID3D12Device_CopyDescriptorsSimple(device, count, gpu_heap_start, cpu_heap_start,
            D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
/* Same transfer as copy_descriptor_heap(), but issued as `count` separate
 * CopyDescriptorsSimple calls of one descriptor each. Both handles start at
 * the beginning of their heap and advance in lock-step by the device's
 * CBV/SRV/UAV descriptor increment after every call. */
static void copy_descriptor_heap_single(ID3D12Device *device, ID3D12DescriptorHeap *gpu_heap,
        ID3D12DescriptorHeap *cpu_heap, unsigned int count)
{
    D3D12_CPU_DESCRIPTOR_HANDLE gpu_slot, cpu_slot;
    unsigned int remaining;
    UINT64 slot_size;

    gpu_slot = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu_heap);
    cpu_slot = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu_heap);
    slot_size = ID3D12Device_GetDescriptorHandleIncrementSize(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

    for (remaining = count; remaining != 0; --remaining)
    {
        ID3D12Device_CopyDescriptorsSimple(device, 1, gpu_slot, cpu_slot,
                D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

        gpu_slot.ptr += slot_size;
        cpu_slot.ptr += slot_size;
    }
}
/* Overwrites the first `count` descriptors of `heap` with SRVs that have no
 * backing resource (NULL is passed as the resource). The view description is
 * zeroed first and then set up as a single-mip R8G8B8A8_UNORM 2D texture view
 * with the default component mapping. */
static void zero_descriptor_heap(ID3D12Device *device, ID3D12DescriptorHeap *heap, unsigned int count)
{
    D3D12_SHADER_RESOURCE_VIEW_DESC null_view;

    /* Start from all-zero so every field not set below is 0. */
    memset(&null_view, 0, sizeof(null_view));
    null_view.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    null_view.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    null_view.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    null_view.Texture2D.MipLevels = 1;

    fill_descriptor_heap_srv(device, heap, NULL, &null_view, count);
}
/* Runs one pass of the descriptor benchmarks on `device` and prints one
 * timing line per scenario to stdout.
 *
 * Two CBV/SRV/UAV heaps of the same size are created, one without flags
 * ("CPU heap") and one shader visible ("GPU heap"), plus a 256x256
 * R8G8B8A8_UNORM texture that every non-null SRV refers to. The scenarios run
 * in a fixed order because each one relies on the heap contents left behind
 * by the previous one (blank -> dirty -> copied -> zeroed -> ...).
 *
 * If a heap or the texture cannot be created the failure is reported through
 * ok() and the pass is abandoned, releasing whatever was already created,
 * instead of benchmarking with invalid objects. */
static void do_benchmark_run(ID3D12Device *device)
{
    /* Size of both heaps and of every timed operation; the "1M" in the
     * messages below refers to this value. */
    const unsigned int descriptor_count = 1000000;
    D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
    D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
    ID3D12DescriptorHeap *gpu_heap;
    ID3D12DescriptorHeap *cpu_heap;
    double start_time, end_time;
    ID3D12Resource *texture;
    HRESULT hr;

    heap_desc.NumDescriptors = descriptor_count;
    heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
    heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    heap_desc.NodeMask = 0;
    hr = ID3D12Device_CreateDescriptorHeap(device, &heap_desc, &IID_ID3D12DescriptorHeap, (void**)&cpu_heap);
    ok(SUCCEEDED(hr), "Failed to create descriptor heap, hr #%x.\n", hr);
    if (!SUCCEEDED(hr))
        return;

    /* Same description, but shader visible. */
    heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    hr = ID3D12Device_CreateDescriptorHeap(device, &heap_desc, &IID_ID3D12DescriptorHeap, (void**)&gpu_heap);
    ok(SUCCEEDED(hr), "Failed to create descriptor heap, hr #%x.\n", hr);
    if (!SUCCEEDED(hr))
        goto release_cpu_heap;

    texture = create_default_texture2d(device,
            256, 256, 1, 1, DXGI_FORMAT_R8G8B8A8_UNORM,
            D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
    ok(texture != NULL, "Failed to create texture.\n");
    if (!texture)
        goto release_gpu_heap;

    srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    srv_desc.Texture2D.MipLevels = 1;
    srv_desc.Texture2D.MostDetailedMip = 0;
    srv_desc.Texture2D.PlaneSlice = 0;
    srv_desc.Texture2D.ResourceMinLODClamp = 0.0f;

    /* Benchmark creation of SRVs in a freshly created CPU-only heap. */
    start_time = get_time();
    fill_descriptor_heap_srv(device, cpu_heap, texture, &srv_desc, descriptor_count);
    end_time = get_time();
    printf("Creating 1M SRVs on blank CPU heap took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Do the same thing again, but this time on a used heap, so we also have to destroy existing views. */
    start_time = get_time();
    fill_descriptor_heap_srv(device, cpu_heap, texture, &srv_desc, descriptor_count);
    end_time = get_time();
    printf("Creating 1M SRVs on dirty CPU heap took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Fill the freshly created shader visible heap. */
    start_time = get_time();
    fill_descriptor_heap_srv(device, gpu_heap, texture, &srv_desc, descriptor_count);
    end_time = get_time();
    printf("Creating 1M SRVs on blank GPU-visible heap took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Do the same thing again, but this time on a used heap, so we also have to destroy existing views. */
    start_time = get_time();
    fill_descriptor_heap_srv(device, gpu_heap, texture, &srv_desc, descriptor_count);
    end_time = get_time();
    printf("Creating 1M SRVs on dirty GPU-visible heap took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Bulk copy from the CPU heap over the SRVs created directly in the GPU heap. */
    start_time = get_time();
    copy_descriptor_heap(device, gpu_heap, cpu_heap, descriptor_count);
    end_time = get_time();
    printf("Copying 1M SRVs to dirty GPU visible heap took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Repeat the bulk copy; the destination already holds the copied descriptors. */
    start_time = get_time();
    copy_descriptor_heap(device, gpu_heap, cpu_heap, descriptor_count);
    end_time = get_time();
    printf("Copying 1M SRVs (duplicates) took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Overwrite the GPU heap with null SRVs. */
    start_time = get_time();
    zero_descriptor_heap(device, gpu_heap, descriptor_count);
    end_time = get_time();
    printf("Creating 1M null-SRVs took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Bulk copy on top of the null SRVs written above. */
    start_time = get_time();
    copy_descriptor_heap(device, gpu_heap, cpu_heap, descriptor_count);
    end_time = get_time();
    printf("Copying 1M SRVs to zeroed GPU visible heap took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Copy one descriptor at a time. The destination is NOT zeroed here: it
     * still holds the descriptors written by the bulk copy above, hence
     * "duplicates" in the message. */
    start_time = get_time();
    copy_descriptor_heap_single(device, gpu_heap, cpu_heap, descriptor_count);
    end_time = get_time();
    printf("Copying 1M individual SRVs to GPU visible heap (duplicates) took: %.3f ms.\n", 1e3 * (end_time - start_time));

    /* Reset the GPU heap to null SRVs (not timed), then repeat the
     * one-at-a-time copy on top of them. */
    zero_descriptor_heap(device, gpu_heap, descriptor_count);
    start_time = get_time();
    copy_descriptor_heap_single(device, gpu_heap, cpu_heap, descriptor_count);
    end_time = get_time();
    printf("Copying 1M individual SRVs to zeroed GPU visible heap took: %.3f ms.\n", 1e3 * (end_time - start_time));

    ID3D12Resource_Release(texture);
release_gpu_heap:
    ID3D12DescriptorHeap_Release(gpu_heap);
release_cpu_heap:
    ID3D12DescriptorHeap_Release(cpu_heap);
}
/* Test entry point: initialises the test environment, creates a device and
 * runs the full benchmark pass 100 times on it. argc and argv are not
 * declared in this body; presumably the START_TEST macro supplies them
 * (confirm against the test framework header).
 *
 * If no device can be created the failure is reported through ok() and the
 * test returns without running any benchmark, rather than passing a NULL
 * device to the benchmark and to Release. */
START_TEST(descriptor_performance)
{
    ID3D12Device *device;
    unsigned int i;

    setup(argc, argv);

    device = create_device();
    ok(device != NULL, "Failed to create device.\n");
    if (!device)
        return;

    for (i = 0; i < 100; i++)
        do_benchmark_run(device);

    ID3D12Device_Release(device);
}