Compare commits

...

6 Commits

Author SHA1 Message Date
Hans-Kristian Arntzen 11a800b367 vkd3d: Remove redundant is_bound in clear_attachment_pass.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-24 13:07:38 +02:00
Hans-Kristian Arntzen d38544e0a2 vkd3d: Ignore DiscardResource for committed resources.
Not required for correctness.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-24 13:07:38 +02:00
Hans-Kristian Arntzen 7139b4de83 vkd3d: Don't transition from UNDEFINED in ClearAttachment unless forced.
The transition from UNDEFINED requires a full barrier and is significantly
slower than just doing the CLEAR.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-24 13:07:38 +02:00
Hans-Kristian Arntzen e295c38057 renderdoc: Add global capture support.
Useful for test suite since a test can be comprised of several smaller
submissions, and it's easier to debug if we have one trace.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-24 13:07:38 +02:00
Hans-Kristian Arntzen e414ae3aba tests: Remove obsolete check for lack of wait-before-signal support.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-24 13:07:38 +02:00
Hans-Kristian Arntzen dd2e7b10a2 tests: Add microbenchmark for ClearRTV perf.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-09-24 13:07:17 +02:00
8 changed files with 344 additions and 23 deletions

View File

@ -203,6 +203,9 @@ pass `-Denable_renderdoc=true` to Meson.
vkd3d-proton will automatically make a capture when a specific shader is encountered.
- `VKD3D_AUTO_CAPTURE_COUNTS` - A comma-separated list of indices. This can be used to control which queue submissions to capture.
E.g., use `VKD3D_AUTO_CAPTURE_COUNTS=0,4,10` to capture the 0th (first submission), 4th and 10th submissions which are candidates for capturing.
If `VKD3D_AUTO_CAPTURE_COUNTS` is `-1`, the entire app runtime is recorded as one big capture.
This is only intended for capturing something like the test suite,
or tiny applications with a finite runtime, where it makes cross-submission work easier to debug.
If only `VKD3D_AUTO_CAPTURE_COUNTS` is set, any queue submission is considered for capturing.
If only `VKD3D_AUTO_CAPTURE_SHADER` is set, `VKD3D_AUTO_CAPTURE_COUNTS` is considered to be equal to `"0"`, i.e. a capture is only

View File

@ -2438,9 +2438,11 @@ static VkImageLayout vk_separate_stencil_layout(VkImageLayout combined_layout)
VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL;
}
static bool d3d12_resource_may_alias_other_resources(struct d3d12_resource *resource);
static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *list, struct d3d12_resource *resource,
struct vkd3d_view *view, VkImageAspectFlags clear_aspects, const VkClearValue *clear_value, UINT rect_count,
const D3D12_RECT *rects, bool is_bound)
const D3D12_RECT *rects)
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
VkAttachmentDescriptionStencilLayout stencil_attachment_desc;
@ -2453,6 +2455,8 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
VkSubpassEndInfoKHR subpass_end_info;
VkRenderPassCreateInfo2KHR pass_info;
VkRenderPassBeginInfo begin_info;
bool require_full_src_barrier;
bool require_full_dst_barrier;
VkFramebuffer vk_framebuffer;
VkRenderPass vk_render_pass;
VkPipelineStageFlags stages;
@ -2461,8 +2465,16 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
VkAccessFlags access;
VkExtent3D extent;
bool clear_op;
bool discard;
VkResult vr;
/* There is no reason to introduce a full barrier for a clear unless we are forced by
* the API to inject a true discard.
* For aliased resources in D3D12, a full clear can complete the alias ownership transfer,
* and we must be conservative and use UNDEFINED oldLayout. For committed resources (common case),
* this can never happen and we rely on the initial transition mechanism instead. */
discard = d3d12_resource_may_alias_other_resources(resource);
attachment_desc.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
attachment_desc.pNext = NULL;
attachment_desc.flags = 0;
@ -2479,10 +2491,7 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
if (clear_aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
{
if (is_bound)
attachment_desc.initialLayout = list->dsv_layout;
else
attachment_desc.initialLayout = d3d12_command_list_get_depth_stencil_resource_layout(list, resource, NULL);
attachment_desc.initialLayout = d3d12_command_list_get_depth_stencil_resource_layout(list, resource, NULL);
if (separate_ds_layouts)
{
@ -2562,7 +2571,7 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
/* Ignore 3D images as re-initializing those may cause us to
* discard the entire image, not just the layers to clear. */
if (resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D)
if (discard && resource->desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D)
{
if (separate_ds_layouts)
{
@ -2598,28 +2607,60 @@ static void d3d12_command_list_clear_attachment_pass(struct d3d12_command_list *
subpass_desc.pColorAttachments = &attachment_ref;
}
require_full_src_barrier = (attachment_desc.initialLayout != attachment_ref.layout) ||
(separate_ds_layouts && (stencil_attachment_desc.stencilInitialLayout != stencil_attachment_ref.stencilLayout));
require_full_dst_barrier = (attachment_desc.finalLayout != attachment_ref.layout) ||
(separate_ds_layouts && (stencil_attachment_desc.stencilFinalLayout != stencil_attachment_ref.stencilLayout));
dependencies[0].sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
dependencies[0].pNext = NULL;
dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
dependencies[0].dstSubpass = 0;
dependencies[0].srcStageMask = stages;
dependencies[0].dstStageMask = stages;
dependencies[0].srcAccessMask = clear_op ? access : 0;
dependencies[0].dstAccessMask = access;
dependencies[0].dependencyFlags = 0;
dependencies[0].viewOffset = 0;
if (require_full_src_barrier)
{
dependencies[0].srcAccessMask = clear_op ? access : 0;
dependencies[0].dstAccessMask = access;
dependencies[0].dependencyFlags = 0;
}
else
{
/* Same setup as normal render passes.
* The 0 access mask is technically a hack.
* See vkd3d_render_pass_cache_create_pass_locked(). */
dependencies[0].srcAccessMask = 0;
dependencies[0].dstAccessMask = 0;
dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
}
dependencies[1].sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
dependencies[1].pNext = NULL;
dependencies[1].srcSubpass = 0;
dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
dependencies[1].srcStageMask = stages;
dependencies[1].dstStageMask = stages;
dependencies[1].srcAccessMask = access;
dependencies[1].dstAccessMask = 0;
dependencies[1].dependencyFlags = 0;
dependencies[1].viewOffset = 0;
if (require_full_dst_barrier)
{
dependencies[1].srcAccessMask = access;
dependencies[1].dstAccessMask = 0;
dependencies[1].dependencyFlags = 0;
}
else
{
/* Same setup as normal render passes.
* The 0 access mask is technically a hack.
* See vkd3d_render_pass_cache_create_pass_locked(). */
dependencies[1].srcAccessMask = 0;
dependencies[1].dstAccessMask = 0;
dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
}
pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
pass_info.pNext = NULL;
pass_info.flags = 0;
@ -7764,7 +7805,7 @@ static void d3d12_command_list_clear_attachment(struct d3d12_command_list *list,
* uses a read-only layout in the current render pass */
d3d12_command_list_end_current_render_pass(list, false);
d3d12_command_list_clear_attachment_pass(list, resource, view,
clear_aspects, clear_value, rect_count, rects, false);
clear_aspects, clear_value, rect_count, rects);
}
else
{
@ -8239,6 +8280,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(d3d12_command_l
return;
}
/* Only care about resources which could alias memory with something else.
* We only need to transition committed resources once,
* and render targets tend to be that.
* There are no particular performance benefits in using DiscardResource as far as I know ... */
if (!d3d12_resource_may_alias_other_resources(texture))
return;
/* Assume that pRegion == NULL means that we should discard
* the entire resource. This does not seem to be documented. */
resource_subresource_count = d3d12_resource_get_sub_resource_count(texture);

View File

@ -2538,6 +2538,12 @@ static void d3d12_device_destroy(struct d3d12_device *device)
vkd3d_memory_allocator_cleanup(&device->memory_allocator, device);
/* Tear down descriptor global info late, so we catch last minute faults after we drain the queues. */
vkd3d_descriptor_debug_free_global_info(device->descriptor_qa_global_info, device);
#ifdef VKD3D_ENABLE_RENDERDOC
if (vkd3d_renderdoc_active() && vkd3d_renderdoc_global_capture_enabled())
vkd3d_renderdoc_end_capture(device->vkd3d_instance->vk_instance);
#endif
VK_CALL(vkDestroyDevice(device->vk_device, NULL));
pthread_mutex_destroy(&device->mutex);
if (device->parent)
@ -5242,6 +5248,12 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
IUnknown_AddRef(device->parent);
d3d12_device_caps_init(device);
#ifdef VKD3D_ENABLE_RENDERDOC
if (vkd3d_renderdoc_active() && vkd3d_renderdoc_global_capture_enabled())
vkd3d_renderdoc_begin_capture(device->vkd3d_instance->vk_instance);
#endif
return S_OK;
out_cleanup_global_pipeline_cache:

View File

@ -42,6 +42,7 @@ static vkd3d_shader_hash_t renderdoc_capture_shader_hash;
static uint32_t *renderdoc_capture_counts;
static size_t renderdoc_capture_counts_count;
static bool vkd3d_renderdoc_is_active;
static bool vkd3d_renderdoc_global_capture;
static void vkd3d_renderdoc_init_capture_count_list(const char *env)
{
@ -49,6 +50,13 @@ static void vkd3d_renderdoc_init_capture_count_list(const char *env)
uint32_t count;
char *endp;
if (strcmp(env, "-1") == 0)
{
INFO("Doing one big capture of the entire lifetime of a device.\n");
vkd3d_renderdoc_global_capture = true;
return;
}
while (*env != '\0')
{
errno = 0;
@ -180,6 +188,11 @@ bool vkd3d_renderdoc_active(void)
return vkd3d_renderdoc_is_active;
}
/* Reports whether global capture mode is on. The flag is set by
 * vkd3d_renderdoc_init_capture_count_list() when the capture count string is exactly "-1". */
bool vkd3d_renderdoc_global_capture_enabled(void)
{
return vkd3d_renderdoc_global_capture;
}
bool vkd3d_renderdoc_should_capture_shader_hash(vkd3d_shader_hash_t hash)
{
return (renderdoc_capture_shader_hash == hash) || (renderdoc_capture_shader_hash == 0);
@ -190,9 +203,12 @@ bool vkd3d_renderdoc_begin_capture(void *instance)
static uint32_t overall_counter;
uint32_t counter;
counter = vkd3d_atomic_uint32_increment(&overall_counter, vkd3d_memory_order_relaxed) - 1;
if (!vkd3d_renderdoc_enable_submit_counter(counter))
return false;
if (!vkd3d_renderdoc_global_capture)
{
counter = vkd3d_atomic_uint32_increment(&overall_counter, vkd3d_memory_order_relaxed) - 1;
if (!vkd3d_renderdoc_enable_submit_counter(counter))
return false;
}
if (renderdoc_api)
renderdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), NULL);
@ -215,6 +231,9 @@ void vkd3d_renderdoc_command_list_check_capture(struct d3d12_command_list *list,
{
unsigned int i;
if (vkd3d_renderdoc_global_capture_enabled())
return;
if (vkd3d_renderdoc_active() && state)
{
if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE)
@ -246,6 +265,9 @@ bool vkd3d_renderdoc_command_queue_begin_capture(struct d3d12_command_queue *com
VkDebugUtilsLabelEXT capture_label;
bool debug_capture;
if (vkd3d_renderdoc_global_capture_enabled())
return false;
debug_capture = vkd3d_renderdoc_begin_capture(command_queue->device->vkd3d_instance->vk_instance);
if (debug_capture && !vkd3d_renderdoc_loaded_api())
{
@ -273,6 +295,9 @@ void vkd3d_renderdoc_command_queue_end_capture(struct d3d12_command_queue *comma
const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
VkDebugUtilsLabelEXT capture_label;
if (vkd3d_renderdoc_global_capture_enabled())
return;
if (!vkd3d_renderdoc_loaded_api())
{
/* Magic fallback which lets us bridge the Wine barrier over to Linux RenderDoc. */

View File

@ -26,6 +26,7 @@
bool vkd3d_renderdoc_active(void);
bool vkd3d_renderdoc_loaded_api(void);
bool vkd3d_renderdoc_should_capture_shader_hash(vkd3d_shader_hash_t hash);
bool vkd3d_renderdoc_global_capture_enabled(void);
bool vkd3d_renderdoc_begin_capture(void *instance);
void vkd3d_renderdoc_end_capture(void *instance);

View File

@ -137,12 +137,6 @@ void test_queue_wait(void)
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
release_resource_readback(&rb);
if (!vkd3d_test_platform_is_windows())
{
skip("Wait() is not implemented yet.\n"); /* FIXME */
goto skip_tests;
}
/* Wait() before CPU signal */
update_buffer_data(cb, 0, sizeof(blue), &blue);
queue_wait(queue, fence, 2);
@ -218,7 +212,6 @@ void test_queue_wait(void)
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
release_resource_readback(&rb);
skip_tests:
/* Signal() and Wait() in the same command queue */
update_buffer_data(cb, 0, sizeof(blue), &blue);
queue_signal(queue, fence, 7);

View File

@ -61,3 +61,11 @@ executable('descriptor-performance', 'descriptor_performance.c',
c_args : vkd3d_test_flags,
override_options : [ 'c_std='+vkd3d_c_std ],
link_with : [ d3d12_test_utils_lib ])
executable('renderpass-performance', 'renderpass_performance.c',
dependencies : vkd3d_test_deps,
include_directories : vkd3d_private_includes,
install : false,
c_args : vkd3d_test_flags,
override_options : [ 'c_std='+vkd3d_c_std ],
link_with : [ d3d12_test_utils_lib ])

View File

@ -0,0 +1,231 @@
/*
* Copyright 2021 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#define INITGUID
#define VKD3D_TEST_DECLARE_MAIN
#include "d3d12_crosstest.h"
/* State shared by all benchmark runs. The allocator and command list are
 * reset and reused by every run instead of being recreated. */
struct context
{
ID3D12Device *device;
/* Direct queue the recorded command list is submitted to. */
ID3D12CommandQueue *queue;
ID3D12CommandAllocator *allocator;
ID3D12GraphicsCommandList *list;
/* Render targets that the benchmark clears. */
ID3D12Resource *resources[64];
/* CPU descriptor heap of RTVs; slot i holds the view of resources[i]. */
ID3D12DescriptorHeap *heap;
};
/* Prepares the test harness before any device is created: resolves the D3D12
 * entry points through get_d3d12_pfn(), passes the command line to parse_args()
 * and enable_d3d12_debug_layer(), and calls init_adapter_info().
 *
 * NOTE(review): the call order is kept exactly as written; presumably the
 * helpers rely on the entry points resolved before them - confirm against
 * d3d12_crosstest.h before reordering. */
static void setup(int argc, char **argv)
{
pfn_D3D12CreateDevice = get_d3d12_pfn(D3D12CreateDevice);
pfn_D3D12GetDebugInterface = get_d3d12_pfn(D3D12GetDebugInterface);
parse_args(argc, argv);
enable_d3d12_debug_layer(argc, argv);
init_adapter_info();
pfn_D3D12CreateVersionedRootSignatureDeserializer = get_d3d12_pfn(D3D12CreateVersionedRootSignatureDeserializer);
pfn_D3D12SerializeVersionedRootSignature = get_d3d12_pfn(D3D12SerializeVersionedRootSignature);
}
/* Returns a timestamp in seconds, suitable for measuring elapsed time by
 * subtracting two readings. Windows reads the performance counter and divides
 * it by its frequency; other platforms read CLOCK_MONOTONIC_RAW. */
static double get_time(void)
{
#ifdef _WIN32
    LARGE_INTEGER ticks, ticks_per_second;

    QueryPerformanceCounter(&ticks);
    QueryPerformanceFrequency(&ticks_per_second);

    return (double)ticks.QuadPart / (double)ticks_per_second.QuadPart;
#else
    struct timespec now;
    double whole_seconds;
    double fraction;

    clock_gettime(CLOCK_MONOTONIC_RAW, &now);

    whole_seconds = now.tv_sec;
    fraction = 1e-9 * now.tv_nsec;
    return whole_seconds + fraction;
#endif
}
static bool init_context(struct context *ctx)
{
unsigned int i;
memset(ctx, 0, sizeof(*ctx));
if (!(ctx->device = create_device()))
return false;
ctx->heap = create_cpu_descriptor_heap(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 64);
for (i = 0; i < ARRAY_SIZE(ctx->resources); i++)
{
ctx->resources[i] = create_default_texture2d(ctx->device, 512, 512, 1, 1,
DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
D3D12_RESOURCE_STATE_RENDER_TARGET);
}
ID3D12Device_CreateCommandAllocator(ctx->device, D3D12_COMMAND_LIST_TYPE_DIRECT,
&IID_ID3D12CommandAllocator, (void**)&ctx->allocator);
ctx->queue = create_command_queue(ctx->device, D3D12_COMMAND_LIST_TYPE_DIRECT, 0);
ID3D12Device_CreateCommandList(ctx->device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, ctx->allocator,
NULL, &IID_ID3D12GraphicsCommandList, (void**)&ctx->list);
ID3D12GraphicsCommandList_Close(ctx->list);
for (i = 0; i < 64; i++)
{
D3D12_CPU_DESCRIPTOR_HANDLE h;
h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(ctx->heap);
h.ptr += i * ID3D12Device_GetDescriptorHandleIncrementSize(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
ID3D12Device_CreateRenderTargetView(ctx->device, ctx->resources[i], NULL, h);
}
return true;
}
/* Drops the references held by a fully initialized context: the RTV heap,
 * every render target, then the command list, allocator, queue and device.
 * All members are released unconditionally, so ctx must come from a
 * successful init_context() call. */
static void destroy_context(struct context *ctx)
{
    unsigned int slot = 0;

    ID3D12DescriptorHeap_Release(ctx->heap);

    while (slot < ARRAY_SIZE(ctx->resources))
    {
        ID3D12Resource_Release(ctx->resources[slot]);
        slot++;
    }

    ID3D12GraphicsCommandList_Release(ctx->list);
    ID3D12CommandAllocator_Release(ctx->allocator);
    ID3D12CommandQueue_Release(ctx->queue);
    ID3D12Device_Release(ctx->device);
}
/* Selects which rectangle arguments do_benchmark_run() passes to
 * ClearRenderTargetView(). */
enum clear_mode
{
/* Rect count 0 and a NULL rect pointer. */
CLEAR_MODE_NULL,
/* One rect spanning (0, 0) to (512, 512), the size the render targets are created with. */
CLEAR_MODE_FULL_RECT,
/* One rect spanning (257, 0) to (509, 259), i.e. only part of the render target. */
CLEAR_MODE_PARTIAL
};
/* Records clear_iterations ClearRenderTargetView() calls into ctx->list,
 * submits the list once and returns the time in seconds between submission
 * and the fence wait returning. Recording time is not included.
 *
 * ctx              Context from a successful init_context().
 * clear_iterations Number of clears to record.
 * mode             Which rectangle arguments each clear uses.
 * pre_discard      When true, DiscardResource() is recorded before each clear.
 * num_resources    Clears cycle through the first num_resources render
 *                  targets. Any count works; it is clamped to the range
 *                  [1, ARRAY_SIZE(ctx->resources)] so the index can never
 *                  leave the resource array.
 *
 * Returns 0.0 without submitting anything if the fence cannot be created. */
static double do_benchmark_run(struct context *ctx, unsigned int clear_iterations,
        enum clear_mode mode, bool pre_discard, unsigned int num_resources)
{
    const D3D12_RECT partial_rect = { 257, 0, 509, 259 };
    const D3D12_RECT full_rect = { 0, 0, 512, 512 };
    D3D12_CPU_DESCRIPTOR_HANDLE base_h, h;
    D3D12_DISCARD_REGION discard_region;
    double start_time, end_time;
    unsigned int resource_index;
    ID3D12Fence *fence = NULL;
    UINT descriptor_size;
    FLOAT clear_color[4];
    unsigned int i;

    /* Keep the index below in bounds for any caller-supplied count. */
    if (num_resources == 0)
        num_resources = 1;
    else if (num_resources > ARRAY_SIZE(ctx->resources))
        num_resources = ARRAY_SIZE(ctx->resources);

    base_h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(ctx->heap);
    ID3D12GraphicsCommandList_Reset(ctx->list, ctx->allocator, NULL);
    descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(ctx->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);

    /* Discard subresource 0 entirely (no rects). */
    discard_region.FirstSubresource = 0;
    discard_region.NumSubresources = 1;
    discard_region.NumRects = 0;
    discard_region.pRects = NULL;

    /* Spam clear on different RTVs and see what happens to perf. */
    for (i = 0; i < clear_iterations; i++)
    {
        /* Modulo matches the former power-of-two mask for power-of-two
         * counts and stays correct for every other count. */
        resource_index = i % num_resources;
        h = base_h;
        h.ptr += resource_index * descriptor_size;

        /* Vary the color per iteration so consecutive clears differ. */
        clear_color[0] = (float)(i & 255) / 255.0f;
        clear_color[1] = (float)((i + 1) & 255) / 255.0f;
        clear_color[2] = (float)((i + 2) & 255) / 255.0f;
        clear_color[3] = (float)((i + 3) & 255) / 255.0f;

        if (pre_discard)
            ID3D12GraphicsCommandList_DiscardResource(ctx->list, ctx->resources[resource_index], &discard_region);

        switch (mode)
        {
            /* CLEAR_MODE_NULL and any unknown mode clear without rects. */
            default:
                ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 0, NULL);
                break;

            case CLEAR_MODE_FULL_RECT:
                ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 1, &full_rect);
                break;

            case CLEAR_MODE_PARTIAL:
                ID3D12GraphicsCommandList_ClearRenderTargetView(ctx->list, h, clear_color, 1, &partial_rect);
                break;
        }
    }

    ID3D12GraphicsCommandList_Close(ctx->list);

    ID3D12Device_CreateFence(ctx->device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void**)&fence);
    if (!fence)
    {
        /* Without a fence there is no way to wait for the GPU, so nothing is
         * submitted. The allocator is still reset to keep ctx usable. */
        fprintf(stderr, "Failed to create fence, skipping benchmark run.\n");
        ID3D12CommandAllocator_Reset(ctx->allocator);
        return 0.0;
    }

    start_time = get_time();
    ID3D12CommandQueue_ExecuteCommandLists(ctx->queue, 1, (ID3D12CommandList * const *)&ctx->list);
    ID3D12CommandQueue_Signal(ctx->queue, fence, 1);
    /* With a NULL event handle this call is documented to block until the
     * fence reaches the requested value. */
    ID3D12Fence_SetEventOnCompletion(fence, 1, NULL);
    end_time = get_time();

    ID3D12Fence_Release(fence);
    ID3D12CommandAllocator_Reset(ctx->allocator);
    return end_time - start_time;
}
/* Benchmark entry point: runs every configuration in the table below once
 * and prints the elapsed time of each in milliseconds. Nothing is run if the
 * context cannot be initialized. */
START_TEST(renderpass_performance)
{
    /* One benchmark configuration; the fields map directly to the
     * arguments of do_benchmark_run(). */
    struct test
    {
        unsigned int iteration_count;
        const char *desc;
        enum clear_mode mode;
        bool pre_discard;
        unsigned int num_resources;
    };

    static const struct test tests[] =
    {
        { 8 * 1024, "8k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },
        { 32 * 1024, "32k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },
        { 128 * 1024, "128k clear, NULL rect", CLEAR_MODE_NULL, false, 64 },

        { 8 * 1024, "8k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },
        { 32 * 1024, "32k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },
        { 128 * 1024, "128k clear, full rect", CLEAR_MODE_FULL_RECT, false, 64 },

        { 8 * 1024, "8k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },
        { 32 * 1024, "32k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },
        { 128 * 1024, "128k clear, partial rects", CLEAR_MODE_PARTIAL, false, 64 },

        { 128 * 1024, "128k clear, NULL rect, discard", CLEAR_MODE_NULL, true, 64 },
        { 128 * 1024, "128k clear, full rect, discard", CLEAR_MODE_FULL_RECT, true, 64 },
        { 128 * 1024, "128k clear, partial rect, discard", CLEAR_MODE_PARTIAL, true, 64 },

        { 64 * 1024, "64k clear, batch size 1", CLEAR_MODE_NULL, false, 1 },
        { 64 * 1024, "64k clear, batch size 2", CLEAR_MODE_NULL, false, 2 },
        { 64 * 1024, "64k clear, batch size 4", CLEAR_MODE_NULL, false, 4 },
        { 64 * 1024, "64k clear, batch size 8", CLEAR_MODE_NULL, false, 8 },
        { 64 * 1024, "64k clear, batch size 16", CLEAR_MODE_NULL, false, 16 },
        { 64 * 1024, "64k clear, batch size 32", CLEAR_MODE_NULL, false, 32 },

        { 128 * 1024, "128k clear, batch size 1", CLEAR_MODE_NULL, false, 1 },
        { 128 * 1024, "128k clear, batch size 2", CLEAR_MODE_NULL, false, 2 },
        { 128 * 1024, "128k clear, batch size 4", CLEAR_MODE_NULL, false, 4 },
        { 128 * 1024, "128k clear, batch size 8", CLEAR_MODE_NULL, false, 8 },
        { 128 * 1024, "128k clear, batch size 16", CLEAR_MODE_NULL, false, 16 },
        { 128 * 1024, "128k clear, batch size 32", CLEAR_MODE_NULL, false, 32 },
    };

    const struct test *current;
    struct context ctx;
    unsigned int index;
    double elapsed;

    setup(argc, argv);

    if (init_context(&ctx))
    {
        for (index = 0; index < ARRAY_SIZE(tests); index++)
        {
            current = &tests[index];
            elapsed = do_benchmark_run(&ctx, current->iteration_count, current->mode,
                    current->pre_discard, current->num_resources);
            /* do_benchmark_run() reports seconds; print milliseconds. */
            printf("[%40s] => %8.3f ms.\n", current->desc, 1e3 * elapsed);
        }

        destroy_context(&ctx);
    }
}