/* mesa/src/virtio/vulkan/vn_query_pool.c */
/*
* Copyright 2019 Google LLC
* SPDX-License-Identifier: MIT
*
* based in part on anv and radv which are:
* Copyright © 2015 Intel Corporation
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*/
#include "vn_query_pool.h"
#include "venus-protocol/vn_protocol_driver_query_pool.h"
#include "vn_device.h"
#include "vn_feedback.h"
#include "vn_physical_device.h"
/* query pool commands */
/**
 * Implements vkCreateQueryPool.
 *
 * Allocates the driver-side pool object, records how many 64-bit values a
 * single query of this type produces (result_array_size), and asynchronously
 * creates the renderer-side pool. The feedback buffer itself is created
 * lazily via vn_query_feedback_buffer_init_once.
 */
VkResult
vn_CreateQueryPool(VkDevice device,
                   const VkQueryPoolCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator,
                   VkQueryPool *pQueryPool)
{
   struct vn_device *dev = vn_device_from_handle(device);
   const VkAllocationCallbacks *alloc =
      pAllocator ? pAllocator : &dev->base.base.alloc;

   struct vn_query_pool *pool =
      vk_zalloc(alloc, sizeof(*pool), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pool)
      return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vn_object_base_init(&pool->base, VK_OBJECT_TYPE_QUERY_POOL, &dev->base);

   /* Keep a copy of the allocator so vn_DestroyQueryPool can free with the
    * same callbacks when the caller passes pAllocator == NULL there.
    */
   pool->allocator = *alloc;
   pool->query_count = pCreateInfo->queryCount;
   simple_mtx_init(&pool->mutex, mtx_plain);

   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
      /*
       * Occlusion queries write one integer value - the number of samples
       * passed.
       */
      pool->result_array_size = 1;
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      /*
       * Pipeline statistics queries write one integer value for each bit that
       * is enabled in the pipelineStatistics when the pool is created, and
       * the statistics values are written in bit order starting from the
       * least significant bit.
       */
      pool->result_array_size =
         util_bitcount(pCreateInfo->pipelineStatistics);
      break;
   case VK_QUERY_TYPE_TIMESTAMP:
      /* Timestamp queries write one integer value. */
      pool->result_array_size = 1;
      break;
   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      /*
       * Transform feedback queries write two integers; the first integer is
       * the number of primitives successfully written to the corresponding
       * transform feedback buffer and the second is the number of primitives
       * output to the vertex stream, regardless of whether they were
       * successfully captured or not.
       */
      pool->result_array_size = 2;
      break;
   case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
      /*
       * Primitives generated queries write one integer value; the number of
       * primitives output to the vertex stream, regardless of whether
       * transform feedback is active or not, or whether they were
       * successfully captured by transform feedback or not. This is identical
       * to the second integer of the transform feedback queries if transform
       * feedback is active.
       */
      pool->result_array_size = 1;
      break;
   default:
      unreachable("bad query type");
      break;
   }

   /* Venus has to handle overflow behavior with query feedback to keep
    * consistency between vkCmdCopyQueryPoolResults and vkGetQueryPoolResults.
    * The default query feedback behavior is to wrap on overflow. However, per
    * spec:
    *
    *    If an unsigned integer query's value overflows the result type, the
    *    value may either wrap or saturate.
    *
    * We detect the renderer side implementation to align with the
    * implementation specific behavior.
    */
   switch (dev->physical_device->renderer_driver_id) {
   case VK_DRIVER_ID_ARM_PROPRIETARY:
   case VK_DRIVER_ID_MESA_LLVMPIPE:
   case VK_DRIVER_ID_MESA_TURNIP:
      pool->saturate_on_overflow = true;
      break;
   default:
      break;
   }

   VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
   vn_async_vkCreateQueryPool(dev->primary_ring, device, pCreateInfo, NULL,
                              &pool_handle);

   *pQueryPool = pool_handle;

   return VK_SUCCESS;
}
/**
 * Implements vkDestroyQueryPool.
 *
 * Releases the feedback buffer (if lazily created), asynchronously destroys
 * the renderer-side pool, and frees the driver-side object with the
 * appropriate allocator.
 */
void
vn_DestroyQueryPool(VkDevice device,
                    VkQueryPool queryPool,
                    const VkAllocationCallbacks *pAllocator)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);

   if (!pool)
      return;

   /* Fall back to the allocator captured at creation time. */
   const VkAllocationCallbacks *alloc =
      pAllocator ? pAllocator : &pool->allocator;

   if (pool->fb_buf)
      vn_feedback_buffer_destroy(dev, pool->fb_buf, alloc);

   simple_mtx_destroy(&pool->mutex);

   vn_async_vkDestroyQueryPool(dev->primary_ring, device, queryPool, NULL);

   vn_object_base_fini(&pool->base);
   vk_free(alloc, pool);
}
/**
 * Implements vkResetQueryPool.
 *
 * Resets the renderer-side pool asynchronously and, when a feedback buffer
 * exists, clears the affected feedback slots so the queries read back as
 * unavailable until re-written.
 */
void
vn_ResetQueryPool(VkDevice device,
                  VkQueryPool queryPool,
                  uint32_t firstQuery,
                  uint32_t queryCount)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);

   vn_async_vkResetQueryPool(dev->primary_ring, device, queryPool, firstQuery,
                             queryCount);

   if (pool->fb_buf) {
      /* Feedback results are always 64 bit and include availability bit
       * (also 64 bit), so a slot is (result_array_size + 1) uint64_t's.
       */
      const uint32_t slot_size =
         (pool->result_array_size + 1) * sizeof(uint64_t);
      memset(pool->fb_buf->data + slot_size * firstQuery, 0,
             slot_size * queryCount);
   }
}
/**
 * Copy query results out of the feedback buffer into pData.
 *
 * Each feedback slot holds result_array_size 64-bit result values followed
 * by one 64-bit availability value. Returns VK_NOT_READY if any requested
 * query is unavailable; with VK_QUERY_RESULT_PARTIAL_BIT, unavailable
 * queries are written as zeros, which the spec permits.
 */
static VkResult
vn_get_query_pool_feedback(struct vn_query_pool *pool,
                           uint32_t firstQuery,
                           uint32_t queryCount,
                           void *pData,
                           VkDeviceSize stride,
                           VkQueryResultFlags flags)
{
   const uint32_t slot_count = pool->result_array_size + 1;
   const uint64_t *slot =
      (const uint64_t *)pool->fb_buf->data + slot_count * firstQuery;
   VkResult result = VK_SUCCESS;

   if (flags & VK_QUERY_RESULT_64_BIT) {
      uint64_t *out = pData;
      const uint32_t out_stride = stride / sizeof(uint64_t);
      for (uint32_t i = 0; i < queryCount;
           i++, slot += slot_count, out += out_stride) {
         /* The availability value trails the results in the slot. */
         const uint64_t avail = slot[pool->result_array_size];
         if (avail) {
            memcpy(out, slot, pool->result_array_size * sizeof(uint64_t));
         } else {
            result = VK_NOT_READY;
            /* valid to return result of 0 if partial bit is set */
            if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
               memset(out, 0, pool->result_array_size * sizeof(uint64_t));
            }
         }
         /* Set the availability value if requested */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            out[pool->result_array_size] = avail;
      }
   } else {
      uint32_t *out = pData;
      const uint32_t out_stride = stride / sizeof(uint32_t);
      for (uint32_t i = 0; i < queryCount;
           i++, slot += slot_count, out += out_stride) {
         const uint32_t avail = (uint32_t)slot[pool->result_array_size];
         if (avail) {
            /* Convert down to uint32_t, saturating on overflow when the
             * renderer driver is known to saturate (see vn_CreateQueryPool).
             */
            for (uint32_t j = 0; j < pool->result_array_size; j++) {
               const uint64_t val = slot[j];
               out[j] = val > UINT32_MAX && pool->saturate_on_overflow
                           ? UINT32_MAX
                           : (uint32_t)val;
            }
         } else {
            result = VK_NOT_READY;
            /* valid to return result of 0 if partial bit is set */
            if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
               for (uint32_t j = 0; j < pool->result_array_size; j++)
                  out[j] = 0;
            }
         }
         /* Set the availability value if requested */
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            out[pool->result_array_size] = avail;
      }
   }

   return result;
}
/**
 * Busy-wait until every query in [first_query, first_query + query_count)
 * has its feedback availability value set.
 *
 * Each feedback slot is result_array_size 64-bit results plus a 64-bit
 * availability value; we poll only the availability values, relaxing
 * between polls via vn_relax.
 */
static void
vn_query_feedback_wait_ready(struct vn_device *dev,
                             struct vn_query_pool *pool,
                             uint32_t first_query,
                             uint32_t query_count)
{
   VN_TRACE_FUNC();

   const uint32_t slot_count = pool->result_array_size + 1;
   const uint64_t *avail = (const uint64_t *)pool->fb_buf->data +
                           first_query * slot_count +
                           pool->result_array_size;

   struct vn_relax_state relax_state =
      vn_relax_init(dev->instance, VN_RELAX_REASON_QUERY);
   for (uint32_t i = 0; i < query_count; i++, avail += slot_count) {
      while (!*avail)
         vn_relax(&relax_state);
   }
   vn_relax_fini(&relax_state);
}
/**
 * Implements vkGetQueryPoolResults.
 *
 * Two paths:
 *  - Feedback path: when the pool has a feedback buffer, results are read
 *    directly from it (busy-waiting first if VK_QUERY_RESULT_WAIT_BIT is
 *    set), avoiding a renderer round trip.
 *  - Renderer path: results are fetched with vn_call_vkGetQueryPoolResults
 *    into a tightly packed buffer, then unpacked into pData at the caller's
 *    stride.
 */
VkResult
vn_GetQueryPoolResults(VkDevice device,
                       VkQueryPool queryPool,
                       uint32_t firstQuery,
                       uint32_t queryCount,
                       size_t dataSize,
                       void *pData,
                       VkDeviceSize stride,
                       VkQueryResultFlags flags)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
   const VkAllocationCallbacks *alloc = &pool->allocator;
   VkResult result;

   /* Per-value and per-query result sizes in the caller's layout. */
   const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
   const size_t result_size = pool->result_array_size * result_width;
   /* With WAIT_BIT or PARTIAL_BIT, every query slot in pData gets written,
    * so per-query availability does not need to be requested to know which
    * slots to copy.
    */
   const bool result_always_written =
      flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);

   /* Get results from feedback buffers
    * Not possible for VK_QUERY_RESULT_PARTIAL_BIT
    */
   if (pool->fb_buf) {
      /* If wait bit is set, wait poll until query is ready */
      if (flags & VK_QUERY_RESULT_WAIT_BIT)
         vn_query_feedback_wait_ready(dev, pool, firstQuery, queryCount);

      result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
                                          stride, flags);
      return vn_result(dev->instance, result);
   }

   /* Renderer path: request packed results; force availability on the wire
    * when results are not guaranteed written, so we can tell which queries
    * to unpack below.
    */
   VkQueryResultFlags packed_flags = flags;
   size_t packed_stride = result_size;
   if (!result_always_written)
      packed_flags |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
   if (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      packed_stride += result_width;

   const size_t packed_size = packed_stride * queryCount;
   void *packed_data;
   if (result_always_written && packed_stride == stride) {
      /* The packed layout matches the caller's exactly; write in place and
       * skip the unpack copy below.
       */
      packed_data = pData;
   } else {
      packed_data = vk_alloc(alloc, packed_size, VN_DEFAULT_ALIGN,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!packed_data)
         return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   result = vn_call_vkGetQueryPoolResults(
      dev->primary_ring, device, queryPool, firstQuery, queryCount,
      packed_size, packed_data, packed_stride, packed_flags);

   if (packed_data == pData)
      return vn_result(dev->instance, result);

   /* Per-query bytes to unpack: the results plus, only if the caller asked
    * for it, the trailing availability value.
    */
   const size_t copy_size =
      result_size +
      (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ? result_width : 0);
   const void *src = packed_data;
   void *dst = pData;
   if (result == VK_SUCCESS) {
      /* All queries available: straight strided copy. */
      for (uint32_t i = 0; i < queryCount; i++) {
         memcpy(dst, src, copy_size);
         src += packed_stride;
         dst += stride;
      }
   } else if (result == VK_NOT_READY) {
      /* Only reachable when we forced availability onto the wire above. */
      assert(!result_always_written &&
             (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
      if (flags & VK_QUERY_RESULT_64_BIT) {
         for (uint32_t i = 0; i < queryCount; i++) {
            /* Availability trails the results in the packed layout; copy
             * only available queries, and zero the caller's availability
             * value for unavailable ones when requested.
             */
            const bool avail = *(const uint64_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               *(uint64_t *)(dst + result_size) = 0;
            src += packed_stride;
            dst += stride;
         }
      } else {
         for (uint32_t i = 0; i < queryCount; i++) {
            const bool avail = *(const uint32_t *)(src + result_size);
            if (avail)
               memcpy(dst, src, copy_size);
            else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
               *(uint32_t *)(dst + result_size) = 0;
            src += packed_stride;
            dst += stride;
         }
      }
   }

   vk_free(alloc, packed_data);

   return vn_result(dev->instance, result);
}
/**
 * Lazily create the pool's feedback buffer, exactly once.
 *
 * Thread-safe: the pool mutex serializes concurrent callers; later callers
 * see the already-created buffer and return VK_SUCCESS without re-creating.
 */
VkResult
vn_query_feedback_buffer_init_once(struct vn_device *dev,
                                   struct vn_query_pool *pool)
{
   VkResult result = VK_SUCCESS;

   simple_mtx_lock(&pool->mutex);

   if (!pool->fb_buf) {
      /* One slot per query: result_array_size 64-bit results plus a 64-bit
       * availability value.
       */
      const uint32_t fb_buf_size =
         (pool->result_array_size + 1) * sizeof(uint64_t) * pool->query_count;

      struct vn_feedback_buffer *fb_buf;
      result = vn_feedback_buffer_create(dev, fb_buf_size, &pool->allocator,
                                         &fb_buf);
      if (result == VK_SUCCESS)
         pool->fb_buf = fb_buf;
   }

   simple_mtx_unlock(&pool->mutex);

   return result;
}