mirror of https://gitlab.freedesktop.org/mesa/mesa
391 lines
13 KiB
C
391 lines
13 KiB
C
/*
 * Copyright 2019 Google LLC
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv and radv which are:
 * Copyright © 2015 Intel Corporation
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 */
|
||
|
||
#include "vn_query_pool.h"
|
||
|
||
#include "venus-protocol/vn_protocol_driver_query_pool.h"
|
||
|
||
#include "vn_device.h"
|
||
#include "vn_feedback.h"
|
||
#include "vn_physical_device.h"
|
||
|
||
/* query pool commands */
|
||
|
||
VkResult
|
||
vn_CreateQueryPool(VkDevice device,
|
||
const VkQueryPoolCreateInfo *pCreateInfo,
|
||
const VkAllocationCallbacks *pAllocator,
|
||
VkQueryPool *pQueryPool)
|
||
{
|
||
struct vn_device *dev = vn_device_from_handle(device);
|
||
const VkAllocationCallbacks *alloc =
|
||
pAllocator ? pAllocator : &dev->base.base.alloc;
|
||
|
||
struct vn_query_pool *pool =
|
||
vk_zalloc(alloc, sizeof(*pool), VN_DEFAULT_ALIGN,
|
||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||
if (!pool)
|
||
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||
|
||
vn_object_base_init(&pool->base, VK_OBJECT_TYPE_QUERY_POOL, &dev->base);
|
||
|
||
pool->allocator = *alloc;
|
||
pool->query_count = pCreateInfo->queryCount;
|
||
|
||
simple_mtx_init(&pool->mutex, mtx_plain);
|
||
|
||
switch (pCreateInfo->queryType) {
|
||
case VK_QUERY_TYPE_OCCLUSION:
|
||
/*
|
||
* Occlusion queries write one integer value - the number of samples
|
||
* passed.
|
||
*/
|
||
pool->result_array_size = 1;
|
||
break;
|
||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||
/*
|
||
* Pipeline statistics queries write one integer value for each bit that
|
||
* is enabled in the pipelineStatistics when the pool is created, and
|
||
* the statistics values are written in bit order starting from the
|
||
* least significant bit.
|
||
*/
|
||
pool->result_array_size =
|
||
util_bitcount(pCreateInfo->pipelineStatistics);
|
||
break;
|
||
case VK_QUERY_TYPE_TIMESTAMP:
|
||
/* Timestamp queries write one integer value. */
|
||
pool->result_array_size = 1;
|
||
break;
|
||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||
/*
|
||
* Transform feedback queries write two integers; the first integer is
|
||
* the number of primitives successfully written to the corresponding
|
||
* transform feedback buffer and the second is the number of primitives
|
||
* output to the vertex stream, regardless of whether they were
|
||
* successfully captured or not.
|
||
*/
|
||
pool->result_array_size = 2;
|
||
break;
|
||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||
/*
|
||
* Primitives generated queries write one integer value; the number of
|
||
* primitives output to the vertex stream, regardless of whether
|
||
* transform feedback is active or not, or whether they were
|
||
* successfully captured by transform feedback or not. This is identical
|
||
* to the second integer of the transform feedback queries if transform
|
||
* feedback is active.
|
||
*/
|
||
pool->result_array_size = 1;
|
||
break;
|
||
default:
|
||
unreachable("bad query type");
|
||
break;
|
||
}
|
||
|
||
/* Venus has to handle overflow behavior with query feedback to keep
|
||
* consistency between vkCmdCopyQueryPoolResults and vkGetQueryPoolResults.
|
||
* The default query feedback behavior is to wrap on overflow. However, per
|
||
* spec:
|
||
*
|
||
* If an unsigned integer query’s value overflows the result type, the
|
||
* value may either wrap or saturate.
|
||
*
|
||
* We detect the renderer side implementation to align with the
|
||
* implementation specific behavior.
|
||
*/
|
||
switch (dev->physical_device->renderer_driver_id) {
|
||
case VK_DRIVER_ID_ARM_PROPRIETARY:
|
||
case VK_DRIVER_ID_MESA_LLVMPIPE:
|
||
case VK_DRIVER_ID_MESA_TURNIP:
|
||
pool->saturate_on_overflow = true;
|
||
break;
|
||
default:
|
||
break;
|
||
};
|
||
|
||
VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
|
||
vn_async_vkCreateQueryPool(dev->primary_ring, device, pCreateInfo, NULL,
|
||
&pool_handle);
|
||
|
||
*pQueryPool = pool_handle;
|
||
|
||
return VK_SUCCESS;
|
||
}
|
||
|
||
void
|
||
vn_DestroyQueryPool(VkDevice device,
|
||
VkQueryPool queryPool,
|
||
const VkAllocationCallbacks *pAllocator)
|
||
{
|
||
struct vn_device *dev = vn_device_from_handle(device);
|
||
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
|
||
const VkAllocationCallbacks *alloc;
|
||
|
||
if (!pool)
|
||
return;
|
||
|
||
alloc = pAllocator ? pAllocator : &pool->allocator;
|
||
|
||
if (pool->fb_buf)
|
||
vn_feedback_buffer_destroy(dev, pool->fb_buf, alloc);
|
||
|
||
simple_mtx_destroy(&pool->mutex);
|
||
|
||
vn_async_vkDestroyQueryPool(dev->primary_ring, device, queryPool, NULL);
|
||
|
||
vn_object_base_fini(&pool->base);
|
||
vk_free(alloc, pool);
|
||
}
|
||
|
||
void
|
||
vn_ResetQueryPool(VkDevice device,
|
||
VkQueryPool queryPool,
|
||
uint32_t firstQuery,
|
||
uint32_t queryCount)
|
||
{
|
||
struct vn_device *dev = vn_device_from_handle(device);
|
||
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
|
||
|
||
vn_async_vkResetQueryPool(dev->primary_ring, device, queryPool, firstQuery,
|
||
queryCount);
|
||
if (pool->fb_buf) {
|
||
/* Feedback results are always 64 bit and include availability bit
|
||
* (also 64 bit)
|
||
*/
|
||
const uint32_t slot_size = (pool->result_array_size * 8) + 8;
|
||
const uint32_t offset = slot_size * firstQuery;
|
||
memset(pool->fb_buf->data + offset, 0, slot_size * queryCount);
|
||
}
|
||
}
|
||
|
||
static VkResult
|
||
vn_get_query_pool_feedback(struct vn_query_pool *pool,
|
||
uint32_t firstQuery,
|
||
uint32_t queryCount,
|
||
void *pData,
|
||
VkDeviceSize stride,
|
||
VkQueryResultFlags flags)
|
||
{
|
||
VkResult result = VK_SUCCESS;
|
||
/* Feedback results are always 64 bit and include availability bit
|
||
* (also 64 bit)
|
||
*/
|
||
const uint32_t slot_array_size = pool->result_array_size + 1;
|
||
uint64_t *src = pool->fb_buf->data;
|
||
src += slot_array_size * firstQuery;
|
||
|
||
uint32_t dst_index = 0;
|
||
uint32_t src_index = 0;
|
||
if (flags & VK_QUERY_RESULT_64_BIT) {
|
||
uint64_t *dst = pData;
|
||
uint32_t index_stride = stride / sizeof(uint64_t);
|
||
for (uint32_t i = 0; i < queryCount; i++) {
|
||
/* Copy the result if its available */
|
||
const uint64_t avail = src[src_index + pool->result_array_size];
|
||
if (avail) {
|
||
memcpy(&dst[dst_index], &src[src_index],
|
||
pool->result_array_size * sizeof(uint64_t));
|
||
} else {
|
||
result = VK_NOT_READY;
|
||
/* valid to return result of 0 if partial bit is set */
|
||
if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
|
||
memset(&dst[dst_index], 0,
|
||
pool->result_array_size * sizeof(uint64_t));
|
||
}
|
||
}
|
||
/* Set the availability bit if requested */
|
||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||
dst[dst_index + pool->result_array_size] = avail;
|
||
|
||
dst_index += index_stride;
|
||
src_index += slot_array_size;
|
||
}
|
||
} else {
|
||
uint32_t *dst = pData;
|
||
uint32_t index_stride = stride / sizeof(uint32_t);
|
||
for (uint32_t i = 0; i < queryCount; i++) {
|
||
/* Copy the result if its available, converting down to uint32_t */
|
||
const uint32_t avail =
|
||
(uint32_t)src[src_index + pool->result_array_size];
|
||
if (avail) {
|
||
for (uint32_t j = 0; j < pool->result_array_size; j++) {
|
||
const uint64_t src_val = src[src_index + j];
|
||
dst[dst_index + j] =
|
||
src_val > UINT32_MAX && pool->saturate_on_overflow
|
||
? UINT32_MAX
|
||
: (uint32_t)src_val;
|
||
}
|
||
} else {
|
||
result = VK_NOT_READY;
|
||
/* valid to return result of 0 if partial bit is set */
|
||
if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
|
||
for (uint32_t j = 0; j < pool->result_array_size; j++)
|
||
dst[dst_index + j] = 0;
|
||
}
|
||
}
|
||
/* Set the availability bit if requested */
|
||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||
dst[dst_index + pool->result_array_size] = avail;
|
||
|
||
dst_index += index_stride;
|
||
src_index += slot_array_size;
|
||
}
|
||
}
|
||
return result;
|
||
}
|
||
|
||
static void
|
||
vn_query_feedback_wait_ready(struct vn_device *dev,
|
||
struct vn_query_pool *pool,
|
||
uint32_t first_query,
|
||
uint32_t query_count)
|
||
{
|
||
VN_TRACE_FUNC();
|
||
|
||
/* Feedback results are always 64 bit and include availability bit
|
||
* (also 64 bit)
|
||
*/
|
||
const uint32_t step = pool->result_array_size + 1;
|
||
const uint64_t *avail = (uint64_t *)pool->fb_buf->data +
|
||
first_query * step + pool->result_array_size;
|
||
|
||
struct vn_relax_state relax_state =
|
||
vn_relax_init(dev->instance, VN_RELAX_REASON_QUERY);
|
||
for (uint32_t i = 0, j = 0; i < query_count; i++, j += step) {
|
||
while (!avail[j]) {
|
||
vn_relax(&relax_state);
|
||
}
|
||
}
|
||
vn_relax_fini(&relax_state);
|
||
}
|
||
|
||
VkResult
|
||
vn_GetQueryPoolResults(VkDevice device,
|
||
VkQueryPool queryPool,
|
||
uint32_t firstQuery,
|
||
uint32_t queryCount,
|
||
size_t dataSize,
|
||
void *pData,
|
||
VkDeviceSize stride,
|
||
VkQueryResultFlags flags)
|
||
{
|
||
struct vn_device *dev = vn_device_from_handle(device);
|
||
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
|
||
const VkAllocationCallbacks *alloc = &pool->allocator;
|
||
VkResult result;
|
||
|
||
const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
|
||
const size_t result_size = pool->result_array_size * result_width;
|
||
const bool result_always_written =
|
||
flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);
|
||
|
||
/* Get results from feedback buffers
|
||
* Not possible for VK_QUERY_RESULT_PARTIAL_BIT
|
||
*/
|
||
if (pool->fb_buf) {
|
||
/* If wait bit is set, wait poll until query is ready */
|
||
if (flags & VK_QUERY_RESULT_WAIT_BIT)
|
||
vn_query_feedback_wait_ready(dev, pool, firstQuery, queryCount);
|
||
|
||
result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
|
||
stride, flags);
|
||
return vn_result(dev->instance, result);
|
||
}
|
||
|
||
VkQueryResultFlags packed_flags = flags;
|
||
size_t packed_stride = result_size;
|
||
if (!result_always_written)
|
||
packed_flags |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
|
||
if (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||
packed_stride += result_width;
|
||
|
||
const size_t packed_size = packed_stride * queryCount;
|
||
void *packed_data;
|
||
if (result_always_written && packed_stride == stride) {
|
||
packed_data = pData;
|
||
} else {
|
||
packed_data = vk_alloc(alloc, packed_size, VN_DEFAULT_ALIGN,
|
||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||
if (!packed_data)
|
||
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||
}
|
||
result = vn_call_vkGetQueryPoolResults(
|
||
dev->primary_ring, device, queryPool, firstQuery, queryCount,
|
||
packed_size, packed_data, packed_stride, packed_flags);
|
||
|
||
if (packed_data == pData)
|
||
return vn_result(dev->instance, result);
|
||
|
||
const size_t copy_size =
|
||
result_size +
|
||
(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ? result_width : 0);
|
||
const void *src = packed_data;
|
||
void *dst = pData;
|
||
if (result == VK_SUCCESS) {
|
||
for (uint32_t i = 0; i < queryCount; i++) {
|
||
memcpy(dst, src, copy_size);
|
||
src += packed_stride;
|
||
dst += stride;
|
||
}
|
||
} else if (result == VK_NOT_READY) {
|
||
assert(!result_always_written &&
|
||
(packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
|
||
if (flags & VK_QUERY_RESULT_64_BIT) {
|
||
for (uint32_t i = 0; i < queryCount; i++) {
|
||
const bool avail = *(const uint64_t *)(src + result_size);
|
||
if (avail)
|
||
memcpy(dst, src, copy_size);
|
||
else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||
*(uint64_t *)(dst + result_size) = 0;
|
||
|
||
src += packed_stride;
|
||
dst += stride;
|
||
}
|
||
} else {
|
||
for (uint32_t i = 0; i < queryCount; i++) {
|
||
const bool avail = *(const uint32_t *)(src + result_size);
|
||
if (avail)
|
||
memcpy(dst, src, copy_size);
|
||
else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||
*(uint32_t *)(dst + result_size) = 0;
|
||
|
||
src += packed_stride;
|
||
dst += stride;
|
||
}
|
||
}
|
||
}
|
||
|
||
vk_free(alloc, packed_data);
|
||
return vn_result(dev->instance, result);
|
||
}
|
||
|
||
VkResult
|
||
vn_query_feedback_buffer_init_once(struct vn_device *dev,
|
||
struct vn_query_pool *pool)
|
||
{
|
||
VkResult result = VK_SUCCESS;
|
||
|
||
simple_mtx_lock(&pool->mutex);
|
||
if (pool->fb_buf)
|
||
goto out_unlock;
|
||
|
||
const uint32_t fb_buf_size =
|
||
(pool->result_array_size + 1) * sizeof(uint64_t) * pool->query_count;
|
||
struct vn_feedback_buffer *fb_buf;
|
||
result =
|
||
vn_feedback_buffer_create(dev, fb_buf_size, &pool->allocator, &fb_buf);
|
||
if (result == VK_SUCCESS)
|
||
pool->fb_buf = fb_buf;
|
||
|
||
out_unlock:
|
||
simple_mtx_unlock(&pool->mutex);
|
||
return result;
|
||
}
|