radv: Use correct buffer size for query pool result copies.

1. the dst stride may be too small if count=1.
2. the src stride may be too small due to the availability bit.

So lets just compute the size needed explicitly and use it.

Fixes: 90a0556c ("radv: use pool stride when copying single query results")
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14242>
This commit is contained in:
Bas Nieuwenhuizen 2021-12-17 10:08:23 +01:00 committed by Marge Bot
parent b775aaff1e
commit afff9dd0f0
1 changed files with 38 additions and 14 deletions

View File

@ -784,8 +784,8 @@ static void
radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline,
struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo,
uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride,
uint32_t dst_stride, uint32_t count, uint32_t flags, uint32_t pipeline_stats_mask,
uint32_t avail_offset)
uint32_t dst_stride, size_t dst_size, uint32_t count, uint32_t flags,
uint32_t pipeline_stats_mask, uint32_t avail_offset)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
@ -811,7 +811,7 @@ radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline,
cmd_buffer->state.predicating = false;
uint64_t src_buffer_size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset);
uint64_t dst_buffer_size = count == 1 ? src_stride : dst_stride * count;
uint64_t dst_buffer_size = dst_stride * (count - 1) + dst_size;
radv_buffer_init(&src_buffer, device, src_bo, src_buffer_size, src_offset);
radv_buffer_init(&dst_buffer, device, dst_bo, dst_buffer_size, dst_offset);
@ -1162,6 +1162,29 @@ emit_query_flush(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
}
}
static size_t
radv_query_result_size(const struct radv_query_pool *pool, VkQueryResultFlags flags)
{
unsigned values = (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? 1 : 0;
switch (pool->type) {
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
case VK_QUERY_TYPE_OCCLUSION:
values += 1;
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
values += util_bitcount(pool->pipeline_stats_mask);
break;
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
values += 2;
break;
default:
unreachable("trying to get size of unhandled query type");
}
return values * ((flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
@ -1173,6 +1196,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
size_t dst_size = radv_query_result_size(pool, flags);
dest_va += dst_buffer->offset + dstOffset;
if (!queryCount)
@ -1208,8 +1232,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
}
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
pool->bo, dst_buffer->bo, firstQuery * pool->stride,
dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
0);
dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
flags, 0, 0);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@ -1224,11 +1248,11 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
}
}
radv_query_shader(cmd_buffer,
&cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
pool->bo, dst_buffer->bo, firstQuery * pool->stride,
dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags,
pool->pipeline_stats_mask, pool->availability_offset + 4 * firstQuery);
radv_query_shader(
cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset,
pool->stride, stride, dst_size, queryCount, flags, pool->pipeline_stats_mask,
pool->availability_offset + 4 * firstQuery);
break;
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
@ -1250,8 +1274,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline,
pool->bo, dst_buffer->bo, firstQuery * pool->stride,
dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
0);
dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
flags, 0, 0);
break;
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
@ -1271,8 +1295,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
pool->bo, dst_buffer->bo, firstQuery * pool->stride,
dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
0);
dst_buffer->offset + dstOffset, pool->stride, dst_size, stride, queryCount,
flags, 0, 0);
break;
default:
unreachable("trying to get results of unhandled query type");