turnip: Support pipeline statistics query

Signed-off-by: Hyunjun Ko <zzoon@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6299>
This commit is contained in:
Hyunjun Ko 2020-09-01 05:13:52 +00:00 committed by Marge Bot
parent 170da456ef
commit b92be738d5
2 changed files with 161 additions and 11 deletions

View File

@ -395,7 +395,7 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
.textureCompressionASTC_LDR = true,
.textureCompressionBC = true,
.occlusionQueryPrecise = true,
.pipelineStatisticsQuery = false,
.pipelineStatisticsQuery = true,
.vertexPipelineStoresAndAtomics = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize = false,

View File

@ -42,6 +42,7 @@
#define NSEC_PER_SEC 1000000000ull
#define WAIT_TIMEOUT 5
/* Number of pipeline statistics counters stored per query slot, derived from
 * the register span RBBM_PRIMCTR_0_LO .. RBBM_PRIMCTR_10_LO.
 * NOTE(review): the division by two presumably reflects each counter being a
 * LO/HI register pair -- confirm against the a6xx register definitions.
 */
#define STAT_COUNT ((REG_A6XX_RBBM_PRIMCTR_10_LO - REG_A6XX_RBBM_PRIMCTR_0_LO) / 2 + 1)
struct PACKED query_slot {
uint64_t available;
@ -71,6 +72,14 @@ struct PACKED primitive_slot_value {
uint64_t values[2];
};
/* Per-query storage layout for VK_QUERY_TYPE_PIPELINE_STATISTICS pools.
 * tu_CreateQueryPool uses sizeof() of this struct as the slot size for that
 * query type.
 */
struct PACKED pipeline_stat_query_slot {
/* Shared slot header (holds the availability value). */
struct query_slot common;
/* One accumulated value per counter.
 * NOTE(review): presumably this is what query_result_iova()/
 * query_result_addr() index into -- confirm against their definitions.
 */
uint64_t results[STAT_COUNT];
/* Destination of the CP_REG_TO_MEM register copy emitted by
 * emit_begin_stat_query().
 */
uint64_t begin[STAT_COUNT];
/* Destination of the CP_REG_TO_MEM register copy emitted by
 * emit_end_stat_query().
 */
uint64_t end[STAT_COUNT];
};
struct PACKED primitive_query_slot {
struct query_slot common;
/* The result of transform feedback queries is two integer values:
@ -95,6 +104,10 @@ struct PACKED primitive_query_slot {
#define occlusion_query_iova(pool, query, field) \
query_iova(struct occlusion_query_slot, pool, query, field)
/* GPU address of `field` inside the pipeline_stat_query_slot belonging to
 * query number `query` of `pool`.
 *
 * `field` is passed straight to offsetof(), so it may name a member or an
 * array element such as begin[i].
 *
 * The arguments and the whole expansion are parenthesized so that compound
 * expressions (e.g. `firstQuery + i`) and uses inside larger expressions
 * evaluate as the caller expects.
 */
#define pipeline_stat_query_iova(pool, query, field)                 \
   ((pool)->bo.iova + (pool)->stride * (query) +                     \
    offsetof(struct pipeline_stat_query_slot, field))
#define primitive_query_iova(pool, query, field, i) \
query_iova(struct primitive_query_slot, pool, query, field) + \
offsetof(struct primitive_slot_value, values[i])
@ -142,7 +155,8 @@ tu_CreateQueryPool(VkDevice _device,
slot_size = sizeof(struct primitive_query_slot);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
unreachable("Unimplemented query type");
slot_size = sizeof(struct pipeline_stat_query_slot);
break;
default:
assert(!"Invalid query type");
}
@ -205,12 +219,47 @@ get_result_count(struct tu_query_pool *pool)
/* Transform feedback queries write two integer values */
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
return 2;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
return util_bitcount(pool->pipeline_statistics);
default:
assert(!"Invalid query type");
return 0;
}
}
/* Consume the lowest set bit of *statistics and return the index used to
 * address that statistic's value within a pipeline statistics query slot.
 *
 * The bit is taken with u_bit_scan(), which also updates *statistics, so
 * callers pass a scratch copy of the pool's mask and call this once per
 * enabled statistic. The mask must not be empty on entry.
 *
 * INPUT_ASSEMBLY_VERTICES and VERTEX_SHADER_INVOCATIONS share index 0.
 * Any bit without an explicit case also maps to index 0.
 */
static uint32_t
statistics_index(uint32_t *statistics)
{
   /* Scanning an empty mask has no meaningful bit position. */
   assert(*statistics != 0);

   const uint32_t stat = u_bit_scan(statistics);

   /* Shift an unsigned one: `1 << stat` is a signed shift and is undefined
    * when stat is 31.
    */
   switch (1u << stat) {
   case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT:
   case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT:
      return 0;
   case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT:
      return 1;
   case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT:
      return 2;
   case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT:
      return 4;
   case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT:
      return 5;
   case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT:
      return 6;
   case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT:
      return 7;
   case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT:
      return 8;
   case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT:
      return 9;
   case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT:
      return 10;
   default:
      return 0;
   }
}
/* Wait on the availability status of a query up until a timeout. */
static VkResult
wait_for_available(struct tu_device *device, struct tu_query_pool *pool,
@ -262,6 +311,7 @@ get_query_pool_results(struct tu_device *device,
struct query_slot *slot = slot_address(pool, query);
bool available = query_is_available(slot);
uint32_t result_count = get_result_count(pool);
uint32_t statistics = pool->pipeline_statistics;
if ((flags & VK_QUERY_RESULT_WAIT_BIT) && !available) {
VkResult wait_result = wait_for_available(device, pool, query);
@ -287,7 +337,15 @@ get_query_pool_results(struct tu_device *device,
for (uint32_t k = 0; k < result_count; k++) {
if (available) {
uint64_t *result = query_result_addr(pool, query, k);
uint64_t *result;
if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
uint32_t stat_idx = statistics_index(&statistics);
result = query_result_addr(pool, query, stat_idx);
} else {
result = query_result_addr(pool, query, k);
}
write_query_value_cpu(result_base, k, *result, flags);
} else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
/* From the Vulkan 1.1.130 spec:
@ -337,10 +395,9 @@ tu_GetQueryPoolResults(VkDevice _device,
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
return get_query_pool_results(device, pool, firstQuery, queryCount,
dataSize, pData, stride, flags);
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
unreachable("Unimplemented query type");
default:
assert(!"Invalid query type");
}
@ -394,6 +451,7 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
uint64_t available_iova = query_available_iova(pool, query);
uint64_t buffer_iova = tu_buffer_iova(buffer) + dstOffset + i * stride;
uint32_t result_count = get_result_count(pool);
uint32_t statistics = pool->pipeline_statistics;
/* Wait for the available bit to be set if executed with the
* VK_QUERY_RESULT_WAIT_BIT flag. */
@ -408,7 +466,14 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
}
for (uint32_t k = 0; k < result_count; k++) {
uint64_t result_iova = query_result_iova(pool, query, k);
uint64_t result_iova;
if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
uint32_t stat_idx = statistics_index(&statistics);
result_iova = query_result_iova(pool, query, stat_idx);
} else {
result_iova = query_result_iova(pool, query, k);
}
if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
/* Unconditionally copying the bo->result into the buffer here is
@ -469,10 +534,9 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
return emit_copy_query_pool_results(cmdbuf, cs, pool, firstQuery,
queryCount, buffer, dstOffset, stride, flags);
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
unreachable("Unimplemented query type");
default:
assert(!"Invalid query type");
}
@ -488,17 +552,28 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
for (uint32_t i = 0; i < queryCount; i++) {
uint32_t query = firstQuery + i;
uint32_t statistics = pool->pipeline_statistics;
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
tu_cs_emit_qw(cs, query_available_iova(pool, query));
tu_cs_emit_qw(cs, 0x0);
for (uint32_t k = 0; k < get_result_count(pool); k++) {
uint64_t result_iova;
if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
uint32_t stat_idx = statistics_index(&statistics);
result_iova = query_result_iova(pool, query, stat_idx);
} else {
result_iova = query_result_iova(pool, query, k);
}
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
tu_cs_emit_qw(cs, query_result_iova(pool, query, k));
tu_cs_emit_qw(cs, result_iova);
tu_cs_emit_qw(cs, 0x0);
}
}
}
void
@ -514,10 +589,9 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
unreachable("Unimplemented query type");
default:
assert(!"Invalid query type");
}
@ -557,6 +631,27 @@ emit_begin_occlusion_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit(cs, ZPASS_DONE);
}
/* Record the commands that open a pipeline statistics query.
 *
 * Emits the START_PRIMITIVE_CTRS, RST_PIX_CNT and TILE_FLUSH events, a
 * wait-for-idle, and then a CP_REG_TO_MEM packet that copies
 * STAT_COUNT * 2 registers starting at RBBM_PRIMCTR_0_LO into the `begin`
 * array of the query's slot.
 */
static void
emit_begin_stat_query(struct tu_cmd_buffer *cmdbuf,
                      struct tu_query_pool *pool,
                      uint32_t query)
{
   /* Inside a render pass the commands go to the draw stream, otherwise to
    * the command buffer's main stream.
    */
   struct tu_cs *stream;
   if (cmdbuf->state.pass)
      stream = &cmdbuf->draw_cs;
   else
      stream = &cmdbuf->cs;

   const uint64_t snapshot_iova = pipeline_stat_query_iova(pool, query, begin);

   tu6_emit_event_write(cmdbuf, stream, START_PRIMITIVE_CTRS);
   tu6_emit_event_write(cmdbuf, stream, RST_PIX_CNT);
   tu6_emit_event_write(cmdbuf, stream, TILE_FLUSH);

   tu_cs_emit_wfi(stream);

   /* Header dword of the register copy: source register, count, 64-bit. */
   const uint32_t reg_copy_hdr =
      CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_0_LO) |
      CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
      CP_REG_TO_MEM_0_64B;

   tu_cs_emit_pkt7(stream, CP_REG_TO_MEM, 3);
   tu_cs_emit(stream, reg_copy_hdr);
   tu_cs_emit_qw(stream, snapshot_iova);
}
static void
emit_begin_xfb_query(struct tu_cmd_buffer *cmdbuf,
struct tu_query_pool *pool,
@ -592,6 +687,8 @@ tu_CmdBeginQuery(VkCommandBuffer commandBuffer,
emit_begin_xfb_query(cmdbuf, pool, query, 0);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
emit_begin_stat_query(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_TIMESTAMP:
unreachable("Unimplemented query type");
default:
@ -696,6 +793,57 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_qw(cs, 0x1);
}
/* Record the commands that close a pipeline statistics query.
 *
 * Sequence emitted:
 *   1. STOP_PRIMITIVE_CTRS, RST_VTX_CNT and STAT_EVENT events, then a
 *      wait-for-idle.
 *   2. A CP_REG_TO_MEM packet copying STAT_COUNT * 2 registers starting at
 *      RBBM_PRIMCTR_0_LO into the slot's `end` array.
 *   3. One CP_MEM_TO_MEM packet per counter combining the result, end and
 *      begin values back into the result.
 *      NOTE(review): with NEG_C set this presumably computes
 *      result = result + end - begin -- confirm against the packet docs.
 *   4. CP_WAIT_MEM_WRITES, then a CP_MEM_WRITE that stores 1 to the slot's
 *      availability value. Inside a render pass that last write goes to the
 *      draw epilogue stream instead of the draw stream.
 */
static void
emit_end_stat_query(struct tu_cmd_buffer *cmdbuf,
                    struct tu_query_pool *pool,
                    uint32_t query)
{
   struct tu_cs *stream;
   if (cmdbuf->state.pass)
      stream = &cmdbuf->draw_cs;
   else
      stream = &cmdbuf->cs;

   const uint64_t snapshot_iova = pipeline_stat_query_iova(pool, query, end);
   const uint64_t avail_iova = query_available_iova(pool, query);

   tu6_emit_event_write(cmdbuf, stream, STOP_PRIMITIVE_CTRS);
   tu6_emit_event_write(cmdbuf, stream, RST_VTX_CNT);
   tu6_emit_event_write(cmdbuf, stream, STAT_EVENT);

   tu_cs_emit_wfi(stream);

   /* Header dword of the register copy: source register, count, 64-bit. */
   const uint32_t reg_copy_hdr =
      CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_0_LO) |
      CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
      CP_REG_TO_MEM_0_64B;

   tu_cs_emit_pkt7(stream, CP_REG_TO_MEM, 3);
   tu_cs_emit(stream, reg_copy_hdr);
   tu_cs_emit_qw(stream, snapshot_iova);

   for (int idx = 0; idx < STAT_COUNT; idx++) {
      const uint64_t dst_iova = query_result_iova(pool, query, idx);
      const uint64_t first_iova =
         pipeline_stat_query_iova(pool, query, begin[idx]);
      const uint64_t last_iova =
         pipeline_stat_query_iova(pool, query, end[idx]);

      tu_cs_emit_pkt7(stream, CP_MEM_TO_MEM, 9);
      tu_cs_emit(stream, CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES |
                         CP_MEM_TO_MEM_0_DOUBLE |
                         CP_MEM_TO_MEM_0_NEG_C);
      /* The result address is emitted twice: once as the destination and
       * once as the first source operand.
       */
      tu_cs_emit_qw(stream, dst_iova);
      tu_cs_emit_qw(stream, dst_iova);
      tu_cs_emit_qw(stream, last_iova);
      tu_cs_emit_qw(stream, first_iova);
   }

   tu_cs_emit_pkt7(stream, CP_WAIT_MEM_WRITES, 0);

   /* Within a render pass the availability write is recorded into the draw
    * epilogue stream rather than the draw stream.
    */
   if (cmdbuf->state.pass)
      stream = &cmdbuf->draw_epilogue_cs;

   /* Set the availability to 1 */
   tu_cs_emit_pkt7(stream, CP_MEM_WRITE, 4);
   tu_cs_emit_qw(stream, avail_iova);
   tu_cs_emit_qw(stream, 0x1);
}
static void
emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf,
struct tu_query_pool *pool,
@ -800,6 +948,8 @@ tu_CmdEndQuery(VkCommandBuffer commandBuffer,
emit_end_xfb_query(cmdbuf, pool, query, 0);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
emit_end_stat_query(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_TIMESTAMP:
unreachable("Unimplemented query type");
default: