turnip: Support pipeline statistics query

Signed-off-by: Hyunjun Ko <zzoon@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6299>
Author: Hyunjun Ko, 2020-09-01 05:13:52 +00:00 (committed by Marge Bot)
Parent: 170da456ef
Commit: b92be738d5
2 changed files with 161 additions and 11 deletions


@@ -395,7 +395,7 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
       .textureCompressionASTC_LDR = true,
       .textureCompressionBC = true,
       .occlusionQueryPrecise = true,
-      .pipelineStatisticsQuery = false,
+      .pipelineStatisticsQuery = true,
       .vertexPipelineStoresAndAtomics = true,
       .fragmentStoresAndAtomics = true,
       .shaderTessellationAndGeometryPointSize = false,


@@ -42,6 +42,7 @@
 #define NSEC_PER_SEC 1000000000ull
 #define WAIT_TIMEOUT 5
+#define STAT_COUNT ((REG_A6XX_RBBM_PRIMCTR_10_LO - REG_A6XX_RBBM_PRIMCTR_0_LO) / 2 + 1)
 
 struct PACKED query_slot {
    uint64_t available;
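
For context, STAT_COUNT counts how many 64-bit RBBM_PRIMCTR counters lie between PRIMCTR_0 and PRIMCTR_10 inclusive: the generated a6xx headers lay each counter out as a LO/HI dword pair, so dividing the dword distance by two and adding one gives eleven counters. A minimal standalone sketch of the same arithmetic, using made-up offsets in place of the generated register definitions, would be:

    #include <assert.h>

    /* Hypothetical dword offsets standing in for the generated a6xx register
     * definitions (the real values come from the a6xx XML headers). Each
     * 64-bit counter is a consecutive LO/HI dword pair, so PRIMCTR_10_LO
     * sits 10 * 2 = 20 dwords past PRIMCTR_0_LO. */
    #define FAKE_REG_PRIMCTR_0_LO   0x0840
    #define FAKE_REG_PRIMCTR_10_LO  (FAKE_REG_PRIMCTR_0_LO + 10 * 2)

    #define FAKE_STAT_COUNT \
       ((FAKE_REG_PRIMCTR_10_LO - FAKE_REG_PRIMCTR_0_LO) / 2 + 1)

    int main(void)
    {
       assert(FAKE_STAT_COUNT == 11); /* eleven pipeline-statistics counters */
       return 0;
    }
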
@@ -71,6 +72,14 @@ struct PACKED primitive_slot_value {
    uint64_t values[2];
 };
 
+struct PACKED pipeline_stat_query_slot {
+   struct query_slot common;
+   uint64_t results[STAT_COUNT];
+
+   uint64_t begin[STAT_COUNT];
+   uint64_t end[STAT_COUNT];
+};
+
 struct PACKED primitive_query_slot {
    struct query_slot common;
    /* The result of transform feedback queries is two integer values:
@@ -95,6 +104,10 @@ struct PACKED primitive_query_slot {
 #define occlusion_query_iova(pool, query, field) \
    query_iova(struct occlusion_query_slot, pool, query, field)
 
+#define pipeline_stat_query_iova(pool, query, field) \
+   pool->bo.iova + pool->stride * query + \
+   offsetof(struct pipeline_stat_query_slot, field)
+
 #define primitive_query_iova(pool, query, field, i) \
    query_iova(struct primitive_query_slot, pool, query, field) + \
    offsetof(struct primitive_slot_value, values[i])
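
The new pipeline_stat_query_iova macro resolves the GPU address of one field inside the per-query slot: BO base address, plus the slot stride times the query index, plus the offset of the field within struct pipeline_stat_query_slot. A self-contained sketch of that address arithmetic, with a stand-in slot layout rather than the driver's actual types, might look like:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SKETCH_STAT_COUNT 11

    /* Stand-in for struct pipeline_stat_query_slot. */
    struct sketch_stat_slot {
       uint64_t available;                  /* plays the role of query_slot */
       uint64_t results[SKETCH_STAT_COUNT];
       uint64_t begin[SKETCH_STAT_COUNT];
       uint64_t end[SKETCH_STAT_COUNT];
    };

    /* iova = BO base + query * stride + offset of the requested field */
    #define sketch_stat_iova(base, stride, query, field) \
       ((base) + (uint64_t)(stride) * (query) + \
        offsetof(struct sketch_stat_slot, field))

    int main(void)
    {
       uint64_t bo_base = 0x100000;                      /* hypothetical BO address */
       uint32_t stride  = sizeof(struct sketch_stat_slot);

       /* Address where query #3 would snapshot its begin[] counters. */
       printf("begin iova: 0x%llx\n",
              (unsigned long long)sketch_stat_iova(bo_base, stride, 3, begin));
       return 0;
    }
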
@@ -142,7 +155,8 @@ tu_CreateQueryPool(VkDevice _device,
       slot_size = sizeof(struct primitive_query_slot);
       break;
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      unreachable("Unimplemented query type");
+      slot_size = sizeof(struct pipeline_stat_query_slot);
+      break;
    default:
       assert(!"Invalid query type");
    }
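
With the slot size in place, a pipeline-statistics pool goes through the normal Vulkan entry point. As an application-side illustration (not part of this change), a pool tracking clipping and fragment-shader invocations would be created like this:

    #include <vulkan/vulkan.h>

    /* Application-side usage sketch: 'device' is assumed to be a VkDevice
     * whose implementation reports pipelineStatisticsQuery = VK_TRUE. */
    static VkQueryPool create_stats_pool(VkDevice device)
    {
       const VkQueryPoolCreateInfo info = {
          .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
          .queryType = VK_QUERY_TYPE_PIPELINE_STATISTICS,
          .queryCount = 1,
          .pipelineStatistics =
             VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
             VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
       };

       VkQueryPool pool = VK_NULL_HANDLE;
       if (vkCreateQueryPool(device, &info, NULL, &pool) != VK_SUCCESS)
          return VK_NULL_HANDLE;
       return pool;
    }
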
@@ -205,12 +219,47 @@ get_result_count(struct tu_query_pool *pool)
    /* Transform feedback queries write two integer values */
    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
       return 2;
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+      return util_bitcount(pool->pipeline_statistics);
    default:
       assert(!"Invalid query type");
       return 0;
    }
 }
 
+static uint32_t
+statistics_index(uint32_t *statistics)
+{
+   uint32_t stat;
+   stat = u_bit_scan(statistics);
+
+   switch (1 << stat) {
+   case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT:
+   case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT:
+      return 0;
+   case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT:
+      return 1;
+   case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT:
+      return 2;
+   case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT:
+      return 4;
+   case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT:
+      return 5;
+   case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT:
+      return 6;
+   case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT:
+      return 7;
+   case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT:
+      return 8;
+   case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT:
+      return 9;
+   case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT:
+      return 10;
+   default:
+      return 0;
+   }
+}
+
 /* Wait on the the availability status of a query up until a timeout. */
 static VkResult
 wait_for_available(struct tu_device *device, struct tu_query_pool *pool,
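
get_result_count now reports one 64-bit value per enabled statistic bit, and statistics_index consumes the caller's copy of the bitmask one bit at a time (u_bit_scan returns and clears the lowest set bit), translating each VK_QUERY_PIPELINE_STATISTIC_* bit into the index of the RBBM_PRIMCTR counter that backs it; as the switch shows, IA vertices and VS invocations share counter 0, and counter 3 is not used by this mapping. A rough sketch of how the readback loops drain such a mask, with u_bit_scan approximated by a plain loop rather than Mesa's util helper, is:

    #include <stdint.h>
    #include <stdio.h>

    /* Approximation of Mesa's u_bit_scan(): return the index of the lowest
     * set bit and clear it from the mask (mask must be non-zero). */
    static uint32_t sketch_bit_scan(uint32_t *mask)
    {
       uint32_t i = 0;
       while (!(*mask & (1u << i)))
          i++;
       *mask &= ~(1u << i);
       return i;
    }

    int main(void)
    {
       /* Hypothetical request: INPUT_ASSEMBLY_PRIMITIVES (1 << 1) and
        * CLIPPING_INVOCATIONS (1 << 5). A fresh copy of the pool's mask is
        * drained per query, so result k corresponds to the k-th lowest
        * requested bit, which is the ordering the Vulkan spec requires. */
       uint32_t statistics = (1u << 1) | (1u << 5);

       while (statistics) {
          uint32_t stat = sketch_bit_scan(&statistics);
          printf("statistic bit %u -> its RBBM_PRIMCTR counter index\n", stat);
       }
       return 0;
    }
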
@@ -262,6 +311,7 @@ get_query_pool_results(struct tu_device *device,
       struct query_slot *slot = slot_address(pool, query);
       bool available = query_is_available(slot);
       uint32_t result_count = get_result_count(pool);
+      uint32_t statistics = pool->pipeline_statistics;
 
       if ((flags & VK_QUERY_RESULT_WAIT_BIT) && !available) {
          VkResult wait_result = wait_for_available(device, pool, query);
@@ -287,7 +337,15 @@ get_query_pool_results(struct tu_device *device,
 
       for (uint32_t k = 0; k < result_count; k++) {
          if (available) {
-            uint64_t *result = query_result_addr(pool, query, k);
+            uint64_t *result;
+
+            if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+               uint32_t stat_idx = statistics_index(&statistics);
+               result = query_result_addr(pool, query, stat_idx);
+            } else {
+               result = query_result_addr(pool, query, k);
+            }
+
             write_query_value_cpu(result_base, k, *result, flags);
          } else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
            /* From the Vulkan 1.1.130 spec:
@@ -337,10 +395,9 @@ tu_GetQueryPoolResults(VkDevice _device,
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_TIMESTAMP:
    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
       return get_query_pool_results(device, pool, firstQuery, queryCount,
                                     dataSize, pData, stride, flags);
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      unreachable("Unimplemented query type");
    default:
       assert(!"Invalid query type");
    }
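
On the application side, the statistics come back tightly packed, one value per enabled bit, ordered by increasing bit position of the requested flags. Continuing the hypothetical two-statistic pool from the earlier sketch, readback would look like:

    #include <stdint.h>
    #include <stdio.h>
    #include <vulkan/vulkan.h>

    /* Application-side usage sketch: read back the two statistics requested
     * when the pool was created (clipping invocations has the lower bit
     * value, so it lands in results[0]). */
    static void read_stats(VkDevice device, VkQueryPool pool)
    {
       uint64_t results[2] = {0, 0};

       VkResult res = vkGetQueryPoolResults(device, pool,
                                            0 /* firstQuery */, 1 /* queryCount */,
                                            sizeof(results), results,
                                            sizeof(results) /* stride per query */,
                                            VK_QUERY_RESULT_64_BIT |
                                            VK_QUERY_RESULT_WAIT_BIT);
       if (res == VK_SUCCESS) {
          printf("clipping invocations:        %llu\n",
                 (unsigned long long)results[0]);
          printf("fragment shader invocations: %llu\n",
                 (unsigned long long)results[1]);
       }
    }
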
@@ -394,6 +451,7 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
       uint64_t available_iova = query_available_iova(pool, query);
       uint64_t buffer_iova = tu_buffer_iova(buffer) + dstOffset + i * stride;
       uint32_t result_count = get_result_count(pool);
+      uint32_t statistics = pool->pipeline_statistics;
 
       /* Wait for the available bit to be set if executed with the
        * VK_QUERY_RESULT_WAIT_BIT flag. */
@@ -408,7 +466,14 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
       }
 
       for (uint32_t k = 0; k < result_count; k++) {
-         uint64_t result_iova = query_result_iova(pool, query, k);
+         uint64_t result_iova;
+
+         if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+            uint32_t stat_idx = statistics_index(&statistics);
+            result_iova = query_result_iova(pool, query, stat_idx);
+         } else {
+            result_iova = query_result_iova(pool, query, k);
+         }
 
          if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
            /* Unconditionally copying the bo->result into the buffer here is
@@ -469,10 +534,9 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_TIMESTAMP:
    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
       return emit_copy_query_pool_results(cmdbuf, cs, pool, firstQuery,
                                           queryCount, buffer, dstOffset, stride, flags);
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      unreachable("Unimplemented query type");
    default:
       assert(!"Invalid query type");
    }
@@ -488,17 +552,28 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
    for (uint32_t i = 0; i < queryCount; i++) {
       uint32_t query = firstQuery + i;
+      uint32_t statistics = pool->pipeline_statistics;
 
       tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
       tu_cs_emit_qw(cs, query_available_iova(pool, query));
       tu_cs_emit_qw(cs, 0x0);
 
       for (uint32_t k = 0; k < get_result_count(pool); k++) {
+         uint64_t result_iova;
+
+         if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+            uint32_t stat_idx = statistics_index(&statistics);
+            result_iova = query_result_iova(pool, query, stat_idx);
+         } else {
+            result_iova = query_result_iova(pool, query, k);
+         }
+
          tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
-         tu_cs_emit_qw(cs, query_result_iova(pool, query, k));
+         tu_cs_emit_qw(cs, result_iova);
         tu_cs_emit_qw(cs, 0x0);
       }
    }
 }
 
 void
@@ -514,10 +589,9 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
    case VK_QUERY_TYPE_TIMESTAMP:
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
       emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
       break;
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      unreachable("Unimplemented query type");
    default:
       assert(!"Invalid query type");
    }
@@ -557,6 +631,27 @@ emit_begin_occlusion_query(struct tu_cmd_buffer *cmdbuf,
    tu_cs_emit(cs, ZPASS_DONE);
 }
 
+static void
+emit_begin_stat_query(struct tu_cmd_buffer *cmdbuf,
+                      struct tu_query_pool *pool,
+                      uint32_t query)
+{
+   struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
+   uint64_t begin_iova = pipeline_stat_query_iova(pool, query, begin);
+
+   tu6_emit_event_write(cmdbuf, cs, START_PRIMITIVE_CTRS);
+   tu6_emit_event_write(cmdbuf, cs, RST_PIX_CNT);
+   tu6_emit_event_write(cmdbuf, cs, TILE_FLUSH);
+
+   tu_cs_emit_wfi(cs);
+
+   tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
+   tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_0_LO) |
+                  CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
+                  CP_REG_TO_MEM_0_64B);
+   tu_cs_emit_qw(cs, begin_iova);
+}
+
 static void
 emit_begin_xfb_query(struct tu_cmd_buffer *cmdbuf,
                      struct tu_query_pool *pool,
@@ -592,6 +687,8 @@ tu_CmdBeginQuery(VkCommandBuffer commandBuffer,
       emit_begin_xfb_query(cmdbuf, pool, query, 0);
       break;
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+      emit_begin_stat_query(cmdbuf, pool, query);
+      break;
    case VK_QUERY_TYPE_TIMESTAMP:
       unreachable("Unimplemented query type");
    default:
@@ -696,6 +793,57 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
    tu_cs_emit_qw(cs, 0x1);
 }
 
+static void
+emit_end_stat_query(struct tu_cmd_buffer *cmdbuf,
+                    struct tu_query_pool *pool,
+                    uint32_t query)
+{
+   struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
+   uint64_t end_iova = pipeline_stat_query_iova(pool, query, end);
+   uint64_t available_iova = query_available_iova(pool, query);
+   uint64_t result_iova;
+   uint64_t stat_start_iova;
+   uint64_t stat_stop_iova;
+
+   tu6_emit_event_write(cmdbuf, cs, STOP_PRIMITIVE_CTRS);
+   tu6_emit_event_write(cmdbuf, cs, RST_VTX_CNT);
+   tu6_emit_event_write(cmdbuf, cs, STAT_EVENT);
+
+   tu_cs_emit_wfi(cs);
+
+   tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
+   tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_0_LO) |
+                  CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
+                  CP_REG_TO_MEM_0_64B);
+   tu_cs_emit_qw(cs, end_iova);
+
+   for (int i = 0; i < STAT_COUNT; i++) {
+      result_iova = query_result_iova(pool, query, i);
+      stat_start_iova = pipeline_stat_query_iova(pool, query, begin[i]);
+      stat_stop_iova = pipeline_stat_query_iova(pool, query, end[i]);
+
+      tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9);
+      tu_cs_emit(cs, CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES |
+                     CP_MEM_TO_MEM_0_DOUBLE |
+                     CP_MEM_TO_MEM_0_NEG_C);
+
+      tu_cs_emit_qw(cs, result_iova);
+      tu_cs_emit_qw(cs, result_iova);
+      tu_cs_emit_qw(cs, stat_stop_iova);
+      tu_cs_emit_qw(cs, stat_start_iova);
+   }
+
+   tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
+
+   if (cmdbuf->state.pass)
+      cs = &cmdbuf->draw_epilogue_cs;
+
+   /* Set the availability to 1 */
+   tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
+   tu_cs_emit_qw(cs, available_iova);
+   tu_cs_emit_qw(cs, 0x1);
+}
+
 static void
 emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf,
                    struct tu_query_pool *pool,
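
The end path snapshots the eleven counters a second time into end[], then emits one CP_MEM_TO_MEM per counter. Reading the flags as dst = srcA + srcB - srcC (DOUBLE selecting 64-bit operands, NEG_C negating the third source; this reading is an assumption, not spelled out in the diff), each packet asks the CP to compute results[i] = results[i] + end[i] - begin[i]; since emit_reset_query_pool zeroes results[], that is just the begin/end delta. A CPU-side sketch of the same arithmetic:

    #include <stdint.h>

    #define SKETCH_STAT_COUNT 11

    /* CPU-side equivalent of what the CP_MEM_TO_MEM loop above requests,
     * under the dst = srcA + srcB - srcC reading described in the note:
     * srcA = results[i], srcB = end[i], srcC = begin[i]. */
    static void accumulate_stats(uint64_t results[SKETCH_STAT_COUNT],
                                 const uint64_t begin[SKETCH_STAT_COUNT],
                                 const uint64_t end[SKETCH_STAT_COUNT])
    {
       for (int i = 0; i < SKETCH_STAT_COUNT; i++)
          results[i] += end[i] - begin[i];
    }
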
@@ -800,6 +948,8 @@ tu_CmdEndQuery(VkCommandBuffer commandBuffer,
       emit_end_xfb_query(cmdbuf, pool, query, 0);
       break;
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+      emit_end_stat_query(cmdbuf, pool, query);
+      break;
    case VK_QUERY_TYPE_TIMESTAMP:
       unreachable("Unimplemented query type");
    default: