turnip: Implement VK_EXT_primitives_generated_query

Similar to pipeline statistics but done for a single counter.

We read REG_A6XX_RBBM_PRIMCTR_7 rather than PRIMCTR_8 to get the number
of generated primitives, because PRIMCTR_7 counts primitives before
clipping while PRIMCTR_8 counts them after clipping.

OpenGL spec for GL_PRIMITIVES_GENERATED says:
 "Subsequent rendering will increment the counter once for every
  vertex that is emitted from the geometry shader, or from the
  vertex shader if no geometry shader is present."

Passes tests:
 dEQP-VK.transform_feedback.primitives_generated_query.*

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15746>
This commit is contained in:
Danylo Piliaiev 2022-04-05 12:41:10 +03:00 committed by Marge Bot
parent 5ac8f10ec3
commit dde1623ed2
3 changed files with 103 additions and 1 deletions

View File

@ -560,7 +560,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_EXT_physical_device_drm DONE (anv, radv, tu, v3dv)
VK_EXT_post_depth_coverage DONE (anv/gfx10+, lvp, radv/gfx10+)
VK_EXT_primitive_topology_list_restart DONE (anv, lvp, radv, tu)
VK_EXT_primitives_generated_query DONE (lvp)
VK_EXT_primitives_generated_query DONE (lvp, tu)
VK_EXT_provoking_vertex DONE (anv, lvp, radv, tu, v3dv)
VK_EXT_queue_family_foreign DONE (anv, radv, vn)
VK_EXT_robustness2 DONE (anv, radv, tu)

View File

@ -203,6 +203,7 @@ get_device_extensions(const struct tu_physical_device *device,
.EXT_line_rasterization = true,
.EXT_subgroup_size_control = true,
.EXT_image_robustness = true,
.EXT_primitives_generated_query = true,
#ifndef TU_USE_KGSL
.EXT_physical_device_drm = true,
#endif
@ -838,6 +839,14 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
features->texelBufferAlignment = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
(VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
features->primitivesGeneratedQuery = true;
features->primitivesGeneratedQueryWithRasterizerDiscard = false;
features->primitivesGeneratedQueryWithNonZeroStreams = false;
break;
}
default:
break;

View File

@ -108,6 +108,13 @@ struct PACKED perf_query_slot {
struct perfcntr_query_slot perfcntr;
};
/* Per-query storage for VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT. The fields
 * are addressed individually by the command stream through
 * primitives_generated_query_iova(), so their order and size define the
 * layout the GPU packets write to. */
struct PACKED primitives_generated_query_slot {
/* Shared slot header; holds the `available` field that is set when the
 * query ends. */
struct query_slot common;
/* Value updated at query end from the `end` and `begin` snapshots. */
uint64_t result;
/* RBBM_PRIMCTR_7 snapshot written when the query begins. */
uint64_t begin;
/* RBBM_PRIMCTR_7 snapshot written when the query ends. */
uint64_t end;
};
/* Returns the IOVA of a given uint64_t field in a given slot of a query
* pool. */
#define query_iova(type, pool, query, field) \
@ -130,6 +137,9 @@ struct PACKED perf_query_slot {
sizeof(struct perfcntr_query_slot) * (i) + \
offsetof(struct perfcntr_query_slot, field)
/* IOVA of `field` inside the primitives_generated_query_slot that belongs to
 * `query` in `pool`; forwards to query_iova() with the slot type filled in. */
#define primitives_generated_query_iova(pool, query, field) \
query_iova(struct primitives_generated_query_slot, pool, query, field)
/* IOVA of the `available` field in the query_slot header of `query` in
 * `pool`; forwards to query_iova() using the common struct query_slot type. */
#define query_available_iova(pool, query) \
query_iova(struct query_slot, pool, query, available)
@ -239,6 +249,9 @@ tu_CreateQueryPool(VkDevice _device,
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
slot_size = sizeof(struct primitive_query_slot);
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
slot_size = sizeof(struct primitives_generated_query_slot);
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
perf_query_info =
vk_find_struct_const(pCreateInfo->pNext,
@ -364,6 +377,7 @@ get_result_count(struct tu_query_pool *pool)
/* Occlusion, timestamp and primitives generated queries write one integer value */
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
return 1;
/* Transform feedback queries write two integer values */
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
@ -548,6 +562,7 @@ tu_GetQueryPoolResults(VkDevice _device,
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
return get_query_pool_results(device, pool, firstQuery, queryCount,
@ -689,6 +704,7 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
return emit_copy_query_pool_results(cmdbuf, cs, pool, firstQuery,
queryCount, buffer, dstOffset, stride, flags);
@ -749,6 +765,7 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
@ -936,6 +953,27 @@ emit_begin_xfb_query(struct tu_cmd_buffer *cmdbuf,
tu6_emit_event_write(cmdbuf, cs, WRITE_PRIMITIVE_COUNTS);
}
/* Record the start of a primitives-generated query.
 *
 * Emits the START_PRIMITIVE_CTRS, RST_PIX_CNT and TILE_FLUSH events, a WFI,
 * and then a CP_REG_TO_MEM packet that copies the two-dword (64-bit)
 * RBBM_PRIMCTR_7 register pair into the `begin` field of the query's slot.
 *
 * cmdbuf: command buffer being recorded.
 * pool:   query pool that owns the slot.
 * query:  index of the query within the pool.
 */
static void
emit_begin_prim_generated_query(struct tu_cmd_buffer *cmdbuf,
                                struct tu_query_pool *pool,
                                uint32_t query)
{
   /* Inside a render pass the commands go to the draw stream, otherwise to
    * the main command stream. */
   struct tu_cs *stream = &cmdbuf->cs;
   if (cmdbuf->state.pass)
      stream = &cmdbuf->draw_cs;

   const uint64_t start_snapshot_iova =
      primitives_generated_query_iova(pool, query, begin);

   tu6_emit_event_write(cmdbuf, stream, START_PRIMITIVE_CTRS);
   tu6_emit_event_write(cmdbuf, stream, RST_PIX_CNT);
   tu6_emit_event_write(cmdbuf, stream, TILE_FLUSH);

   /* NOTE(review): presumably the WFI lets the counter settle before it is
    * read back - confirm. */
   tu_cs_emit_wfi(stream);

   /* Source register, dword count (2) and 64-bit flag for the copy. */
   const uint32_t reg_to_mem_dword0 =
      CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_7_LO) |
      CP_REG_TO_MEM_0_CNT(2) |
      CP_REG_TO_MEM_0_64B;

   tu_cs_emit_pkt7(stream, CP_REG_TO_MEM, 3);
   tu_cs_emit(stream, reg_to_mem_dword0);
   tu_cs_emit_qw(stream, start_snapshot_iova);
}
VKAPI_ATTR void VKAPI_CALL
tu_CmdBeginQuery(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
@ -957,6 +995,9 @@ tu_CmdBeginQuery(VkCommandBuffer commandBuffer,
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
emit_begin_xfb_query(cmdbuf, pool, query, 0);
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
emit_begin_prim_generated_query(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
emit_begin_perf_query(cmdbuf, pool, query);
break;
@ -985,6 +1026,9 @@ tu_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer,
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
emit_begin_xfb_query(cmdbuf, pool, query, index);
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
emit_begin_prim_generated_query(cmdbuf, pool, query);
break;
default:
assert(!"Invalid query type");
}
@ -1242,6 +1286,49 @@ emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_qw(cs, 0x1);
}
/* Record the end of a primitives-generated query.
 *
 * Emits the STOP_PRIMITIVE_CTRS, RST_VTX_CNT and STAT_EVENT events and a WFI,
 * copies the two-dword (64-bit) RBBM_PRIMCTR_7 register pair into the slot's
 * `end` field, updates `result` from `end` and `begin` with CP_MEM_TO_MEM,
 * and finally writes 1 to the slot's availability field.
 *
 * cmdbuf: command buffer being recorded.
 * pool:   query pool that owns the slot.
 * query:  index of the query within the pool.
 */
static void
emit_end_prim_generated_query(struct tu_cmd_buffer *cmdbuf,
                              struct tu_query_pool *pool,
                              uint32_t query)
{
   /* Inside a render pass the commands go to the draw stream, otherwise to
    * the main command stream. */
   struct tu_cs *stream = &cmdbuf->cs;
   if (cmdbuf->state.pass)
      stream = &cmdbuf->draw_cs;

   const uint64_t start_snapshot_iova =
      primitives_generated_query_iova(pool, query, begin);
   const uint64_t stop_snapshot_iova =
      primitives_generated_query_iova(pool, query, end);
   const uint64_t accum_iova =
      primitives_generated_query_iova(pool, query, result);
   const uint64_t avail_iova = query_available_iova(pool, query);

   tu6_emit_event_write(cmdbuf, stream, STOP_PRIMITIVE_CTRS);
   tu6_emit_event_write(cmdbuf, stream, RST_VTX_CNT);
   tu6_emit_event_write(cmdbuf, stream, STAT_EVENT);

   /* NOTE(review): presumably the WFI lets the counter settle before it is
    * read back - confirm. */
   tu_cs_emit_wfi(stream);

   /* Snapshot the counter into the slot's `end` field. */
   const uint32_t reg_to_mem_dword0 =
      CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_7_LO) |
      CP_REG_TO_MEM_0_CNT(2) |
      CP_REG_TO_MEM_0_64B;

   tu_cs_emit_pkt7(stream, CP_REG_TO_MEM, 3);
   tu_cs_emit(stream, reg_to_mem_dword0);
   tu_cs_emit_qw(stream, stop_snapshot_iova);

   /* Operands, in order: destination `result`, then `result`, `end`,
    * `begin`.
    * NOTE(review): with DOUBLE | NEG_C this presumably computes the 64-bit
    * result = result + end - begin - confirm against the CP_MEM_TO_MEM
    * packet definition. */
   const uint32_t mem_to_mem_flags =
      CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C |
      CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES;

   tu_cs_emit_pkt7(stream, CP_MEM_TO_MEM, 9);
   tu_cs_emit(stream, mem_to_mem_flags);
   tu_cs_emit_qw(stream, accum_iova);
   tu_cs_emit_qw(stream, accum_iova);
   tu_cs_emit_qw(stream, stop_snapshot_iova);
   tu_cs_emit_qw(stream, start_snapshot_iova);

   tu_cs_emit_pkt7(stream, CP_WAIT_MEM_WRITES, 0);

   /* Inside a render pass the availability write is recorded into the draw
    * epilogue stream instead of the stream used above.
    * NOTE(review): presumably so the query only becomes available once the
    * whole pass has executed - confirm. */
   struct tu_cs *avail_stream =
      cmdbuf->state.pass ? &cmdbuf->draw_epilogue_cs : stream;

   /* Mark the query as available by writing a 64-bit 1. */
   tu_cs_emit_pkt7(avail_stream, CP_MEM_WRITE, 4);
   tu_cs_emit_qw(avail_stream, avail_iova);
   tu_cs_emit_qw(avail_stream, 0x1);
}
/* Implement this bit of spec text from section 17.2 "Query Operation":
*
* If queries are used while executing a render pass instance that has
@ -1296,6 +1383,9 @@ tu_CmdEndQuery(VkCommandBuffer commandBuffer,
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
emit_end_xfb_query(cmdbuf, pool, query, 0);
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
emit_end_prim_generated_query(cmdbuf, pool, query);
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
emit_end_perf_query(cmdbuf, pool, query);
break;
@ -1326,6 +1416,9 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
assert(index <= 4);
emit_end_xfb_query(cmdbuf, pool, query, index);
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
emit_end_prim_generated_query(cmdbuf, pool, query);
break;
default:
assert(!"Invalid query type");
}