turnip: Implement VK_EXT_primitives_generated_query
Similar to pipeline statistics but done for a single counter. We use REG_A6XX_RBBM_PRIMCTR_7 to get generated primitives and not PRIMCTR_8 because PRIMCTR_7 counts pre-clipped prims while PRIMCTR_8 counts them after clipping. OpenGL spec for GL_PRIMITIVES_GENERATED says: "Subsequent rendering will increment the counter once for every vertex that is emitted from the geometry shader, or from the vertex shader if no geometry shader is present." Passes tests: dEQP-VK.transform_feedback.primitives_generated_query.* Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15746>
This commit is contained in:
parent
5ac8f10ec3
commit
dde1623ed2
|
@ -560,7 +560,7 @@ Khronos extensions that are not part of any Vulkan version:
|
|||
VK_EXT_physical_device_drm DONE (anv, radv, tu, v3dv)
|
||||
VK_EXT_post_depth_coverage DONE (anv/gfx10+, lvp, radv/gfx10+)
|
||||
VK_EXT_primitive_topology_list_restart DONE (anv, lvp, radv, tu)
|
||||
VK_EXT_primitives_generated_query DONE (lvp)
|
||||
VK_EXT_primitives_generated_query DONE (lvp, tu)
|
||||
VK_EXT_provoking_vertex DONE (anv, lvp, radv, tu, v3dv)
|
||||
VK_EXT_queue_family_foreign DONE (anv, radv, vn)
|
||||
VK_EXT_robustness2 DONE (anv, radv, tu)
|
||||
|
|
|
@ -203,6 +203,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
|||
.EXT_line_rasterization = true,
|
||||
.EXT_subgroup_size_control = true,
|
||||
.EXT_image_robustness = true,
|
||||
.EXT_primitives_generated_query = true,
|
||||
#ifndef TU_USE_KGSL
|
||||
.EXT_physical_device_drm = true,
|
||||
#endif
|
||||
|
@ -838,6 +839,14 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
features->texelBufferAlignment = true;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
|
||||
VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
|
||||
(VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
|
||||
features->primitivesGeneratedQuery = true;
|
||||
features->primitivesGeneratedQueryWithRasterizerDiscard = false;
|
||||
features->primitivesGeneratedQueryWithNonZeroStreams = false;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
|
|
|
@ -108,6 +108,13 @@ struct PACKED perf_query_slot {
|
|||
struct perfcntr_query_slot perfcntr;
|
||||
};
|
||||
|
||||
/* Per-query slot layout used for VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT
 * pools (tu_CreateQueryPool sizes each slot with sizeof() of this struct).
 *
 * All fields after the common header are uint64_t so that they can be
 * addressed with query_iova().
 */
struct PACKED primitives_generated_query_slot {
|
||||
/* Shared slot header; query_available_iova() addresses the slot as a
 * struct query_slot to reach its `available` field. */
struct query_slot common;
|
||||
/* Destination (and first source operand) of the CP_MEM_TO_MEM emitted at
 * query end; presumably accumulates end - begin -- confirm against the
 * packet definition. */
uint64_t result;
|
||||
/* Snapshot of RBBM_PRIMCTR_7 written when the query begins. */
uint64_t begin;
|
||||
/* Snapshot of RBBM_PRIMCTR_7 written when the query ends. */
uint64_t end;
|
||||
};
|
||||
|
||||
/* Returns the IOVA of a given uint64_t field in a given slot of a query
|
||||
* pool. */
|
||||
#define query_iova(type, pool, query, field) \
|
||||
|
@ -130,6 +137,9 @@ struct PACKED perf_query_slot {
|
|||
sizeof(struct perfcntr_query_slot) * (i) + \
|
||||
offsetof(struct perfcntr_query_slot, field)
|
||||
|
||||
/* Returns the IOVA of a given uint64_t field in a given slot of a
 * primitives-generated query pool; forwards to query_iova() with
 * struct primitives_generated_query_slot as the slot type. */
#define primitives_generated_query_iova(pool, query, field) \
|
||||
query_iova(struct primitives_generated_query_slot, pool, query, field)
|
||||
|
||||
#define query_available_iova(pool, query) \
|
||||
query_iova(struct query_slot, pool, query, available)
|
||||
|
||||
|
@ -239,6 +249,9 @@ tu_CreateQueryPool(VkDevice _device,
|
|||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
slot_size = sizeof(struct primitive_query_slot);
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
slot_size = sizeof(struct primitives_generated_query_slot);
|
||||
break;
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
||||
perf_query_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext,
|
||||
|
@ -364,6 +377,7 @@ get_result_count(struct tu_query_pool *pool)
|
|||
/* Occlusion, timestamp and primitives-generated queries write one integer value */
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
return 1;
|
||||
/* Transform feedback queries write two integer values */
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
|
@ -548,6 +562,7 @@ tu_GetQueryPoolResults(VkDevice _device,
|
|||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
||||
return get_query_pool_results(device, pool, firstQuery, queryCount,
|
||||
|
@ -689,6 +704,7 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
|
|||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||
return emit_copy_query_pool_results(cmdbuf, cs, pool, firstQuery,
|
||||
queryCount, buffer, dstOffset, stride, flags);
|
||||
|
@ -749,6 +765,7 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
|||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
||||
emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
|
||||
|
@ -936,6 +953,27 @@ emit_begin_xfb_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu6_emit_event_write(cmdbuf, cs, WRITE_PRIMITIVE_COUNTS);
|
||||
}
|
||||
|
||||
/* Emits the commands that begin a primitives-generated query.
 *
 * Starts the primitive counters and snapshots the 64-bit RBBM_PRIMCTR_7
 * counter into the `begin` field of the query's slot. Per the commit that
 * introduced this function, PRIMCTR_7 is used because it counts primitives
 * before clipping.
 *
 * cmdbuf: command buffer the packets are recorded into
 * pool:   query pool owning the slot
 * query:  index of the slot within the pool
 */
static void
|
||||
emit_begin_prim_generated_query(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_query_pool *pool,
|
||||
uint32_t query)
|
||||
{
|
||||
/* Inside a render pass the packets go to draw_cs, otherwise to the main
 * command stream. */
struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
|
||||
uint64_t begin_iova = primitives_generated_query_iova(pool, query, begin);
|
||||

||||
/* NOTE(review): this event sequence mirrors what the commit message calls
 * "similar to pipeline statistics"; the need for RST_PIX_CNT/TILE_FLUSH for
 * this single counter is not evident from here -- confirm. */
tu6_emit_event_write(cmdbuf, cs, START_PRIMITIVE_CTRS);
|
||||
tu6_emit_event_write(cmdbuf, cs, RST_PIX_CNT);
|
||||
tu6_emit_event_write(cmdbuf, cs, TILE_FLUSH);
|
||||

||||
/* Presumably waits for the GPU to idle so the counter read below is
 * settled -- confirm against tu_cs_emit_wfi(). */
tu_cs_emit_wfi(cs);
|
||||

||||
/* Copy the counter register pair (starting at PRIMCTR_7_LO, count 2,
 * 64-bit mode) into the slot's `begin` field. */
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_7_LO) |
|
||||
CP_REG_TO_MEM_0_CNT(2) |
|
||||
CP_REG_TO_MEM_0_64B);
|
||||
tu_cs_emit_qw(cs, begin_iova);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdBeginQuery(VkCommandBuffer commandBuffer,
|
||||
VkQueryPool queryPool,
|
||||
|
@ -957,6 +995,9 @@ tu_CmdBeginQuery(VkCommandBuffer commandBuffer,
|
|||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
emit_begin_xfb_query(cmdbuf, pool, query, 0);
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
emit_begin_prim_generated_query(cmdbuf, pool, query);
|
||||
break;
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
||||
emit_begin_perf_query(cmdbuf, pool, query);
|
||||
break;
|
||||
|
@ -985,6 +1026,9 @@ tu_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer,
|
|||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
emit_begin_xfb_query(cmdbuf, pool, query, index);
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
emit_begin_prim_generated_query(cmdbuf, pool, query);
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid query type");
|
||||
}
|
||||
|
@ -1242,6 +1286,49 @@ emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_cs_emit_qw(cs, 0x1);
|
||||
}
|
||||
|
||||
/* Emits the commands that end a primitives-generated query.
 *
 * Stops the primitive counters, snapshots RBBM_PRIMCTR_7 into the slot's
 * `end` field, combines `result`, `end` and `begin` into `result` with a
 * CP_MEM_TO_MEM packet, and finally writes 1 to the slot's availability
 * field.
 *
 * cmdbuf: command buffer the packets are recorded into
 * pool:   query pool owning the slot
 * query:  index of the slot within the pool
 */
static void
|
||||
emit_end_prim_generated_query(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_query_pool *pool,
|
||||
uint32_t query)
|
||||
{
|
||||
/* Inside a render pass the packets go to draw_cs, otherwise to the main
 * command stream. */
struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
|
||||

||||
uint64_t begin_iova = primitives_generated_query_iova(pool, query, begin);
|
||||
uint64_t end_iova = primitives_generated_query_iova(pool, query, end);
|
||||
uint64_t result_iova = primitives_generated_query_iova(pool, query, result);
|
||||
uint64_t available_iova = query_available_iova(pool, query);
|
||||

||||
tu6_emit_event_write(cmdbuf, cs, STOP_PRIMITIVE_CTRS);
|
||||
tu6_emit_event_write(cmdbuf, cs, RST_VTX_CNT);
|
||||
tu6_emit_event_write(cmdbuf, cs, STAT_EVENT);
|
||||

||||
/* Presumably waits for the GPU to idle so the counter read below is
 * settled -- confirm against tu_cs_emit_wfi(). */
tu_cs_emit_wfi(cs);
|
||||

||||
/* Copy the counter register pair (starting at PRIMCTR_7_LO, count 2,
 * 64-bit mode) into the slot's `end` field. */
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_7_LO) |
|
||||
CP_REG_TO_MEM_0_CNT(2) |
|
||||
CP_REG_TO_MEM_0_64B);
|
||||
tu_cs_emit_qw(cs, end_iova);
|
||||

||||
/* Operands are, in order: destination `result`, then sources `result`,
 * `end`, `begin`. With the DOUBLE and NEG_C flags this presumably computes
 * the 64-bit result = result + end - begin -- confirm against the
 * CP_MEM_TO_MEM packet definition. */
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9);
|
||||
tu_cs_emit(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C |
|
||||
CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, end_iova);
|
||||
tu_cs_emit_qw(cs, begin_iova);
|
||||

||||
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||

||||
/* Inside a render pass the availability write is recorded into
 * draw_epilogue_cs rather than the draw stream used above; presumably so
 * it lands after the whole pass has been processed -- confirm. */
if (cmdbuf->state.pass)
|
||||
cs = &cmdbuf->draw_epilogue_cs;
|
||||

||||
/* Set the availability to 1 */
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
|
||||
tu_cs_emit_qw(cs, available_iova);
|
||||
tu_cs_emit_qw(cs, 0x1);
|
||||
}
|
||||
|
||||
/* Implement this bit of spec text from section 17.2 "Query Operation":
|
||||
*
|
||||
* If queries are used while executing a render pass instance that has
|
||||
|
@ -1296,6 +1383,9 @@ tu_CmdEndQuery(VkCommandBuffer commandBuffer,
|
|||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
emit_end_xfb_query(cmdbuf, pool, query, 0);
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
emit_end_prim_generated_query(cmdbuf, pool, query);
|
||||
break;
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
||||
emit_end_perf_query(cmdbuf, pool, query);
|
||||
break;
|
||||
|
@ -1326,6 +1416,9 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
|
|||
assert(index <= 4);
|
||||
emit_end_xfb_query(cmdbuf, pool, query, index);
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
emit_end_prim_generated_query(cmdbuf, pool, query);
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid query type");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue