From 685281278ebd39114c3007e76443eaaa66cf8335 Mon Sep 17 00:00:00 2001 From: "Juan A. Suarez Romero" Date: Thu, 22 Apr 2021 11:07:22 +0200 Subject: [PATCH] v3d: implement performance counter queries These queries are used to retrieve the different hardware counter values, useful to detect performance issues. v2 (Iago): - Fix copyright - Assert hwperfmon exists - Print message on error - Rename hwperfmon structure to make it clear v4 (Juan): - Save last_job_fence when perfmon is stopped v5 (Juan): - No need to ask for counter values if no job was submitted v6 (Juan): - Ensure to flush all jobs before capturing last job fence v7 (Iago): - No braces for single-line body conditionals Reviewed-by: Iago Toral Quiroga Signed-off-by: Juan A. Suarez Romero Part-of: --- src/gallium/drivers/v3d/meson.build | 1 + src/gallium/drivers/v3d/v3d_context.h | 6 + src/gallium/drivers/v3d/v3d_query.c | 32 +- src/gallium/drivers/v3d/v3d_query.h | 6 + src/gallium/drivers/v3d/v3d_query_perfcnt.c | 349 ++++++++++++++++++++ src/gallium/drivers/v3d/v3d_screen.c | 5 + 6 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/v3d/v3d_query_perfcnt.c diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build index 1e92fb62835..b760ca5c025 100644 --- a/src/gallium/drivers/v3d/meson.build +++ b/src/gallium/drivers/v3d/meson.build @@ -33,6 +33,7 @@ files_libv3d = files( 'v3d_query.c', 'v3d_query.h', 'v3d_query_pipe.c', + 'v3d_query_perfcnt.c', 'v3d_resource.c', 'v3d_resource.h', 'v3d_screen.c', diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h index 984a78cfeb1..5bd3e27fd6b 100644 --- a/src/gallium/drivers/v3d/v3d_context.h +++ b/src/gallium/drivers/v3d/v3d_context.h @@ -656,6 +656,12 @@ v3d_stream_output_target_get_vertex_count(struct pipe_stream_output_target *ptar return v3d_stream_output_target(ptarget)->recorded_vertex_count; } +int v3d_get_driver_query_group_info(struct pipe_screen *pscreen, + 
unsigned index, + struct pipe_driver_query_group_info *info); +int v3d_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info); + struct pipe_context *v3d_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); void v3d_program_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/v3d/v3d_query.c b/src/gallium/drivers/v3d/v3d_query.c index ae9993b475a..de333bfeee3 100644 --- a/src/gallium/drivers/v3d/v3d_query.c +++ b/src/gallium/drivers/v3d/v3d_query.c @@ -23,10 +23,39 @@ #include "v3d_query.h" +int +v3d_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_group_info *info) +{ + struct v3d_screen *screen = v3d_screen(pscreen); + + return v3d_get_driver_query_group_info_perfcnt(screen, index, info); +} + +int +v3d_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info) +{ + struct v3d_screen *screen = v3d_screen(pscreen); + + return v3d_get_driver_query_info_perfcnt(screen, index, info); +} + static struct pipe_query * v3d_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index) { - return v3d_create_query_pipe(v3d_context(pctx), query_type, index); + struct v3d_context *v3d = v3d_context(pctx); + + return v3d_create_query_pipe(v3d, query_type, index); +} + +static struct pipe_query * +v3d_create_batch_query(struct pipe_context *pctx, unsigned num_queries, + unsigned *query_types) +{ + return v3d_create_batch_query_perfcnt(v3d_context(pctx), + num_queries, + query_types); } static void @@ -80,6 +109,7 @@ void v3d_query_init(struct pipe_context *pctx) { pctx->create_query = v3d_create_query; + pctx->create_batch_query = v3d_create_batch_query; pctx->destroy_query = v3d_destroy_query; pctx->begin_query = v3d_begin_query; pctx->end_query = v3d_end_query; diff --git a/src/gallium/drivers/v3d/v3d_query.h b/src/gallium/drivers/v3d/v3d_query.h index 12026b28805..7bc9909c8e2 
100644 --- a/src/gallium/drivers/v3d/v3d_query.h +++ b/src/gallium/drivers/v3d/v3d_query.h @@ -42,5 +42,11 @@ struct v3d_query }; struct pipe_query *v3d_create_query_pipe(struct v3d_context *v3d, unsigned query_type, unsigned index); +struct pipe_query *v3d_create_batch_query_perfcnt(struct v3d_context *v3d, unsigned num_queries, + unsigned *query_types); +int v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned index, + struct pipe_driver_query_group_info *info); +int v3d_get_driver_query_info_perfcnt(struct v3d_screen *screen, unsigned index, + struct pipe_driver_query_info *info); #endif /* V3D_QUERY_H */ diff --git a/src/gallium/drivers/v3d/v3d_query_perfcnt.c b/src/gallium/drivers/v3d/v3d_query_perfcnt.c new file mode 100644 index 00000000000..02aa170e8d9 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_query_perfcnt.c @@ -0,0 +1,349 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * Gallium query object support for performance counters + * + * This contains the V3D performance counter queries. + */ + +#include "v3d_query.h" + +struct v3d_query_perfcnt +{ + struct v3d_query base; + + unsigned num_queries; + struct v3d_perfmon_state *perfmon; +}; + +static const char *v3d_counter_names[] = { + "FEP-valid-primitives-no-rendered-pixels", + "FEP-valid-primitives-rendered-pixels", + "FEP-clipped-quads", + "FEP-valid-quads", + "TLB-quads-not-passing-stencil-test", + "TLB-quads-not-passing-z-and-stencil-test", + "TLB-quads-passing-z-and-stencil-test", + "TLB-quads-with-zero-coverage", + "TLB-quads-with-non-zero-coverage", + "TLB-quads-written-to-color-buffer", + "PTB-primitives-discarded-outside-viewport", + "PTB-primitives-need-clipping", + "PTB-primitives-discared-reversed", + "QPU-total-idle-clk-cycles", + "QPU-total-active-clk-cycles-vertex-coord-shading", + "QPU-total-active-clk-cycles-fragment-shading", + "QPU-total-clk-cycles-executing-valid-instr", + "QPU-total-clk-cycles-waiting-TMU", + "QPU-total-clk-cycles-waiting-scoreboard", + "QPU-total-clk-cycles-waiting-varyings", + "QPU-total-instr-cache-hit", + "QPU-total-instr-cache-miss", + "QPU-total-uniform-cache-hit", + "QPU-total-uniform-cache-miss", + "TMU-total-text-quads-access", + "TMU-total-text-cache-miss", + "VPM-total-clk-cycles-VDW-stalled", + "VPM-total-clk-cycles-VCD-stalled", + "CLE-bin-thread-active-cycles", + "CLE-render-thread-active-cycles", + "L2T-total-cache-hit", + "L2T-total-cache-miss", + "cycle-count", + "QPU-total-clk-cycles-waiting-vertex-coord-shading", + "QPU-total-clk-cycles-waiting-fragment-shading", + "PTB-primitives-binned", + "AXI-writes-seen-watch-0", + 
"AXI-reads-seen-watch-0", + "AXI-writes-stalled-seen-watch-0", + "AXI-reads-stalled-seen-watch-0", + "AXI-write-bytes-seen-watch-0", + "AXI-read-bytes-seen-watch-0", + "AXI-writes-seen-watch-1", + "AXI-reads-seen-watch-1", + "AXI-writes-stalled-seen-watch-1", + "AXI-reads-stalled-seen-watch-1", + "AXI-write-bytes-seen-watch-1", + "AXI-read-bytes-seen-watch-1", + "TLB-partial-quads-written-to-color-buffer", + "TMU-total-config-access", + "L2T-no-id-stalled", + "L2T-command-queue-stalled", + "L2T-TMU-writes", + "TMU-active-cycles", + "TMU-stalled-cycles", + "CLE-thread-active-cycles", + "L2T-TMU-reads", + "L2T-CLE-reads", + "L2T-VCD-reads", + "L2T-TMU-config-reads", + "L2T-SLC0-reads", + "L2T-SLC1-reads", + "L2T-SLC2-reads", + "L2T-TMU-write-miss", + "L2T-TMU-read-miss", + "L2T-CLE-read-miss", + "L2T-VCD-read-miss", + "L2T-TMU-config-read-miss", + "L2T-SLC0-read-miss", + "L2T-SLC1-read-miss", + "L2T-SLC2-read-miss", + "core-memory-writes", + "L2T-memory-writes", + "PTB-memory-writes", + "TLB-memory-writes", + "core-memory-reads", + "L2T-memory-reads", + "PTB-memory-reads", + "PSE-memory-reads", + "TLB-memory-reads", + "GMP-memory-reads", + "PTB-memory-words-writes", + "TLB-memory-words-writes", + "PSE-memory-words-reads", + "TLB-memory-words-reads", + "TMU-MRU-hits", + "compute-active-cycles", +}; + +static void +kperfmon_destroy(struct v3d_context *v3d, struct v3d_perfmon_state *perfmon) +{ + struct drm_v3d_perfmon_destroy destroyreq; + + destroyreq.id = perfmon->kperfmon_id; + v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_DESTROY, &destroyreq); +} + +int +v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned index, + struct pipe_driver_query_group_info *info) +{ + if (!screen->has_perfmon) + return 0; + + if (!info) + return 1; + + if (index > 0) + return 0; + + info->name = "V3D counters"; + info->max_active_queries = DRM_V3D_MAX_PERF_COUNTERS; + info->num_queries = ARRAY_SIZE(v3d_counter_names); + + return 1; +} + +int 
+v3d_get_driver_query_info_perfcnt(struct v3d_screen *screen, unsigned index, + struct pipe_driver_query_info *info) +{ + if (!screen->has_perfmon) + return 0; + + if (!info) + return ARRAY_SIZE(v3d_counter_names); + + if (index >= ARRAY_SIZE(v3d_counter_names)) + return 0; + + info->group_id = 0; + info->name = v3d_counter_names[index]; + info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; + info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; + + return 1; +} + +static void +v3d_destroy_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query) +{ + struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query; + + assert(pquery->perfmon); + + if (v3d->active_perfmon == pquery->perfmon) { + fprintf(stderr, "Query is active; end query before destroying\n"); + return; + } + if (pquery->perfmon->kperfmon_id) + kperfmon_destroy(v3d, pquery->perfmon); + + v3d_fence_unreference(&pquery->perfmon->last_job_fence); + free(pquery->perfmon); + free(query); +} + +static bool +v3d_begin_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query) +{ + struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query; + struct drm_v3d_perfmon_create createreq = { 0 }; + int i, ret; + + /* Only one perfmon can be activated per context */ + if (v3d->active_perfmon) { + fprintf(stderr, + "Another query is already active; " + "finish it before starting a new one\n"); + return false; + } + + assert(pquery->perfmon); + + /* Reset the counters by destroying the previously allocated perfmon */ + if (pquery->perfmon->kperfmon_id) + kperfmon_destroy(v3d, pquery->perfmon); + + for (i = 0; i < pquery->num_queries; i++) + createreq.counters[i] = pquery->perfmon->counters[i]; + + createreq.ncounters = pquery->num_queries; + ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_CREATE, &createreq); + if (ret != 0) + return false; + + pquery->perfmon->kperfmon_id = createreq.id; 
+ pquery->perfmon->job_submitted = false; + v3d_fence_unreference(&pquery->perfmon->last_job_fence); + + /* Ensure all pending jobs are flushed before activating the + * perfmon + */ + v3d_flush((struct pipe_context *)v3d); + v3d->active_perfmon = pquery->perfmon; + + return true; +} + +static bool +v3d_end_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query) +{ + struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query; + + assert(pquery->perfmon); + + if (v3d->active_perfmon != pquery->perfmon) { + fprintf(stderr, "This query is not active\n"); + return false; + } + + /* Ensure all pending jobs are flushed before deactivating the + * perfmon + */ + v3d_flush((struct pipe_context *)v3d); + + /* Get a copy of latest submitted job's fence to wait for its + * completion + */ + if (v3d->active_perfmon->job_submitted) + v3d->active_perfmon->last_job_fence = v3d_fence_create(v3d); + + v3d->active_perfmon = NULL; + + return true; +} + +static bool +v3d_get_query_result_perfcnt(struct v3d_context *v3d, struct v3d_query *query, + bool wait, union pipe_query_result *vresult) +{ + struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query; + struct drm_v3d_perfmon_get_values req = { 0 }; + int i, ret; + + assert(pquery->perfmon); + + if (pquery->perfmon->job_submitted) { + if (!v3d_fence_wait(v3d->screen, + pquery->perfmon->last_job_fence, + wait ? 
PIPE_TIMEOUT_INFINITE : 0)) + return false; + + req.id = pquery->perfmon->kperfmon_id; + req.values_ptr = (uintptr_t)pquery->perfmon->values; + ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_GET_VALUES, &req); + if (ret != 0) { + fprintf(stderr, "Can't request perfmon counters values\n"); + return false; + } + } + + for (i = 0; i < pquery->num_queries; i++) + vresult->batch[i].u64 = pquery->perfmon->values[i]; + + return true; +} + +static const struct v3d_query_funcs perfcnt_query_funcs = { + .destroy_query = v3d_destroy_query_perfcnt, + .begin_query = v3d_begin_query_perfcnt, + .end_query = v3d_end_query_perfcnt, + .get_query_result = v3d_get_query_result_perfcnt, +}; + +struct pipe_query * +v3d_create_batch_query_perfcnt(struct v3d_context *v3d, unsigned num_queries, + unsigned *query_types) +{ + struct v3d_query_perfcnt *pquery = NULL; + struct v3d_query *query; + struct v3d_perfmon_state *perfmon = NULL; + int i; + + /* Validate queries */ + for (i = 0; i < num_queries; i++) { + if (query_types[i] < PIPE_QUERY_DRIVER_SPECIFIC || + query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC + + ARRAY_SIZE(v3d_counter_names)) { + fprintf(stderr, "Invalid query type\n"); + return NULL; + } + } + + pquery = calloc(1, sizeof(*pquery)); + if (!pquery) + return NULL; + + perfmon = calloc(1, sizeof(*perfmon)); + if (!perfmon) { + free(pquery); + return NULL; + } + + for (i = 0; i < num_queries; i++) + perfmon->counters[i] = query_types[i] - PIPE_QUERY_DRIVER_SPECIFIC; + + pquery->perfmon = perfmon; + pquery->num_queries = num_queries; + + query = &pquery->base; + query->funcs = &perfcnt_query_funcs; + + /* Note that struct pipe_query isn't actually defined anywhere. 
*/ + return (struct pipe_query *)query; +} diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 4d3ff0c7f56..033a6619f0f 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -827,6 +827,11 @@ v3d_screen_create(int fd, const struct pipe_screen_config *config, pscreen->is_dmabuf_modifier_supported = v3d_screen_is_dmabuf_modifier_supported; + if (screen->has_perfmon) { + pscreen->get_driver_query_group_info = v3d_get_driver_query_group_info; + pscreen->get_driver_query_info = v3d_get_driver_query_info; + } + return pscreen; fail: