nv50: add compute invocations counter

This is a purely software counter alongside the other hardware counters
for ease of use and consistency. However, we have to make room for it in
the allocated query space. Use this opportunity to make the nv50 queries
work like the nvc0 ones in terms of space allocation.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Pierre Moreau <dev@pmoreau.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10164>
This commit is contained in:
Ilia Mirkin 2021-03-02 00:18:07 -05:00 committed by Marge Bot
parent bd2f14a5ea
commit 58d47ca324
3 changed files with 29 additions and 10 deletions

View File

@ -628,4 +628,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
/* bind a compute shader clobbers fragment shader state */
nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
nv50->compute_invocations += info->block[0] * info->block[1] * info->block[2] *
info->grid[0] * info->grid[1] * info->grid[2];
}

View File

@ -225,6 +225,8 @@ struct nv50_context {
uint16_t images_valid;
struct util_dynarray global_residents;
uint64_t compute_invocations;
};
static inline struct nv50_context *

View File

@ -174,14 +174,15 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
nv50_hw_query_get(push, q, 0x30, 0x06805002);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
nv50_hw_query_get(push, q, 0x90, 0x00801002); /* VFETCH, VERTICES */
nv50_hw_query_get(push, q, 0xa0, 0x01801002); /* VFETCH, PRIMS */
nv50_hw_query_get(push, q, 0xb0, 0x02802002); /* VP, LAUNCHES */
nv50_hw_query_get(push, q, 0xc0, 0x03806002); /* GP, LAUNCHES */
nv50_hw_query_get(push, q, 0xd0, 0x04806002); /* GP, PRIMS_OUT */
nv50_hw_query_get(push, q, 0xe0, 0x07804002); /* RAST, PRIMS_IN */
nv50_hw_query_get(push, q, 0xf0, 0x08804002); /* RAST, PRIMS_OUT */
nv50_hw_query_get(push, q, 0x100, 0x0980a002); /* ROP, PIXELS */
((uint64_t *)hq->data)[2 * 0x11] = nv50->compute_invocations;
break;
case PIPE_QUERY_TIME_ELAPSED:
nv50_hw_query_get(push, q, 0x10, 0x00005002);
@ -237,6 +238,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
((uint64_t *)hq->data)[2 * 0x8] = nv50->compute_invocations;
break;
case PIPE_QUERY_TIMESTAMP:
hq->sequence++;
@ -316,7 +318,8 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
for (i = 0; i < 8; ++i)
res64[i] = data64[i * 2] - data64[16 + i * 2];
res64[i] = data64[i * 2] - data64[18 + i * 2];
result->pipeline_statistics.cs_invocations = data64[i * 2] - data64[18 + i * 2];
break;
case PIPE_QUERY_TIMESTAMP:
res64[0] = data64[1];
@ -351,6 +354,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
{
struct nv50_hw_query *hq;
struct nv50_query *q;
unsigned space = NV50_HW_QUERY_ALLOC_SPACE;
hq = nv50_hw_sm_create_query(nv50, type);
if (hq) {
@ -380,15 +384,25 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
hq->is64bit = true;
space = 32;
break;
case PIPE_QUERY_SO_STATISTICS:
hq->is64bit = true;
space = 64;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
hq->is64bit = true;
space = 9 * 2 * 16; /* 9 values, start/end, 16-bytes each */
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
space = 32;
break;
case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
space = 16;
break;
default:
debug_printf("invalid query type: %u\n", type);
@ -396,7 +410,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
return NULL;
}
if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
if (!nv50_hw_query_allocate(nv50, q, space)) {
FREE(hq);
return NULL;
}