nv50: add compute invocations counter
This is a purely software counter alongside the other hardware counters for ease of use and consistency. However, we have to make room for it in the allocated query space. Use this opportunity to make the nv50 queries work like the nvc0 ones in terms of space allocation.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Pierre Moreau <dev@pmoreau.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10164>
This commit is contained in:
parent
bd2f14a5ea
commit
58d47ca324
|
@ -628,4 +628,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
|||
|
||||
/* bind a compute shader clobbers fragment shader state */
|
||||
nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
|
||||
|
||||
nv50->compute_invocations += info->block[0] * info->block[1] * info->block[2] *
|
||||
info->grid[0] * info->grid[1] * info->grid[2];
|
||||
}
|
||||
|
|
|
@ -225,6 +225,8 @@ struct nv50_context {
|
|||
uint16_t images_valid;
|
||||
|
||||
struct util_dynarray global_residents;
|
||||
|
||||
uint64_t compute_invocations;
|
||||
};
|
||||
|
||||
static inline struct nv50_context *
|
||||
|
|
|
@ -174,14 +174,15 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
|
|||
nv50_hw_query_get(push, q, 0x30, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
|
||||
nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
|
||||
nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
|
||||
nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
|
||||
nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
|
||||
nv50_hw_query_get(push, q, 0x90, 0x00801002); /* VFETCH, VERTICES */
|
||||
nv50_hw_query_get(push, q, 0xa0, 0x01801002); /* VFETCH, PRIMS */
|
||||
nv50_hw_query_get(push, q, 0xb0, 0x02802002); /* VP, LAUNCHES */
|
||||
nv50_hw_query_get(push, q, 0xc0, 0x03806002); /* GP, LAUNCHES */
|
||||
nv50_hw_query_get(push, q, 0xd0, 0x04806002); /* GP, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0xe0, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nv50_hw_query_get(push, q, 0xf0, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0x100, 0x0980a002); /* ROP, PIXELS */
|
||||
((uint64_t *)hq->data)[2 * 0x11] = nv50->compute_invocations;
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
nv50_hw_query_get(push, q, 0x10, 0x00005002);
|
||||
|
@ -237,6 +238,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
|
|||
nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
|
||||
((uint64_t *)hq->data)[2 * 0x8] = nv50->compute_invocations;
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
hq->sequence++;
|
||||
|
@ -316,7 +318,8 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
|
|||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
for (i = 0; i < 8; ++i)
|
||||
res64[i] = data64[i * 2] - data64[16 + i * 2];
|
||||
res64[i] = data64[i * 2] - data64[18 + i * 2];
|
||||
result->pipeline_statistics.cs_invocations = data64[i * 2] - data64[18 + i * 2];
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
res64[0] = data64[1];
|
||||
|
@ -351,6 +354,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
|
|||
{
|
||||
struct nv50_hw_query *hq;
|
||||
struct nv50_query *q;
|
||||
unsigned space = NV50_HW_QUERY_ALLOC_SPACE;
|
||||
|
||||
hq = nv50_hw_sm_create_query(nv50, type);
|
||||
if (hq) {
|
||||
|
@ -380,15 +384,25 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
|
|||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
hq->is64bit = true;
|
||||
space = 32;
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
hq->is64bit = true;
|
||||
space = 64;
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
hq->is64bit = true;
|
||||
space = 9 * 2 * 16; /* 9 values, start/end, 16-bytes each */
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
space = 32;
|
||||
break;
|
||||
case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
space = 16;
|
||||
break;
|
||||
default:
|
||||
debug_printf("invalid query type: %u\n", type);
|
||||
|
@ -396,7 +410,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
|
||||
if (!nv50_hw_query_allocate(nv50, q, space)) {
|
||||
FREE(hq);
|
||||
return NULL;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue