freedreno/computerator: add performance counter support
Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4119>
This commit is contained in:
parent
af68b0d346
commit
30dd059925
|
@ -40,9 +40,17 @@ struct a6xx_backend {
|
|||
|
||||
unsigned seqno;
|
||||
struct fd_bo *control_mem;
|
||||
|
||||
struct fd_bo *query_mem;
|
||||
const struct perfcntr *perfcntrs;
|
||||
unsigned num_perfcntrs;
|
||||
};
|
||||
/* NOTE(review): presumably generates the to_a6xx_backend() downcast helper
 * that the functions below call; the define_cast() macro body is not
 * visible here -- confirm against its definition.
 */
define_cast(backend, a6xx_backend);
|
||||
|
||||
/*
|
||||
* Data structures shared with GPU:
|
||||
*/
|
||||
|
||||
/* This struct defines the layout of the fd6_context::control buffer: */
|
||||
struct fd6_control {
|
||||
uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */
|
||||
|
@ -65,6 +73,26 @@ struct fd6_control {
|
|||
/* Expands to four comma-separated arguments: the backend's control_mem BO,
 * the byte offset of `member` within struct fd6_control, and two trailing
 * zeros:
 */
#define control_ptr(a6xx_backend, member)          \
	(a6xx_backend)->control_mem,                    \
	offsetof(struct fd6_control, member),           \
	0, 0
|
||||
|
||||
|
||||
/* One performance-counter sample slot in the query buffer.  The GPU command
 * stream reads and writes these fields directly, so their order and packing
 * must not change:
 */
struct PACKED fd6_query_sample {
	uint64_t start;     /* counter value snapshotted before the grid runs */
	uint64_t result;    /* accumulated (stop - start) */
	uint64_t stop;      /* counter value snapshotted after the grid runs */
};
|
||||
|
||||
|
||||
/* Expands to four comma-separated arguments -- the query buffer BO, a byte
 * offset, 0, 0 -- addressing one field of element `idx` in an array of
 * fd6_query_sample.
 *
 * `idx` is parenthesized so that a compound expression (e.g. `i + 1`)
 * selects a whole sample instead of being split by operator precedence:
 */
#define query_sample_idx(a6xx_backend, idx, field)        \
	(a6xx_backend)->query_mem,                             \
	((idx) * sizeof(struct fd6_query_sample)) +            \
	offsetof(struct fd6_query_sample, field),              \
	0, 0
|
||||
|
||||
|
||||
/*
|
||||
* Backend implementation:
|
||||
*/
|
||||
|
||||
static struct kernel *
|
||||
a6xx_assemble(struct backend *b, FILE *in)
|
||||
{
|
||||
|
@ -307,6 +335,8 @@ cache_flush(struct fd_ringbuffer *ring, struct kernel *kernel)
|
|||
static void
|
||||
a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], struct fd_submit *submit)
|
||||
{
|
||||
struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel);
|
||||
struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend);
|
||||
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0,
|
||||
FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
|
||||
|
||||
|
@ -344,6 +374,34 @@ a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], struct fd_submit *submit
|
|||
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
|
||||
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
|
||||
|
||||
if (a6xx_backend->num_perfcntrs > 0) {
|
||||
a6xx_backend->query_mem = fd_bo_new(a6xx_backend->dev,
|
||||
a6xx_backend->num_perfcntrs * sizeof(struct fd6_query_sample),
|
||||
DRM_FREEDRENO_GEM_TYPE_KMEM, "query");
|
||||
|
||||
/* configure the performance counters to count the requested
|
||||
* countables:
|
||||
*/
|
||||
for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
|
||||
const struct perfcntr *counter = &a6xx_backend->perfcntrs[i];
|
||||
|
||||
OUT_PKT4(ring, counter->select_reg, 1);
|
||||
OUT_RING(ring, counter->selector);
|
||||
}
|
||||
|
||||
OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
/* and snapshot the start values: */
|
||||
for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
|
||||
const struct perfcntr *counter = &a6xx_backend->perfcntrs[i];
|
||||
|
||||
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_64B |
|
||||
CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
|
||||
OUT_RELOCW(ring, query_sample_idx(a6xx_backend, i, start));
|
||||
}
|
||||
}
|
||||
|
||||
OUT_PKT7(ring, CP_EXEC_CS, 4);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(grid[0]));
|
||||
|
@ -352,9 +410,56 @@ a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], struct fd_submit *submit
|
|||
|
||||
OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
|
||||
|
||||
if (a6xx_backend->num_perfcntrs > 0) {
|
||||
/* snapshot the end values: */
|
||||
for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
|
||||
const struct perfcntr *counter = &a6xx_backend->perfcntrs[i];
|
||||
|
||||
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
|
||||
OUT_RING(ring, CP_REG_TO_MEM_0_64B |
|
||||
CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
|
||||
OUT_RELOCW(ring, query_sample_idx(a6xx_backend, i, stop));
|
||||
}
|
||||
|
||||
/* and compute the result: */
|
||||
for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
|
||||
/* result += stop - start: */
|
||||
OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
|
||||
OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
|
||||
CP_MEM_TO_MEM_0_NEG_C);
|
||||
OUT_RELOCW(ring, query_sample_idx(a6xx_backend, i, result)); /* dst */
|
||||
OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, result)); /* srcA */
|
||||
OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, stop)); /* srcB */
|
||||
OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, start)); /* srcC */
|
||||
}
|
||||
}
|
||||
|
||||
cache_flush(ring, kernel);
|
||||
}
|
||||
|
||||
static void
|
||||
a6xx_set_perfcntrs(struct backend *b, const struct perfcntr *perfcntrs,
|
||||
unsigned num_perfcntrs)
|
||||
{
|
||||
struct a6xx_backend *a6xx_backend = to_a6xx_backend(b);
|
||||
|
||||
a6xx_backend->perfcntrs = perfcntrs;
|
||||
a6xx_backend->num_perfcntrs = num_perfcntrs;
|
||||
}
|
||||
|
||||
static void
|
||||
a6xx_read_perfcntrs(struct backend *b, uint64_t *results)
|
||||
{
|
||||
struct a6xx_backend *a6xx_backend = to_a6xx_backend(b);
|
||||
|
||||
fd_bo_cpu_prep(a6xx_backend->query_mem, NULL, DRM_FREEDRENO_PREP_READ);
|
||||
struct fd6_query_sample *samples = fd_bo_map(a6xx_backend->query_mem);
|
||||
|
||||
for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
|
||||
results[i] = samples[i].result;
|
||||
}
|
||||
}
|
||||
|
||||
struct backend *
|
||||
a6xx_init(struct fd_device *dev, uint32_t gpu_id)
|
||||
{
|
||||
|
@ -364,6 +469,8 @@ a6xx_init(struct fd_device *dev, uint32_t gpu_id)
|
|||
.assemble = a6xx_assemble,
|
||||
.disassemble = a6xx_disassemble,
|
||||
.emit_grid = a6xx_emit_grid,
|
||||
.set_perfcntrs = a6xx_set_perfcntrs,
|
||||
.read_perfcntrs = a6xx_read_perfcntrs,
|
||||
};
|
||||
|
||||
a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id);
|
||||
|
|
|
@ -22,10 +22,14 @@
|
|||
*/
|
||||
|
||||
#include <getopt.h>
|
||||
#include <inttypes.h>
|
||||
#include <locale.h>
|
||||
#include <xf86drm.h>
|
||||
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "perfcntrs/freedreno_perfcntr.h"
|
||||
|
||||
#include "main.h"
|
||||
|
||||
|
||||
|
@ -91,13 +95,14 @@ dump_hex(void *buf, int sz)
|
|||
}
|
||||
}
|
||||
|
||||
/* Short option string for getopt: a letter followed by ':' takes an
 * argument.  Keep in sync with longopts[] below.  (The stale definition
 * without "p:" is dropped; two definitions of shortopts cannot coexist.)
 */
static const char *shortopts = "df:g:hp:";

/* Long option table; each entry maps onto the short option of the same
 * letter, and the all-zero entry terminates the list:
 */
static const struct option longopts[] = {
	{"disasm",   no_argument,       0, 'd'},
	{"file",     required_argument, 0, 'f'},
	{"groups",   required_argument, 0, 'g'},
	{"help",     no_argument,       0, 'h'},
	{"perfcntr", required_argument, 0, 'p'},
	{0, 0, 0, 0}
};
|
||||
|
||||
|
@ -111,18 +116,101 @@ usage(const char *name)
|
|||
" -f, --file=FILE read shader from file (instead of stdin)\n"
|
||||
" -g, --groups=X,Y,Z use specified group size\n"
|
||||
" -h, --help show this message\n"
|
||||
" -p, --perfcntr=LIST sample specified performance counters (comma\n"
|
||||
" separated list)\n"
|
||||
,
|
||||
name);
|
||||
}
|
||||
|
||||
/* Performance counter group table for the current GPU and its length; both
 * are filled in by parse_perfcntrs():
 */
static const struct fd_perfcntr_group *groups;
static unsigned num_groups;

/* Number of counters already claimed in each group, indexed like groups[]: */
static unsigned *enabled_counters;
|
||||
|
||||
static void
|
||||
setup_counter(const char *name, struct perfcntr *c)
|
||||
{
|
||||
for (int i = 0; i < num_groups; i++) {
|
||||
const struct fd_perfcntr_group *group = &groups[i];
|
||||
|
||||
for (int j = 0; j < group->num_countables; j++) {
|
||||
const struct fd_perfcntr_countable *countable = &group->countables[j];
|
||||
|
||||
if (strcmp(name, countable->name) != 0)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Allocate a counter to use to monitor the requested countable:
|
||||
*/
|
||||
if (enabled_counters[i] >= group->num_counters) {
|
||||
errx(-1, "Too many counters selected in group: %s", group->name);
|
||||
}
|
||||
|
||||
unsigned idx = enabled_counters[i]++;
|
||||
const struct fd_perfcntr_counter *counter = &group->counters[idx];
|
||||
|
||||
/*
|
||||
* And initialize the perfcntr struct, pulling together the info
|
||||
* about selected counter and countable, to simplify life for the
|
||||
* backend:
|
||||
*/
|
||||
c->name = name;
|
||||
c->select_reg = counter->select_reg;
|
||||
c->counter_reg_lo = counter->counter_reg_lo;
|
||||
c->counter_reg_hi = counter->counter_reg_hi;
|
||||
c->selector = countable->selector;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
errx(-1, "could not find countable: %s", name);
|
||||
}
|
||||
|
||||
static struct perfcntr *
|
||||
parse_perfcntrs(uint32_t gpu_id, const char *perfcntrstr, unsigned *num_perfcntrs)
|
||||
{
|
||||
struct perfcntr *counters = NULL;
|
||||
char *cnames, *s;
|
||||
unsigned cnt = 0;
|
||||
|
||||
groups = fd_perfcntrs(gpu_id, &num_groups);
|
||||
enabled_counters = calloc(num_groups, sizeof(enabled_counters[0]));
|
||||
|
||||
cnames = strdup(perfcntrstr);
|
||||
while ((s = strstr(cnames, ","))) {
|
||||
char *name = cnames;
|
||||
s[0] = '\0';
|
||||
cnames = &s[1];
|
||||
|
||||
counters = realloc(counters, ++cnt * sizeof(counters[0]));
|
||||
setup_counter(name, &counters[cnt-1]);
|
||||
}
|
||||
|
||||
char * name = cnames;
|
||||
counters = realloc(counters, ++cnt * sizeof(counters[0]));
|
||||
setup_counter(name, &counters[cnt-1]);
|
||||
|
||||
*num_perfcntrs = cnt;
|
||||
|
||||
return counters;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
FILE *in = stdin;
|
||||
const char *perfcntrstr = NULL;
|
||||
struct perfcntr *perfcntrs = NULL;
|
||||
unsigned num_perfcntrs = 0;
|
||||
bool disasm = false;
|
||||
uint32_t grid[3] = {0};
|
||||
int opt, ret;
|
||||
|
||||
setlocale(LC_NUMERIC, "en_US.UTF-8");
|
||||
|
||||
while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
|
||||
switch (opt) {
|
||||
case 'd':
|
||||
|
@ -140,6 +228,9 @@ main(int argc, char **argv)
|
|||
break;
|
||||
case 'h':
|
||||
goto usage;
|
||||
case 'p':
|
||||
perfcntrstr = optarg;
|
||||
break;
|
||||
default:
|
||||
printf("unrecognized arg: %c\n", opt);
|
||||
goto usage;
|
||||
|
@ -185,6 +276,14 @@ main(int argc, char **argv)
|
|||
|
||||
struct fd_submit *submit = fd_submit_new(pipe);
|
||||
|
||||
if (perfcntrstr) {
|
||||
if (!backend->set_perfcntrs) {
|
||||
err(1, "performance counters not supported");
|
||||
}
|
||||
perfcntrs = parse_perfcntrs(gpu_id, perfcntrstr, &num_perfcntrs);
|
||||
backend->set_perfcntrs(backend, perfcntrs, num_perfcntrs);
|
||||
}
|
||||
|
||||
backend->emit_grid(kernel, grid, submit);
|
||||
|
||||
fd_submit_flush(submit, -1, NULL, NULL);
|
||||
|
@ -198,6 +297,15 @@ main(int argc, char **argv)
|
|||
dump_float(map, kernel->buf_sizes[i] * 4);
|
||||
}
|
||||
|
||||
if (perfcntrstr) {
|
||||
uint64_t results[num_perfcntrs];
|
||||
backend->read_perfcntrs(backend, results);
|
||||
|
||||
for (unsigned i = 0; i < num_perfcntrs; i++) {
|
||||
printf("%s:\t%'"PRIu64"\n", perfcntrs[i].name, results[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
usage:
|
||||
|
|
|
@ -46,12 +46,31 @@ struct kernel {
|
|||
struct fd_bo *bufs[MAX_BUFS];
|
||||
};
|
||||
|
||||
/* Describes one requested performance counter: the counter that was
 * allocated for it and the countable that counter is programmed to count.
 */
struct perfcntr {
	const char *name;           /* name of the countable */

	/* the selected counter, for the backend to configure and read it: */
	unsigned select_reg;
	unsigned counter_reg_lo;
	unsigned counter_reg_hi;

	/* the selected countable: */
	unsigned selector;
};
|
||||
|
||||
/* Per-generation entry-points.  Each GPU generation fills in one of these
 * tables:
 */
struct backend {
	/* read a shader from `in` and build a kernel from it: */
	struct kernel *(*assemble)(struct backend *b, FILE *in);

	/* write a disassembly of `kernel` to `out`: */
	void (*disassemble)(struct kernel *kernel, FILE *out);

	/* emit the commands to run `kernel` over `grid` into `submit`: */
	void (*emit_grid)(struct kernel *kernel, uint32_t grid[3],
			struct fd_submit *submit);

	/* Performance-counter API.  set_perfcntrs may be left NULL by a
	 * backend without counter support; callers check it before use.
	 * read_perfcntrs stores one value per configured counter in `results`.
	 */
	void (*set_perfcntrs)(struct backend *b, const struct perfcntr *perfcntrs,
			unsigned num_perfcntrs);
	void (*read_perfcntrs)(struct backend *b, uint64_t *results);
};
|
||||
|
||||
#define define_cast(_from, _to) \
|
||||
|
|
|
@ -56,6 +56,7 @@ computerator = executable(
|
|||
link_with : [
|
||||
libfreedreno_drm,
|
||||
libfreedreno_ir3,
|
||||
libfreedreno_perfcntrs,
|
||||
],
|
||||
dependencies : [
|
||||
dep_libdrm,
|
||||
|
|
Loading…
Reference in New Issue