intel: Rename gen_perf prefix to intel_perf in source files
export SEARCH_PATH="src/intel src/gallium/drivers/iris src/mesa/drivers/dri/i965"
grep -E "gen_perf" -rIl $SEARCH_PATH | xargs sed -ie "s/gen_perf\([^\.]\)/intel_perf\1/g"

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10241>
parent e6e79436af
commit bbe81292c6
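A quick illustration (not part of the original commit message) of what the substitution does: the \([^\.]\) capture group keeps the character that follows "gen_perf" and skips any match that is followed by a dot, so file references such as gen_perf.h are intentionally left alone by this pass.

echo 'struct gen_perf_context *perf_ctx;  /* see gen_perf.h */' \
  | sed -e 's/gen_perf\([^\.]\)/intel_perf\1/g'
# prints: struct intel_perf_context *perf_ctx;  /* see gen_perf.h */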
@@ -662,7 +662,7 @@ struct iris_context {
 struct iris_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
 } shaders;

-struct gen_perf_context *perf_ctx;
+struct intel_perf_context *perf_ctx;

 /** Frame number for debug prints */
 uint32_t frame;

@@ -35,7 +35,7 @@ struct iris_monitor_object {
 size_t result_size;
 unsigned char *result_buffer;

-struct gen_perf_query_object *query;
+struct intel_perf_query_object *query;
 };

 int

@@ -43,7 +43,7 @@ iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
 struct pipe_driver_query_info *info)
 {
 const struct iris_screen *screen = (struct iris_screen *)pscreen;
-const struct gen_perf_config *perf_cfg = screen->perf_cfg;
+const struct intel_perf_config *perf_cfg = screen->perf_cfg;
 assert(perf_cfg);
 if (!perf_cfg)
 return 0;

@@ -53,8 +53,8 @@ iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
 return perf_cfg->n_counters;
 }

-struct gen_perf_query_counter_info *counter_info = &perf_cfg->counter_infos[index];
-struct gen_perf_query_counter *counter = counter_info->counter;
+struct intel_perf_query_counter_info *counter_info = &perf_cfg->counter_infos[index];
+struct intel_perf_query_counter *counter = counter_info->counter;

 info->group_id = counter_info->location.group_idx;
 info->name = counter->name;

@@ -93,7 +93,7 @@ iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
 static bool
 iris_monitor_init_metrics(struct iris_screen *screen)
 {
-struct gen_perf_config *perf_cfg = gen_perf_new(screen);
+struct intel_perf_config *perf_cfg = intel_perf_new(screen);
 if (unlikely(!perf_cfg))
 return false;

@@ -101,7 +101,7 @@ iris_monitor_init_metrics(struct iris_screen *screen)

 iris_perf_init_vtbl(perf_cfg);

-gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd,
+intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd,
 true /* pipeline stats*/);

 return perf_cfg->n_counters > 0;

@@ -118,7 +118,7 @@ iris_get_monitor_group_info(struct pipe_screen *pscreen,
 return 0;
 }

-const struct gen_perf_config *perf_cfg = screen->perf_cfg;
+const struct intel_perf_config *perf_cfg = screen->perf_cfg;

 if (!info) {
 /* return the count that can be queried */

@@ -130,7 +130,7 @@ iris_get_monitor_group_info(struct pipe_screen *pscreen,
 return 0;
 }

-struct gen_perf_query_info *query = &perf_cfg->queries[group_index];
+struct intel_perf_query_info *query = &perf_cfg->queries[group_index];

 info->name = query->name;
 info->max_active_queries = query->n_counters;

@@ -144,13 +144,13 @@ iris_init_monitor_ctx(struct iris_context *ice)
 {
 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;

-ice->perf_ctx = gen_perf_new_context(ice);
+ice->perf_ctx = intel_perf_new_context(ice);
 if (unlikely(!ice->perf_ctx))
 return;

-struct gen_perf_context *perf_ctx = ice->perf_ctx;
-struct gen_perf_config *perf_cfg = screen->perf_cfg;
-gen_perf_init_context(perf_ctx,
+struct intel_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_config *perf_cfg = screen->perf_cfg;
+intel_perf_init_context(perf_ctx,
 perf_cfg,
 ice,
 ice,

@@ -167,8 +167,8 @@ iris_create_monitor_object(struct iris_context *ice,
 unsigned *query_types)
 {
 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
-struct gen_perf_config *perf_cfg = screen->perf_cfg;
-struct gen_perf_query_object *query_obj = NULL;
+struct intel_perf_config *perf_cfg = screen->perf_cfg;
+struct intel_perf_query_object *query_obj = NULL;

 /* initialize perf context if this has not already been done. This
 * function is the first entry point that carries the gl context.

@@ -176,7 +176,7 @@ iris_create_monitor_object(struct iris_context *ice,
 if (ice->perf_ctx == NULL) {
 iris_init_monitor_ctx(ice);
 }
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

 assert(num_queries > 0);
 int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;

@@ -204,8 +204,8 @@ iris_create_monitor_object(struct iris_context *ice,
 perf_cfg->counter_infos[current_query_index].location.counter_idx;
 }

-/* create the gen_perf_query */
-query_obj = gen_perf_new_query(perf_ctx, group);
+/* create the intel_perf_query */
+query_obj = intel_perf_new_query(perf_ctx, group);
 if (unlikely(!query_obj))
 goto allocation_failure;

@@ -233,7 +233,7 @@ iris_destroy_monitor_object(struct pipe_context *ctx,
 {
 struct iris_context *ice = (struct iris_context *)ctx;

-gen_perf_delete_query(ice->perf_ctx, monitor->query);
+intel_perf_delete_query(ice->perf_ctx, monitor->query);
 free(monitor->result_buffer);
 monitor->result_buffer = NULL;
 free(monitor->active_counters);

@@ -246,9 +246,9 @@ iris_begin_monitor(struct pipe_context *ctx,
 struct iris_monitor_object *monitor)
 {
 struct iris_context *ice = (void *) ctx;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

-return gen_perf_begin_query(perf_ctx, monitor->query);
+return intel_perf_begin_query(perf_ctx, monitor->query);
 }

 bool

@@ -256,9 +256,9 @@ iris_end_monitor(struct pipe_context *ctx,
 struct iris_monitor_object *monitor)
 {
 struct iris_context *ice = (void *) ctx;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

-gen_perf_end_query(perf_ctx, monitor->query);
+intel_perf_end_query(perf_ctx, monitor->query);
 return true;
 }

@@ -269,22 +269,22 @@ iris_get_monitor_result(struct pipe_context *ctx,
 union pipe_numeric_type_union *result)
 {
 struct iris_context *ice = (void *) ctx;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;
 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

 bool monitor_ready =
-gen_perf_is_query_ready(perf_ctx, monitor->query, batch);
+intel_perf_is_query_ready(perf_ctx, monitor->query, batch);

 if (!monitor_ready) {
 if (!wait)
 return false;
-gen_perf_wait_query(perf_ctx, monitor->query, batch);
+intel_perf_wait_query(perf_ctx, monitor->query, batch);
 }

-assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch));
+assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch));

 unsigned bytes_written;
-gen_perf_get_query_data(perf_ctx, monitor->query, batch,
+intel_perf_get_query_data(perf_ctx, monitor->query, batch,
 monitor->result_size,
 (unsigned*) monitor->result_buffer,
 &bytes_written);

@@ -294,11 +294,11 @@ iris_get_monitor_result(struct pipe_context *ctx,
 /* copy metrics into the batch result */
 for (int i = 0; i < monitor->num_active_counters; ++i) {
 int current_counter = monitor->active_counters[i];
-const struct gen_perf_query_info *info =
-gen_perf_query_info(monitor->query);
-const struct gen_perf_query_counter *counter =
+const struct intel_perf_query_info *info =
+intel_perf_query_info(monitor->query);
+const struct intel_perf_query_counter *counter =
 &info->counters[current_counter];
-assert(gen_perf_query_counter_get_size(counter));
+assert(intel_perf_query_counter_get_size(counter));
 switch (counter->data_type) {
 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
 result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);

@@ -83,7 +83,7 @@ typedef void (*bo_wait_rendering_t)(void *bo);
 typedef int (*bo_busy_t)(void *bo);

 void
-iris_perf_init_vtbl(struct gen_perf_config *perf_cfg)
+iris_perf_init_vtbl(struct intel_perf_config *perf_cfg)
 {
 perf_cfg->vtbl.bo_alloc = iris_oa_bo_alloc;
 perf_cfg->vtbl.bo_unreference = (bo_unreference_t)iris_bo_unreference;

@@ -26,6 +26,6 @@
 #include "perf/intel_perf.h"
 #include "perf/intel_perf_query.h"

-void iris_perf_init_vtbl(struct gen_perf_config *cfg);
+void iris_perf_init_vtbl(struct intel_perf_config *cfg);

 #endif /* IRIS_PERF_H */

@@ -27,7 +27,7 @@

 struct iris_perf_query {
 struct gl_perf_query_object base;
-struct gen_perf_query_object *query;
+struct intel_perf_query_object *query;
 };

 static unsigned

@@ -35,9 +35,9 @@ iris_init_perf_query_info(struct pipe_context *pipe)
 {
 struct iris_context *ice = (void *) pipe;
 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
-struct gen_perf_config *perf_cfg = NULL;
+struct intel_perf_config *perf_cfg = NULL;

-/* make sure pipe perf counter type/data-type enums are matched with gen_perf's */
+/* make sure pipe perf counter type/data-type enums are matched with intel_perf's */
 STATIC_ASSERT(PIPE_PERF_COUNTER_TYPE_EVENT == (enum pipe_perf_counter_type)GEN_PERF_COUNTER_TYPE_EVENT);
 STATIC_ASSERT(PIPE_PERF_COUNTER_TYPE_DURATION_NORM == (enum pipe_perf_counter_type)GEN_PERF_COUNTER_TYPE_DURATION_NORM);
 STATIC_ASSERT(PIPE_PERF_COUNTER_TYPE_DURATION_RAW == (enum pipe_perf_counter_type)GEN_PERF_COUNTER_TYPE_DURATION_RAW);

@@ -51,21 +51,21 @@ iris_init_perf_query_info(struct pipe_context *pipe)
 STATIC_ASSERT(PIPE_PERF_COUNTER_DATA_TYPE_DOUBLE == (enum pipe_perf_counter_data_type)GEN_PERF_COUNTER_DATA_TYPE_DOUBLE);

 if (!ice->perf_ctx)
-ice->perf_ctx = gen_perf_new_context(ice);
+ice->perf_ctx = intel_perf_new_context(ice);

 if (unlikely(!ice->perf_ctx))
 return 0;

-perf_cfg = gen_perf_config(ice->perf_ctx);
+perf_cfg = intel_perf_config(ice->perf_ctx);

 if (perf_cfg)
 return perf_cfg->n_queries;

-perf_cfg = gen_perf_new(ice->perf_ctx);
+perf_cfg = intel_perf_new(ice->perf_ctx);

 iris_perf_init_vtbl(perf_cfg);

-gen_perf_init_context(ice->perf_ctx,
+intel_perf_init_context(ice->perf_ctx,
 perf_cfg,
 ice,
 ice,

@@ -74,7 +74,7 @@ iris_init_perf_query_info(struct pipe_context *pipe)
 ice->batches[IRIS_BATCH_RENDER].hw_ctx_id,
 screen->fd);

-gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, true /* pipeline_statistics */);
+intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, true /* pipeline_statistics */);

 return perf_cfg->n_queries;
 }

@@ -83,14 +83,14 @@ static struct pipe_query *
 iris_new_perf_query_obj(struct pipe_context *pipe, unsigned query_index)
 {
 struct iris_context *ice = (void *) pipe;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
-struct gen_perf_query_object * obj = gen_perf_new_query(perf_ctx, query_index);
+struct intel_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_query_object * obj = intel_perf_new_query(perf_ctx, query_index);
 if (unlikely(!obj))
 return NULL;

 struct iris_perf_query *q = calloc(1, sizeof(struct iris_perf_query));
 if (unlikely(!q)) {
-gen_perf_delete_query(perf_ctx, obj);
+intel_perf_delete_query(perf_ctx, obj);
 return NULL;
 }

@@ -103,10 +103,10 @@ iris_begin_perf_query(struct pipe_context *pipe, struct pipe_query *q)
 {
 struct iris_context *ice = (void *) pipe;
 struct iris_perf_query *perf_query= (struct iris_perf_query *) q;
-struct gen_perf_query_object *obj = perf_query->query;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_query_object *obj = perf_query->query;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

-return gen_perf_begin_query(perf_ctx, obj);
+return intel_perf_begin_query(perf_ctx, obj);
 }

 static void

@@ -114,10 +114,10 @@ iris_end_perf_query(struct pipe_context *pipe, struct pipe_query *q)
 {
 struct iris_context *ice = (void *) pipe;
 struct iris_perf_query *perf_query = (struct iris_perf_query *) q;
-struct gen_perf_query_object *obj = perf_query->query;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_query_object *obj = perf_query->query;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

-gen_perf_end_query(perf_ctx, obj);
+intel_perf_end_query(perf_ctx, obj);
 }

 static void

@@ -125,10 +125,10 @@ iris_delete_perf_query(struct pipe_context *pipe, struct pipe_query *q)
 {
 struct iris_context *ice = (void *) pipe;
 struct iris_perf_query *perf_query = (struct iris_perf_query *) q;
-struct gen_perf_query_object *obj = perf_query->query;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_query_object *obj = perf_query->query;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

-gen_perf_delete_query(perf_ctx, obj);
+intel_perf_delete_query(perf_ctx, obj);
 free(q);
 }

@@ -141,14 +141,14 @@ iris_get_perf_query_info(struct pipe_context *pipe,
 uint32_t *n_active)
 {
 struct iris_context *ice = (void *) pipe;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
-struct gen_perf_config *perf_cfg = gen_perf_config(perf_ctx);
-const struct gen_perf_query_info *info = &perf_cfg->queries[query_index];
+struct intel_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);
+const struct intel_perf_query_info *info = &perf_cfg->queries[query_index];

 *name = info->name;
 *data_size = info->data_size;
 *n_counters = info->n_counters;
-*n_active = gen_perf_active_queries(perf_ctx, info);
+*n_active = intel_perf_active_queries(perf_ctx, info);
 }

 static void

@@ -164,15 +164,15 @@ iris_get_perf_counter_info(struct pipe_context *pipe,
 uint64_t *raw_max)
 {
 struct iris_context *ice = (void *) pipe;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
-struct gen_perf_config *perf_cfg = gen_perf_config(perf_ctx);
-const struct gen_perf_query_info *info = &perf_cfg->queries[query_index];
-const struct gen_perf_query_counter *counter = &info->counters[counter_index];
+struct intel_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);
+const struct intel_perf_query_info *info = &perf_cfg->queries[query_index];
+const struct intel_perf_query_counter *counter = &info->counters[counter_index];

 *name = counter->name;
 *desc = counter->desc;
 *offset = counter->offset;
-*data_size = gen_perf_query_counter_get_size(counter);
+*data_size = intel_perf_query_counter_get_size(counter);
 *type_enum = counter->type;
 *data_type_enum = counter->data_type;
 *raw_max = counter->raw_max;

@@ -183,10 +183,10 @@ iris_wait_perf_query(struct pipe_context *pipe, struct pipe_query *q)
 {
 struct iris_context *ice = (void *) pipe;
 struct iris_perf_query *perf_query = (struct iris_perf_query *) q;
-struct gen_perf_query_object *obj = perf_query->query;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_query_object *obj = perf_query->query;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

-gen_perf_wait_query(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER]);
+intel_perf_wait_query(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER]);
 }

 static bool

@@ -194,13 +194,13 @@ iris_is_perf_query_ready(struct pipe_context *pipe, struct pipe_query *q)
 {
 struct iris_context *ice = (void *) pipe;
 struct iris_perf_query *perf_query = (struct iris_perf_query *) q;
-struct gen_perf_query_object *obj = perf_query->query;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_query_object *obj = perf_query->query;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

 if (perf_query->base.Ready)
 return true;

-return gen_perf_is_query_ready(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER]);
+return intel_perf_is_query_ready(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER]);
 }

 static void

@@ -212,10 +212,10 @@ iris_get_perf_query_data(struct pipe_context *pipe,
 {
 struct iris_context *ice = (void *) pipe;
 struct iris_perf_query *perf_query = (struct iris_perf_query *) q;
-struct gen_perf_query_object *obj = perf_query->query;
-struct gen_perf_context *perf_ctx = ice->perf_ctx;
+struct intel_perf_query_object *obj = perf_query->query;
+struct intel_perf_context *perf_ctx = ice->perf_ctx;

-gen_perf_get_query_data(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER],
+intel_perf_get_query_data(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER],
 data_size, data, bytes_written);
 }

@@ -202,7 +202,7 @@ struct iris_screen {
 struct isl_device isl_dev;
 struct iris_bufmgr *bufmgr;
 struct brw_compiler *compiler;
-struct gen_perf_config *perf_cfg;
+struct intel_perf_config *perf_cfg;

 const struct intel_l3_config *l3_config_3d;
 const struct intel_l3_config *l3_config_cs;

@ -286,10 +286,10 @@ def output_counter_read(gen, set, counter):
|
|||
read_eq = counter.get('equation')
|
||||
|
||||
c("static " + ret_type)
|
||||
c(counter.read_sym + "(UNUSED struct gen_perf_config *perf,\n")
|
||||
c(counter.read_sym + "(UNUSED struct intel_perf_config *perf,\n")
|
||||
c_indent(len(counter.read_sym) + 1)
|
||||
c("const struct gen_perf_query_info *query,\n")
|
||||
c("const struct gen_perf_query_result *results)\n")
|
||||
c("const struct intel_perf_query_info *query,\n")
|
||||
c("const struct intel_perf_query_result *results)\n")
|
||||
c_outdent(len(counter.read_sym) + 1)
|
||||
|
||||
c("{")
|
||||
|
@ -321,7 +321,7 @@ def output_counter_max(gen, set, counter):
|
|||
ret_type = "uint64_t"
|
||||
|
||||
c("static " + ret_type)
|
||||
c(counter.max_sym() + "(struct gen_perf_config *perf)\n")
|
||||
c(counter.max_sym() + "(struct intel_perf_config *perf)\n")
|
||||
c("{")
|
||||
c_indent(3)
|
||||
output_rpn_equation_code(set, counter, max_eq)
|
||||
|
@ -468,7 +468,7 @@ def generate_register_configs(set):
|
|||
c_indent(3)
|
||||
|
||||
registers = register_config.findall('register')
|
||||
c("static const struct gen_perf_query_register_prog %s[] = {" % t)
|
||||
c("static const struct intel_perf_query_register_prog %s[] = {" % t)
|
||||
c_indent(3)
|
||||
for register in registers:
|
||||
c("{ .reg = %s, .val = %s }," % (register.get('address'), register.get('value')))
|
||||
|
@ -666,7 +666,7 @@ def main():
|
|||
h(textwrap.dedent("""\
|
||||
#pragma once
|
||||
|
||||
struct gen_perf_config;
|
||||
struct intel_perf_config;
|
||||
|
||||
"""))
|
||||
|
||||
|
@ -709,11 +709,11 @@ def main():
|
|||
|
||||
c("\n")
|
||||
c("\nstatic void\n")
|
||||
c("{0}_register_{1}_counter_query(struct gen_perf_config *perf)\n".format(gen.chipset, set.underscore_name))
|
||||
c("{0}_register_{1}_counter_query(struct intel_perf_config *perf)\n".format(gen.chipset, set.underscore_name))
|
||||
c("{\n")
|
||||
c_indent(3)
|
||||
|
||||
c("struct gen_perf_query_info *query = rzalloc(perf, struct gen_perf_query_info);\n")
|
||||
c("struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);\n")
|
||||
c("\n")
|
||||
c("query->perf = perf;\n")
|
||||
c("query->kind = GEN_PERF_QUERY_TYPE_OA;\n")
|
||||
|
@ -721,7 +721,7 @@ def main():
|
|||
c("query->symbol_name = \"" + set.symbol_name + "\";\n")
|
||||
c("query->guid = \"" + set.hw_config_guid + "\";\n")
|
||||
|
||||
c("query->counters = rzalloc_array(query, struct gen_perf_query_counter, %u);" % len(counters))
|
||||
c("query->counters = rzalloc_array(query, struct intel_perf_query_counter, %u);" % len(counters))
|
||||
c("query->n_counters = 0;")
|
||||
c("query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */")
|
||||
|
||||
|
@ -751,7 +751,7 @@ def main():
|
|||
|
||||
|
||||
c("\n")
|
||||
c("struct gen_perf_query_counter *counter = query->counters;\n")
|
||||
c("struct intel_perf_query_counter *counter = query->counters;\n")
|
||||
|
||||
c("\n")
|
||||
c("/* Note: we're assuming there can't be any variation in the definition ")
|
||||
|
@ -767,7 +767,7 @@ def main():
|
|||
offset = output_counter_report(set, counter, offset)
|
||||
|
||||
|
||||
c("\nquery->data_size = counter->offset + gen_perf_query_counter_get_size(counter);\n")
|
||||
c("\nquery->data_size = counter->offset + intel_perf_query_counter_get_size(counter);\n")
|
||||
|
||||
c_outdent(3)
|
||||
c("}");
|
||||
|
@ -777,10 +777,10 @@ def main():
|
|||
c_outdent(3)
|
||||
c("}\n")
|
||||
|
||||
h("void gen_oa_register_queries_" + gen.chipset + "(struct gen_perf_config *perf);\n")
|
||||
h("void gen_oa_register_queries_" + gen.chipset + "(struct intel_perf_config *perf);\n")
|
||||
|
||||
c("\nvoid")
|
||||
c("gen_oa_register_queries_" + gen.chipset + "(struct gen_perf_config *perf)")
|
||||
c("gen_oa_register_queries_" + gen.chipset + "(struct intel_perf_config *perf)")
|
||||
c("{")
|
||||
c_indent(3)
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ is_dir_or_link(const struct dirent *entry, const char *parent_dir)
|
|||
}
|
||||
|
||||
static bool
|
||||
get_sysfs_dev_dir(struct gen_perf_config *perf, int fd)
|
||||
get_sysfs_dev_dir(struct intel_perf_config *perf, int fd)
|
||||
{
|
||||
struct stat sb;
|
||||
int min, maj;
|
||||
|
@ -156,7 +156,7 @@ read_file_uint64(const char *file, uint64_t *val)
|
|||
}
|
||||
|
||||
static bool
|
||||
read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf,
|
||||
read_sysfs_drm_device_file_uint64(struct intel_perf_config *perf,
|
||||
const char *file,
|
||||
uint64_t *value)
|
||||
{
|
||||
|
@ -173,13 +173,13 @@ read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf,
|
|||
}
|
||||
|
||||
static void
|
||||
register_oa_config(struct gen_perf_config *perf,
|
||||
register_oa_config(struct intel_perf_config *perf,
|
||||
const struct intel_device_info *devinfo,
|
||||
const struct gen_perf_query_info *query,
|
||||
const struct intel_perf_query_info *query,
|
||||
uint64_t config_id)
|
||||
{
|
||||
struct gen_perf_query_info *registered_query =
|
||||
gen_perf_append_query_info(perf, 0);
|
||||
struct intel_perf_query_info *registered_query =
|
||||
intel_perf_append_query_info(perf, 0);
|
||||
|
||||
*registered_query = *query;
|
||||
registered_query->oa_format = devinfo->ver >= 8 ?
|
||||
|
@ -190,7 +190,7 @@ register_oa_config(struct gen_perf_config *perf,
|
|||
}
|
||||
|
||||
static void
|
||||
enumerate_sysfs_metrics(struct gen_perf_config *perf,
|
||||
enumerate_sysfs_metrics(struct intel_perf_config *perf,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
DIR *metricsdir = NULL;
|
||||
|
@ -221,13 +221,13 @@ enumerate_sysfs_metrics(struct gen_perf_config *perf,
|
|||
metric_entry->d_name);
|
||||
if (entry) {
|
||||
uint64_t id;
|
||||
if (!gen_perf_load_metric_id(perf, metric_entry->d_name, &id)) {
|
||||
if (!intel_perf_load_metric_id(perf, metric_entry->d_name, &id)) {
|
||||
DBG("Failed to read metric set id from %s: %m", buf);
|
||||
continue;
|
||||
}
|
||||
|
||||
register_oa_config(perf, devinfo,
|
||||
(const struct gen_perf_query_info *)entry->data, id);
|
||||
(const struct intel_perf_query_info *)entry->data, id);
|
||||
} else
|
||||
DBG("metric set not known by mesa (skipping)\n");
|
||||
}
|
||||
|
@ -236,17 +236,17 @@ enumerate_sysfs_metrics(struct gen_perf_config *perf,
|
|||
}
|
||||
|
||||
static void
|
||||
add_all_metrics(struct gen_perf_config *perf,
|
||||
add_all_metrics(struct intel_perf_config *perf,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
hash_table_foreach(perf->oa_metrics_table, entry) {
|
||||
const struct gen_perf_query_info *query = entry->data;
|
||||
const struct intel_perf_query_info *query = entry->data;
|
||||
register_oa_config(perf, devinfo, query, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
|
||||
kernel_has_dynamic_config_support(struct intel_perf_config *perf, int fd)
|
||||
{
|
||||
uint64_t invalid_config_id = UINT64_MAX;
|
||||
|
||||
|
@ -255,7 +255,7 @@ kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
|
|||
}
|
||||
|
||||
static int
|
||||
i915_query_items(struct gen_perf_config *perf, int fd,
|
||||
i915_query_items(struct intel_perf_config *perf, int fd,
|
||||
struct drm_i915_query_item *items, uint32_t n_items)
|
||||
{
|
||||
struct drm_i915_query q = {
|
||||
|
@ -266,7 +266,7 @@ i915_query_items(struct gen_perf_config *perf, int fd,
|
|||
}
|
||||
|
||||
static bool
|
||||
i915_query_perf_config_supported(struct gen_perf_config *perf, int fd)
|
||||
i915_query_perf_config_supported(struct intel_perf_config *perf, int fd)
|
||||
{
|
||||
struct drm_i915_query_item item = {
|
||||
.query_id = DRM_I915_QUERY_PERF_CONFIG,
|
||||
|
@ -277,7 +277,7 @@ i915_query_perf_config_supported(struct gen_perf_config *perf, int fd)
|
|||
}
|
||||
|
||||
static bool
|
||||
i915_query_perf_config_data(struct gen_perf_config *perf,
|
||||
i915_query_perf_config_data(struct intel_perf_config *perf,
|
||||
int fd, const char *guid,
|
||||
struct drm_i915_perf_oa_config *config)
|
||||
{
|
||||
|
@ -305,7 +305,7 @@ i915_query_perf_config_data(struct gen_perf_config *perf,
|
|||
}
|
||||
|
||||
bool
|
||||
gen_perf_load_metric_id(struct gen_perf_config *perf_cfg,
|
||||
intel_perf_load_metric_id(struct intel_perf_config *perf_cfg,
|
||||
const char *guid,
|
||||
uint64_t *metric_id)
|
||||
{
|
||||
|
@ -319,8 +319,8 @@ gen_perf_load_metric_id(struct gen_perf_config *perf_cfg,
|
|||
}
|
||||
|
||||
static uint64_t
|
||||
i915_add_config(struct gen_perf_config *perf, int fd,
|
||||
const struct gen_perf_registers *config,
|
||||
i915_add_config(struct intel_perf_config *perf, int fd,
|
||||
const struct intel_perf_registers *config,
|
||||
const char *guid)
|
||||
{
|
||||
struct drm_i915_perf_oa_config i915_config = { 0, };
|
||||
|
@ -341,14 +341,14 @@ i915_add_config(struct gen_perf_config *perf, int fd,
|
|||
}
|
||||
|
||||
static void
|
||||
init_oa_configs(struct gen_perf_config *perf, int fd,
|
||||
init_oa_configs(struct intel_perf_config *perf, int fd,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
hash_table_foreach(perf->oa_metrics_table, entry) {
|
||||
const struct gen_perf_query_info *query = entry->data;
|
||||
const struct intel_perf_query_info *query = entry->data;
|
||||
uint64_t config_id;
|
||||
|
||||
if (gen_perf_load_metric_id(perf, query->guid, &config_id)) {
|
||||
if (intel_perf_load_metric_id(perf, query->guid, &config_id)) {
|
||||
DBG("metric set: %s (already loaded)\n", query->guid);
|
||||
register_oa_config(perf, devinfo, query, config_id);
|
||||
continue;
|
||||
|
@ -367,7 +367,7 @@ init_oa_configs(struct gen_perf_config *perf, int fd,
|
|||
}
|
||||
|
||||
static void
|
||||
compute_topology_builtins(struct gen_perf_config *perf,
|
||||
compute_topology_builtins(struct intel_perf_config *perf,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
perf->sys_vars.slice_mask = devinfo->slice_masks;
|
||||
|
@ -403,7 +403,7 @@ compute_topology_builtins(struct gen_perf_config *perf,
|
|||
}
|
||||
|
||||
static bool
|
||||
init_oa_sys_vars(struct gen_perf_config *perf, const struct intel_device_info *devinfo)
|
||||
init_oa_sys_vars(struct intel_perf_config *perf, const struct intel_device_info *devinfo)
|
||||
{
|
||||
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
|
||||
|
||||
|
@ -429,7 +429,7 @@ init_oa_sys_vars(struct gen_perf_config *perf, const struct intel_device_info *d
|
|||
return true;
|
||||
}
|
||||
|
||||
typedef void (*perf_register_oa_queries_t)(struct gen_perf_config *);
|
||||
typedef void (*perf_register_oa_queries_t)(struct intel_perf_config *);
|
||||
|
||||
static perf_register_oa_queries_t
|
||||
get_register_queries_function(const struct intel_device_info *devinfo)
|
||||
|
@ -486,101 +486,101 @@ get_register_queries_function(const struct intel_device_info *devinfo)
|
|||
}
|
||||
|
||||
static int
|
||||
gen_perf_compare_counter_names(const void *v1, const void *v2)
|
||||
intel_perf_compare_counter_names(const void *v1, const void *v2)
|
||||
{
|
||||
const struct gen_perf_query_counter *c1 = v1;
|
||||
const struct gen_perf_query_counter *c2 = v2;
|
||||
const struct intel_perf_query_counter *c1 = v1;
|
||||
const struct intel_perf_query_counter *c2 = v2;
|
||||
|
||||
return strcmp(c1->name, c2->name);
|
||||
}
|
||||
|
||||
static void
|
||||
sort_query(struct gen_perf_query_info *q)
|
||||
sort_query(struct intel_perf_query_info *q)
|
||||
{
|
||||
qsort(q->counters, q->n_counters, sizeof(q->counters[0]),
|
||||
gen_perf_compare_counter_names);
|
||||
intel_perf_compare_counter_names);
|
||||
}
|
||||
|
||||
static void
|
||||
load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
|
||||
load_pipeline_statistic_metrics(struct intel_perf_config *perf_cfg,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
struct gen_perf_query_info *query =
|
||||
gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
|
||||
struct intel_perf_query_info *query =
|
||||
intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
|
||||
|
||||
query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
|
||||
query->name = "Pipeline Statistics Registers";
|
||||
|
||||
gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
|
||||
"N vertices submitted");
|
||||
gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
|
||||
"N primitives submitted");
|
||||
gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
|
||||
"N vertex shader invocations");
|
||||
|
||||
if (devinfo->ver == 6) {
|
||||
gen_perf_query_add_stat_reg(query, GFX6_SO_PRIM_STORAGE_NEEDED, 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX6_SO_PRIM_STORAGE_NEEDED, 1, 1,
|
||||
"SO_PRIM_STORAGE_NEEDED",
|
||||
"N geometry shader stream-out primitives (total)");
|
||||
gen_perf_query_add_stat_reg(query, GFX6_SO_NUM_PRIMS_WRITTEN, 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX6_SO_NUM_PRIMS_WRITTEN, 1, 1,
|
||||
"SO_NUM_PRIMS_WRITTEN",
|
||||
"N geometry shader stream-out primitives (written)");
|
||||
} else {
|
||||
gen_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
|
||||
"SO_PRIM_STORAGE_NEEDED (Stream 0)",
|
||||
"N stream-out (stream 0) primitives (total)");
|
||||
gen_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
|
||||
"SO_PRIM_STORAGE_NEEDED (Stream 1)",
|
||||
"N stream-out (stream 1) primitives (total)");
|
||||
gen_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
|
||||
"SO_PRIM_STORAGE_NEEDED (Stream 2)",
|
||||
"N stream-out (stream 2) primitives (total)");
|
||||
gen_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
|
||||
"SO_PRIM_STORAGE_NEEDED (Stream 3)",
|
||||
"N stream-out (stream 3) primitives (total)");
|
||||
gen_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
|
||||
"SO_NUM_PRIMS_WRITTEN (Stream 0)",
|
||||
"N stream-out (stream 0) primitives (written)");
|
||||
gen_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
|
||||
"SO_NUM_PRIMS_WRITTEN (Stream 1)",
|
||||
"N stream-out (stream 1) primitives (written)");
|
||||
gen_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
|
||||
"SO_NUM_PRIMS_WRITTEN (Stream 2)",
|
||||
"N stream-out (stream 2) primitives (written)");
|
||||
gen_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
|
||||
intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
|
||||
"SO_NUM_PRIMS_WRITTEN (Stream 3)",
|
||||
"N stream-out (stream 3) primitives (written)");
|
||||
}
|
||||
|
||||
gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
|
||||
"N TCS shader invocations");
|
||||
gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
|
||||
"N TES shader invocations");
|
||||
|
||||
gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
|
||||
"N geometry shader invocations");
|
||||
gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
|
||||
"N geometry shader primitives emitted");
|
||||
|
||||
gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
|
||||
"N primitives entering clipping");
|
||||
gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
|
||||
"N primitives leaving clipping");
|
||||
|
||||
if (devinfo->is_haswell || devinfo->ver == 8) {
|
||||
gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
|
||||
intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
|
||||
"N fragment shader invocations",
|
||||
"N fragment shader invocations");
|
||||
} else {
|
||||
gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
|
||||
"N fragment shader invocations");
|
||||
}
|
||||
|
||||
gen_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
|
||||
"N z-pass fragments");
|
||||
|
||||
if (devinfo->ver >= 7) {
|
||||
gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||
"N compute shader invocations");
|
||||
}
|
||||
|
||||
|
@ -634,8 +634,8 @@ compare_str_or_null(const char *s1, const char *s2)
|
|||
static int
|
||||
compare_counter_categories_and_names(const void *_c1, const void *_c2)
|
||||
{
|
||||
const struct gen_perf_query_counter_info *c1 = (const struct gen_perf_query_counter_info *)_c1;
|
||||
const struct gen_perf_query_counter_info *c2 = (const struct gen_perf_query_counter_info *)_c2;
|
||||
const struct intel_perf_query_counter_info *c1 = (const struct intel_perf_query_counter_info *)_c1;
|
||||
const struct intel_perf_query_counter_info *c2 = (const struct intel_perf_query_counter_info *)_c2;
|
||||
|
||||
/* pipeline counters don't have an assigned category */
|
||||
int r = compare_str_or_null(c1->counter->category, c2->counter->category);
|
||||
|
@ -646,7 +646,7 @@ compare_counter_categories_and_names(const void *_c1, const void *_c2)
|
|||
}
|
||||
|
||||
static void
|
||||
build_unique_counter_list(struct gen_perf_config *perf)
|
||||
build_unique_counter_list(struct intel_perf_config *perf)
|
||||
{
|
||||
assert(perf->n_queries < 64);
|
||||
|
||||
|
@ -660,7 +660,7 @@ build_unique_counter_list(struct gen_perf_config *perf)
|
|||
* We can't alloc it small and realloc when needed because the hash table
|
||||
* below contains pointers to this array.
|
||||
*/
|
||||
struct gen_perf_query_counter_info *counter_infos =
|
||||
struct intel_perf_query_counter_info *counter_infos =
|
||||
ralloc_array_size(perf, sizeof(counter_infos[0]), max_counters);
|
||||
|
||||
perf->n_counters = 0;
|
||||
|
@ -671,11 +671,11 @@ build_unique_counter_list(struct gen_perf_config *perf)
|
|||
_mesa_key_string_equal);
|
||||
struct hash_entry *entry;
|
||||
for (int q = 0; q < perf->n_queries ; q++) {
|
||||
struct gen_perf_query_info *query = &perf->queries[q];
|
||||
struct intel_perf_query_info *query = &perf->queries[q];
|
||||
|
||||
for (int c = 0; c < query->n_counters; c++) {
|
||||
struct gen_perf_query_counter *counter;
|
||||
struct gen_perf_query_counter_info *counter_info;
|
||||
struct intel_perf_query_counter *counter;
|
||||
struct intel_perf_query_counter_info *counter_info;
|
||||
|
||||
counter = &query->counters[c];
|
||||
entry = _mesa_hash_table_search(counters_table, counter->symbol_name);
|
||||
|
@ -709,7 +709,7 @@ build_unique_counter_list(struct gen_perf_config *perf)
|
|||
}
|
||||
|
||||
static bool
|
||||
oa_metrics_available(struct gen_perf_config *perf, int fd,
|
||||
oa_metrics_available(struct intel_perf_config *perf, int fd,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
|
||||
|
@ -751,7 +751,7 @@ oa_metrics_available(struct gen_perf_config *perf, int fd,
|
|||
}
|
||||
|
||||
static void
|
||||
load_oa_metrics(struct gen_perf_config *perf, int fd,
|
||||
load_oa_metrics(struct intel_perf_config *perf, int fd,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
int existing_queries = perf->n_queries;
|
||||
|
@ -794,8 +794,8 @@ load_oa_metrics(struct gen_perf_config *perf, int fd,
|
|||
perf->fallback_raw_oa_metric = perf->queries[perf->n_queries - 1].oa_metrics_set_id;
|
||||
}
|
||||
|
||||
struct gen_perf_registers *
|
||||
gen_perf_load_configuration(struct gen_perf_config *perf_cfg, int fd, const char *guid)
|
||||
struct intel_perf_registers *
|
||||
intel_perf_load_configuration(struct intel_perf_config *perf_cfg, int fd, const char *guid)
|
||||
{
|
||||
if (!perf_cfg->i915_query_supported)
|
||||
return NULL;
|
||||
|
@ -804,16 +804,16 @@ gen_perf_load_configuration(struct gen_perf_config *perf_cfg, int fd, const char
|
|||
if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config))
|
||||
return NULL;
|
||||
|
||||
struct gen_perf_registers *config = rzalloc(NULL, struct gen_perf_registers);
|
||||
struct intel_perf_registers *config = rzalloc(NULL, struct intel_perf_registers);
|
||||
config->n_flex_regs = i915_config.n_flex_regs;
|
||||
config->flex_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_flex_regs);
|
||||
config->flex_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_flex_regs);
|
||||
config->n_mux_regs = i915_config.n_mux_regs;
|
||||
config->mux_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_mux_regs);
|
||||
config->mux_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_mux_regs);
|
||||
config->n_b_counter_regs = i915_config.n_boolean_regs;
|
||||
config->b_counter_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_b_counter_regs);
|
||||
config->b_counter_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_b_counter_regs);
|
||||
|
||||
/*
|
||||
* struct gen_perf_query_register_prog maps exactly to the tuple of
|
||||
* struct intel_perf_query_register_prog maps exactly to the tuple of
|
||||
* (register offset, register value) returned by the i915.
|
||||
*/
|
||||
i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
|
||||
|
@ -828,8 +828,8 @@ gen_perf_load_configuration(struct gen_perf_config *perf_cfg, int fd, const char
|
|||
}
|
||||
|
||||
uint64_t
|
||||
gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd,
|
||||
const struct gen_perf_registers *config,
|
||||
intel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd,
|
||||
const struct intel_perf_registers *config,
|
||||
const char *guid)
|
||||
{
|
||||
if (guid)
|
||||
|
@ -869,14 +869,14 @@ gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd,
|
|||
|
||||
/* Check if already present. */
|
||||
uint64_t id;
|
||||
if (gen_perf_load_metric_id(perf_cfg, generated_guid, &id))
|
||||
if (intel_perf_load_metric_id(perf_cfg, generated_guid, &id))
|
||||
return id;
|
||||
|
||||
return i915_add_config(perf_cfg, fd, config, generated_guid);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
get_passes_mask(struct gen_perf_config *perf,
|
||||
get_passes_mask(struct intel_perf_config *perf,
|
||||
const uint32_t *counter_indices,
|
||||
uint32_t counter_indices_count)
|
||||
{
|
||||
|
@ -909,10 +909,10 @@ get_passes_mask(struct gen_perf_config *perf,
|
|||
}
|
||||
|
||||
uint32_t
|
||||
gen_perf_get_n_passes(struct gen_perf_config *perf,
|
||||
intel_perf_get_n_passes(struct intel_perf_config *perf,
|
||||
const uint32_t *counter_indices,
|
||||
uint32_t counter_indices_count,
|
||||
struct gen_perf_query_info **pass_queries)
|
||||
struct intel_perf_query_info **pass_queries)
|
||||
{
|
||||
uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
|
||||
|
||||
|
@ -928,10 +928,10 @@ gen_perf_get_n_passes(struct gen_perf_config *perf,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_get_counters_passes(struct gen_perf_config *perf,
|
||||
intel_perf_get_counters_passes(struct intel_perf_config *perf,
|
||||
const uint32_t *counter_indices,
|
||||
uint32_t counter_indices_count,
|
||||
struct gen_perf_counter_pass *counter_pass)
|
||||
struct intel_perf_counter_pass *counter_pass)
|
||||
{
|
||||
uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
|
||||
ASSERTED uint32_t n_passes = __builtin_popcount(queries_mask);
|
||||
|
@ -1013,7 +1013,7 @@ gfx8_read_report_clock_ratios(const uint32_t *report,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
|
||||
intel_perf_query_result_read_frequencies(struct intel_perf_query_result *result,
|
||||
const struct intel_device_info *devinfo,
|
||||
const uint32_t *start,
|
||||
const uint32_t *end)
|
||||
|
@ -1044,8 +1044,8 @@ can_use_mi_rpc_bc_counters(const struct intel_device_info *devinfo)
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
||||
const struct intel_perf_query_info *query,
|
||||
const struct intel_device_info *devinfo,
|
||||
const uint32_t *start,
|
||||
const uint32_t *end)
|
||||
|
@ -1111,7 +1111,7 @@ gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
|
|||
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
|
||||
|
||||
void
|
||||
gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
|
||||
intel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result,
|
||||
const struct intel_device_info *devinfo,
|
||||
const uint32_t start,
|
||||
const uint32_t end)
|
||||
|
@ -1138,8 +1138,8 @@ gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_read_perfcnts(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
intel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result,
|
||||
const struct intel_perf_query_info *query,
|
||||
const uint64_t *start,
|
||||
const uint64_t *end)
|
||||
{
|
||||
|
@ -1154,8 +1154,8 @@ gen_perf_query_result_read_perfcnts(struct gen_perf_query_result *result,
|
|||
}
|
||||
|
||||
static uint32_t
|
||||
query_accumulator_offset(const struct gen_perf_query_info *query,
|
||||
enum gen_perf_query_field_type type,
|
||||
query_accumulator_offset(const struct intel_perf_query_info *query,
|
||||
enum intel_perf_query_field_type type,
|
||||
uint8_t index)
|
||||
{
|
||||
switch (type) {
|
||||
|
@ -1172,20 +1172,20 @@ query_accumulator_offset(const struct gen_perf_query_info *query,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
|
||||
const struct intel_perf_query_info *query,
|
||||
const struct intel_device_info *devinfo,
|
||||
const void *start,
|
||||
const void *end,
|
||||
bool no_oa_accumulate)
|
||||
{
|
||||
struct gen_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||
struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||
|
||||
for (uint32_t r = 0; r < layout->n_fields; r++) {
|
||||
struct gen_perf_query_field *field = &layout->fields[r];
|
||||
struct intel_perf_query_field *field = &layout->fields[r];
|
||||
|
||||
if (field->type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC) {
|
||||
gen_perf_query_result_read_frequencies(result, devinfo,
|
||||
intel_perf_query_result_read_frequencies(result, devinfo,
|
||||
start + field->location,
|
||||
end + field->location);
|
||||
/* no_oa_accumulate=true is used when doing GL perf queries, we
|
||||
|
@ -1193,7 +1193,7 @@ gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
|||
* unrelated deltas, so don't accumulate the begin/end reports here.
|
||||
*/
|
||||
if (!no_oa_accumulate) {
|
||||
gen_perf_query_result_accumulate(result, query, devinfo,
|
||||
intel_perf_query_result_accumulate(result, query, devinfo,
|
||||
start + field->location,
|
||||
end + field->location);
|
||||
}
|
||||
|
@ -1218,7 +1218,7 @@ gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
|||
* represent frequencies. We store it in a separate location.
|
||||
*/
|
||||
if (field->type == GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT)
|
||||
gen_perf_query_result_read_gt_frequency(result, devinfo, v0, v1);
|
||||
intel_perf_query_result_read_gt_frequency(result, devinfo, v0, v1);
|
||||
else
|
||||
result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0;
|
||||
}
|
||||
|
@ -1226,21 +1226,21 @@ gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_clear(struct gen_perf_query_result *result)
|
||||
intel_perf_query_result_clear(struct intel_perf_query_result *result)
|
||||
{
|
||||
memset(result, 0, sizeof(*result));
|
||||
result->hw_id = OA_REPORT_INVALID_CTX_ID; /* invalid */
|
||||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_print_fields(const struct gen_perf_query_info *query,
|
||||
intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
|
||||
const struct intel_device_info *devinfo,
|
||||
const void *data)
|
||||
{
|
||||
const struct gen_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||
const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||
|
||||
for (uint32_t r = 0; r < layout->n_fields; r++) {
|
||||
const struct gen_perf_query_field *field = &layout->fields[r];
|
||||
const struct intel_perf_query_field *field = &layout->fields[r];
|
||||
const uint32_t *value32 = data + field->location;
|
||||
|
||||
switch (field->type) {
|
||||
|
@ -1262,17 +1262,17 @@ gen_perf_query_result_print_fields(const struct gen_perf_query_info *query,
|
|||
}
|
||||
|
||||
static int
|
||||
gen_perf_compare_query_names(const void *v1, const void *v2)
|
||||
intel_perf_compare_query_names(const void *v1, const void *v2)
|
||||
{
|
||||
const struct gen_perf_query_info *q1 = v1;
|
||||
const struct gen_perf_query_info *q2 = v2;
|
||||
const struct intel_perf_query_info *q1 = v1;
|
||||
const struct intel_perf_query_info *q2 = v2;
|
||||
|
||||
return strcmp(q1->name, q2->name);
|
||||
}
|
||||
|
||||
static inline struct gen_perf_query_field *
|
||||
add_query_register(struct gen_perf_query_field_layout *layout,
|
||||
enum gen_perf_query_field_type type,
|
||||
static inline struct intel_perf_query_field *
|
||||
add_query_register(struct intel_perf_query_field_layout *layout,
|
||||
enum intel_perf_query_field_type type,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
uint8_t index)
|
||||
|
@ -1285,7 +1285,7 @@ add_query_register(struct gen_perf_query_field_layout *layout,
|
|||
else if (size % 8 == 0)
|
||||
layout->size = align(layout->size, 8);
|
||||
|
||||
layout->fields[layout->n_fields++] = (struct gen_perf_query_field) {
|
||||
layout->fields[layout->n_fields++] = (struct intel_perf_query_field) {
|
||||
.mmio_offset = offset,
|
||||
.location = layout->size,
|
||||
.type = type,
|
||||
|
@ -1298,23 +1298,23 @@ add_query_register(struct gen_perf_query_field_layout *layout,
|
|||
}
|
||||
|
||||
static void
|
||||
gen_perf_init_query_fields(struct gen_perf_config *perf_cfg,
|
||||
intel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout;
|
||||
struct intel_perf_query_field_layout *layout = &perf_cfg->query_layout;
|
||||
|
||||
layout->n_fields = 0;
|
||||
|
||||
/* MI_RPC requires a 64byte alignment. */
|
||||
layout->alignment = 64;
|
||||
|
||||
layout->fields = rzalloc_array(perf_cfg, struct gen_perf_query_field, 5 + 16);
|
||||
layout->fields = rzalloc_array(perf_cfg, struct intel_perf_query_field, 5 + 16);
|
||||
|
||||
add_query_register(layout, GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
|
||||
0, 256, 0);
|
||||
|
||||
if (devinfo->ver <= 11) {
|
||||
struct gen_perf_query_field *field =
|
||||
struct intel_perf_query_field *field =
|
||||
add_query_register(layout,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
|
||||
PERF_CNT_1_DW0, 8, 0);
|
||||
|
@ -1367,16 +1367,16 @@ gen_perf_init_query_fields(struct gen_perf_config *perf_cfg,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
|
||||
intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
|
||||
const struct intel_device_info *devinfo,
|
||||
int drm_fd,
|
||||
bool include_pipeline_statistics)
|
||||
{
|
||||
gen_perf_init_query_fields(perf_cfg, devinfo);
|
||||
intel_perf_init_query_fields(perf_cfg, devinfo);
|
||||
|
||||
if (include_pipeline_statistics) {
|
||||
load_pipeline_statistic_metrics(perf_cfg, devinfo);
|
||||
gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
|
||||
intel_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
|
||||
}
|
||||
|
||||
bool oa_metrics = oa_metrics_available(perf_cfg, drm_fd, devinfo);
|
||||
|
@ -1385,10 +1385,10 @@ gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
|
|||
|
||||
/* sort query groups by name */
|
||||
qsort(perf_cfg->queries, perf_cfg->n_queries,
|
||||
sizeof(perf_cfg->queries[0]), gen_perf_compare_query_names);
|
||||
sizeof(perf_cfg->queries[0]), intel_perf_compare_query_names);
|
||||
|
||||
build_unique_counter_list(perf_cfg);
|
||||
|
||||
if (oa_metrics)
|
||||
gen_perf_register_mdapi_oa_query(perf_cfg, devinfo);
|
||||
intel_perf_register_mdapi_oa_query(perf_cfg, devinfo);
|
||||
}
|
||||
|
|
|
@ -43,10 +43,10 @@
|
|||
|
||||
struct intel_device_info;
|
||||
|
||||
struct gen_perf_config;
|
||||
struct gen_perf_query_info;
|
||||
struct intel_perf_config;
|
||||
struct intel_perf_query_info;
|
||||
|
||||
enum gen_perf_counter_type {
|
||||
enum intel_perf_counter_type {
|
||||
GEN_PERF_COUNTER_TYPE_EVENT,
|
||||
GEN_PERF_COUNTER_TYPE_DURATION_NORM,
|
||||
GEN_PERF_COUNTER_TYPE_DURATION_RAW,
|
||||
|
@ -55,7 +55,7 @@ enum gen_perf_counter_type {
|
|||
GEN_PERF_COUNTER_TYPE_TIMESTAMP,
|
||||
};
|
||||
|
||||
enum gen_perf_counter_data_type {
|
||||
enum intel_perf_counter_data_type {
|
||||
GEN_PERF_COUNTER_DATA_TYPE_BOOL32,
|
||||
GEN_PERF_COUNTER_DATA_TYPE_UINT32,
|
||||
GEN_PERF_COUNTER_DATA_TYPE_UINT64,
|
||||
|
@ -63,7 +63,7 @@ enum gen_perf_counter_data_type {
|
|||
GEN_PERF_COUNTER_DATA_TYPE_DOUBLE,
|
||||
};
|
||||
|
||||
enum gen_perf_counter_units {
|
||||
enum intel_perf_counter_units {
|
||||
/* size */
|
||||
GEN_PERF_COUNTER_UNITS_BYTES,
|
||||
|
||||
|
@ -124,7 +124,7 @@ struct gen_pipeline_stat {
|
|||
#define I915_PERF_OA_SAMPLE_SIZE (8 + /* drm_i915_perf_record_header */ \
|
||||
256) /* OA counter report */
|
||||
|
||||
struct gen_perf_query_result {
|
||||
struct intel_perf_query_result {
|
||||
/**
|
||||
* Storage for the final accumulated OA counters.
|
||||
*/
|
||||
|
@ -168,49 +168,49 @@ struct gen_perf_query_result {
|
|||
bool query_disjoint;
|
||||
};
|
||||
|
||||
struct gen_perf_query_counter {
|
||||
struct intel_perf_query_counter {
|
||||
const char *name;
|
||||
const char *desc;
|
||||
const char *symbol_name;
|
||||
const char *category;
|
||||
enum gen_perf_counter_type type;
|
||||
enum gen_perf_counter_data_type data_type;
|
||||
enum gen_perf_counter_units units;
|
||||
enum intel_perf_counter_type type;
|
||||
enum intel_perf_counter_data_type data_type;
|
||||
enum intel_perf_counter_units units;
|
||||
uint64_t raw_max;
|
||||
size_t offset;
|
||||
|
||||
union {
|
||||
uint64_t (*oa_counter_read_uint64)(struct gen_perf_config *perf,
|
||||
const struct gen_perf_query_info *query,
|
||||
const struct gen_perf_query_result *results);
|
||||
float (*oa_counter_read_float)(struct gen_perf_config *perf,
|
||||
const struct gen_perf_query_info *query,
|
||||
const struct gen_perf_query_result *results);
|
||||
uint64_t (*oa_counter_read_uint64)(struct intel_perf_config *perf,
|
||||
const struct intel_perf_query_info *query,
|
||||
const struct intel_perf_query_result *results);
|
||||
float (*oa_counter_read_float)(struct intel_perf_config *perf,
|
||||
const struct intel_perf_query_info *query,
|
||||
const struct intel_perf_query_result *results);
|
||||
struct gen_pipeline_stat pipeline_stat;
|
||||
};
|
||||
};
|
||||
|
||||
struct gen_perf_query_register_prog {
|
||||
struct intel_perf_query_register_prog {
|
||||
uint32_t reg;
|
||||
uint32_t val;
|
||||
};
|
||||
|
||||
/* Register programming for a given query */
|
||||
struct gen_perf_registers {
|
||||
const struct gen_perf_query_register_prog *flex_regs;
|
||||
struct intel_perf_registers {
|
||||
const struct intel_perf_query_register_prog *flex_regs;
|
||||
uint32_t n_flex_regs;
|
||||
|
||||
const struct gen_perf_query_register_prog *mux_regs;
|
||||
const struct intel_perf_query_register_prog *mux_regs;
|
||||
uint32_t n_mux_regs;
|
||||
|
||||
const struct gen_perf_query_register_prog *b_counter_regs;
|
||||
const struct intel_perf_query_register_prog *b_counter_regs;
|
||||
uint32_t n_b_counter_regs;
|
||||
};
|
||||
|
||||
struct gen_perf_query_info {
|
||||
struct gen_perf_config *perf;
|
||||
struct intel_perf_query_info {
|
||||
struct intel_perf_config *perf;
|
||||
|
||||
enum gen_perf_query_type {
|
||||
enum intel_perf_query_type {
|
||||
GEN_PERF_QUERY_TYPE_OA,
|
||||
GEN_PERF_QUERY_TYPE_RAW,
|
||||
GEN_PERF_QUERY_TYPE_PIPELINE,
|
||||
|
@ -218,7 +218,7 @@ struct gen_perf_query_info {
|
|||
const char *name;
|
||||
const char *symbol_name;
|
||||
const char *guid;
|
||||
struct gen_perf_query_counter *counters;
|
||||
struct intel_perf_query_counter *counters;
|
||||
int n_counters;
|
||||
int max_counters;
|
||||
size_t data_size;
|
||||
|
@ -236,15 +236,15 @@ struct gen_perf_query_info {
|
|||
int perfcnt_offset;
|
||||
int rpstat_offset;
|
||||
|
||||
struct gen_perf_registers config;
|
||||
struct intel_perf_registers config;
|
||||
};
|
||||
|
||||
/* When not using the MI_RPC command, this structure describes the list of
 * register offsets as well as their storage location so that they can be
 * stored through a series of MI_SRM commands and accumulated with
 * gen_perf_query_result_accumulate_snapshots().
 * intel_perf_query_result_accumulate_snapshots().
 */
struct gen_perf_query_field_layout {
struct intel_perf_query_field_layout {
   /* Alignment for the layout */
   uint32_t alignment;
@ -253,17 +253,17 @@ struct gen_perf_query_field_layout {
|
|||
|
||||
uint32_t n_fields;
|
||||
|
||||
struct gen_perf_query_field {
|
||||
struct intel_perf_query_field {
|
||||
/* MMIO location of this register */
|
||||
uint16_t mmio_offset;
|
||||
|
||||
/* Location of this register in the storage */
|
||||
uint16_t location;
|
||||
|
||||
/* Type of register, for accumulation (see gen_perf_query_info:*_offset
|
||||
/* Type of register, for accumulation (see intel_perf_query_info:*_offset
|
||||
* fields)
|
||||
*/
|
||||
enum gen_perf_query_field_type {
|
||||
enum intel_perf_query_field_type {
|
||||
GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
|
||||
|
@ -284,8 +284,8 @@ struct gen_perf_query_field_layout {
|
|||
} *fields;
|
||||
};
|
||||
|
||||
struct gen_perf_query_counter_info {
|
||||
struct gen_perf_query_counter *counter;
|
||||
struct intel_perf_query_counter_info {
|
||||
struct intel_perf_query_counter *counter;
|
||||
|
||||
uint64_t query_mask;
|
||||
|
||||
|
@ -299,7 +299,7 @@ struct gen_perf_query_counter_info {
|
|||
} location;
|
||||
};
|
||||
|
||||
struct gen_perf_config {
|
||||
struct intel_perf_config {
|
||||
/* Whether i915 has DRM_I915_QUERY_PERF_CONFIG support. */
|
||||
bool i915_query_supported;
|
||||
|
||||
|
@ -309,13 +309,13 @@ struct gen_perf_config {
|
|||
/* Powergating configuration for the running the query. */
|
||||
struct drm_i915_gem_context_param_sseu sseu;
|
||||
|
||||
struct gen_perf_query_info *queries;
|
||||
struct intel_perf_query_info *queries;
|
||||
int n_queries;
|
||||
|
||||
struct gen_perf_query_counter_info *counter_infos;
|
||||
struct intel_perf_query_counter_info *counter_infos;
|
||||
int n_counters;
|
||||
|
||||
struct gen_perf_query_field_layout query_layout;
|
||||
struct intel_perf_query_field_layout query_layout;
|
||||
|
||||
/* Variables referenced in the XML meta data for OA performance
|
||||
* counters, e.g in the normalization equations.
|
||||
|
@ -376,26 +376,26 @@ struct gen_perf_config {
|
|||
} vtbl;
|
||||
};
|
||||
|
||||
struct gen_perf_counter_pass {
|
||||
struct gen_perf_query_info *query;
|
||||
struct gen_perf_query_counter *counter;
|
||||
struct intel_perf_counter_pass {
|
||||
struct intel_perf_query_info *query;
|
||||
struct intel_perf_query_counter *counter;
|
||||
uint32_t pass;
|
||||
};
|
||||
|
||||
void gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
|
||||
void intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
|
||||
const struct intel_device_info *devinfo,
|
||||
int drm_fd,
|
||||
bool include_pipeline_statistics);
|
||||
|
||||
/** Query i915 for a metric id using guid.
|
||||
*/
|
||||
bool gen_perf_load_metric_id(struct gen_perf_config *perf_cfg,
|
||||
bool intel_perf_load_metric_id(struct intel_perf_config *perf_cfg,
|
||||
const char *guid,
|
||||
uint64_t *metric_id);
|
||||
|
||||
/** Load a configuation's content from i915 using a guid.
|
||||
*/
|
||||
struct gen_perf_registers *gen_perf_load_configuration(struct gen_perf_config *perf_cfg,
|
||||
struct intel_perf_registers *intel_perf_load_configuration(struct intel_perf_config *perf_cfg,
|
||||
int fd, const char *guid);
|
||||
|
||||
/** Store a configuration into i915 using guid and return a new metric id.
|
||||
|
@ -403,60 +403,60 @@ struct gen_perf_registers *gen_perf_load_configuration(struct gen_perf_config *p
|
|||
* If guid is NULL, then a generated one will be provided by hashing the
|
||||
* content of the configuration.
|
||||
*/
|
||||
uint64_t gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd,
|
||||
const struct gen_perf_registers *config,
|
||||
uint64_t intel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd,
|
||||
const struct intel_perf_registers *config,
|
||||
const char *guid);
|
||||
|
||||
/** Read the slice/unslice frequency from 2 OA reports and store then into
|
||||
* result.
|
||||
*/
|
||||
void gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
|
||||
void intel_perf_query_result_read_frequencies(struct intel_perf_query_result *result,
|
||||
const struct intel_device_info *devinfo,
|
||||
const uint32_t *start,
|
||||
const uint32_t *end);
|
||||
|
||||
/** Store the GT frequency as reported by the RPSTAT register.
|
||||
*/
|
||||
void gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
|
||||
void intel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result,
|
||||
const struct intel_device_info *devinfo,
|
||||
const uint32_t start,
|
||||
const uint32_t end);
|
||||
|
||||
/** Store PERFCNT registers values.
|
||||
*/
|
||||
void gen_perf_query_result_read_perfcnts(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
void intel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result,
|
||||
const struct intel_perf_query_info *query,
|
||||
const uint64_t *start,
|
||||
const uint64_t *end);
|
||||
|
||||
/** Accumulate the delta between 2 OA reports into result for a given query.
|
||||
*/
|
||||
void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
void intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
||||
const struct intel_perf_query_info *query,
|
||||
const struct intel_device_info *devinfo,
|
||||
const uint32_t *start,
|
||||
const uint32_t *end);
|
||||
|
||||
/** Accumulate the delta between 2 snapshots of OA perf registers (layout
|
||||
* should match description specified through gen_perf_query_register_layout).
|
||||
* should match description specified through intel_perf_query_register_layout).
|
||||
*/
|
||||
void gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
void intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
|
||||
const struct intel_perf_query_info *query,
|
||||
const struct intel_device_info *devinfo,
|
||||
const void *start,
|
||||
const void *end,
|
||||
bool no_oa_accumulate);
|
||||
|
||||
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
|
||||
void intel_perf_query_result_clear(struct intel_perf_query_result *result);
|
||||
|
||||
/** Debug helper printing out query data.
|
||||
*/
|
||||
void gen_perf_query_result_print_fields(const struct gen_perf_query_info *query,
|
||||
void intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
|
||||
const struct intel_device_info *devinfo,
|
||||
const void *data);
|
||||
|
||||
static inline size_t
|
||||
gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
|
||||
intel_perf_query_counter_get_size(const struct intel_perf_query_counter *counter)
|
||||
{
|
||||
switch (counter->data_type) {
|
||||
case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
|
||||
|
@ -474,10 +474,10 @@ gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
|
|||
}
|
||||
}
|
||||
|
||||
static inline struct gen_perf_config *
gen_perf_new(void *ctx)
static inline struct intel_perf_config *
intel_perf_new(void *ctx)
{
   struct gen_perf_config *perf = rzalloc(ctx, struct gen_perf_config);
   struct intel_perf_config *perf = rzalloc(ctx, struct intel_perf_config);
   return perf;
}
|
||||
|
@ -486,7 +486,7 @@ gen_perf_new(void *ctx)
|
|||
* values captured through MI_* commands.
|
||||
*/
|
||||
static inline bool
|
||||
gen_perf_has_hold_preemption(const struct gen_perf_config *perf)
|
||||
intel_perf_has_hold_preemption(const struct intel_perf_config *perf)
|
||||
{
|
||||
return perf->i915_perf_version >= 3;
|
||||
}
|
||||
|
@ -496,18 +496,18 @@ gen_perf_has_hold_preemption(const struct gen_perf_config *perf)
|
|||
* architecture requires half the EU for particular workloads.
|
||||
*/
|
||||
static inline bool
|
||||
gen_perf_has_global_sseu(const struct gen_perf_config *perf)
|
||||
intel_perf_has_global_sseu(const struct intel_perf_config *perf)
|
||||
{
|
||||
return perf->i915_perf_version >= 4;
|
||||
}
|
||||
|
||||
uint32_t gen_perf_get_n_passes(struct gen_perf_config *perf,
uint32_t intel_perf_get_n_passes(struct intel_perf_config *perf,
                                 const uint32_t *counter_indices,
                                 uint32_t counter_indices_count,
                                 struct gen_perf_query_info **pass_queries);
void gen_perf_get_counters_passes(struct gen_perf_config *perf,
                                 struct intel_perf_query_info **pass_queries);
void intel_perf_get_counters_passes(struct intel_perf_config *perf,
                                    const uint32_t *counter_indices,
                                    uint32_t counter_indices_count,
                                    struct gen_perf_counter_pass *counter_pass);
                                    struct intel_perf_counter_pass *counter_pass);

#endif /* INTEL_PERF_H */
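(Not part of the commit: a minimal usage sketch.) The renamed header keeps the same API shape, so a driver-side probe of the metrics, assuming the usual "perf/intel_perf.h" include path and a caller-provided mem_ctx/devinfo/drm_fd, could look like the fragment below; the helper name is made up for illustration.

#include "perf/intel_perf.h"

/* Count every counter exposed through the renamed intel_perf API. */
static int
count_intel_perf_counters(void *mem_ctx,
                          const struct intel_device_info *devinfo,
                          int drm_fd)
{
   struct intel_perf_config *perf = intel_perf_new(mem_ctx);
   if (!perf)
      return 0;

   /* Same call as before the rename, only the prefix changed. */
   intel_perf_init_metrics(perf, devinfo, drm_fd, true /* pipeline statistics */);

   int total = 0;
   for (int q = 0; q < perf->n_queries; q++)
      total += perf->queries[q].n_counters;
   return total;
}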
@ -32,10 +32,10 @@
|
|||
|
||||
|
||||
int
|
||||
gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
||||
intel_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
||||
const struct intel_device_info *devinfo,
|
||||
const struct gen_perf_query_info *query,
|
||||
const struct gen_perf_query_result *result)
|
||||
const struct intel_perf_query_info *query,
|
||||
const struct intel_perf_query_result *result)
|
||||
{
|
||||
switch (devinfo->ver) {
|
||||
case 7: {
|
||||
|
@ -137,47 +137,47 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
|
||||
intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
|
||||
return;
|
||||
|
||||
struct gen_perf_query_info *query =
|
||||
gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
|
||||
struct intel_perf_query_info *query =
|
||||
intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
|
||||
|
||||
query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
|
||||
query->name = "Intel_Raw_Pipeline_Statistics_Query";
|
||||
|
||||
/* The order has to match mdapi_pipeline_metrics. */
|
||||
gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
|
||||
"N vertices submitted");
|
||||
gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
|
||||
"N primitives submitted");
|
||||
gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
|
||||
"N vertex shader invocations");
|
||||
gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
|
||||
"N geometry shader invocations");
|
||||
gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
|
||||
"N geometry shader primitives emitted");
|
||||
gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
|
||||
"N primitives entering clipping");
|
||||
gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
|
||||
"N primitives leaving clipping");
|
||||
if (devinfo->is_haswell || devinfo->ver == 8) {
|
||||
gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
|
||||
intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
|
||||
"N fragment shader invocations",
|
||||
"N fragment shader invocations");
|
||||
} else {
|
||||
gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
|
||||
"N fragment shader invocations");
|
||||
}
|
||||
gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
|
||||
"N TCS shader invocations");
|
||||
gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
|
||||
"N TES shader invocations");
|
||||
if (devinfo->ver >= 7) {
|
||||
gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||
"N compute shader invocations");
|
||||
}
|
||||
|
||||
|
@ -185,7 +185,7 @@ gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
|
|||
/* Reuse existing CS invocation register until we can expose this new
|
||||
* one.
|
||||
*/
|
||||
gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||
intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||
"Reserved1");
|
||||
}
|
||||
|
||||
|
@ -193,13 +193,13 @@ gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
|
|||
}
|
||||
|
||||
static void
|
||||
fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
|
||||
fill_mdapi_perf_query_counter(struct intel_perf_query_info *query,
|
||||
const char *name,
|
||||
uint32_t data_offset,
|
||||
uint32_t data_size,
|
||||
enum gen_perf_counter_data_type data_type)
|
||||
enum intel_perf_counter_data_type data_type)
|
||||
{
|
||||
struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
|
||||
struct intel_perf_query_counter *counter = &query->counters[query->n_counters];
|
||||
|
||||
assert(query->n_counters <= query->max_counters);
|
||||
|
||||
|
@ -211,7 +211,7 @@ fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
|
|||
|
||||
query->n_counters++;
|
||||
|
||||
assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
|
||||
assert(counter->offset + intel_perf_query_counter_get_size(counter) <= query->data_size);
|
||||
}
|
||||
|
||||
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
|
||||
|
@ -229,10 +229,10 @@ fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
|
|||
GEN_PERF_COUNTER_DATA_TYPE_##type_name)
|
||||
|
||||
void
|
||||
gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
|
||||
intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
struct gen_perf_query_info *query = NULL;
|
||||
struct intel_perf_query_info *query = NULL;
|
||||
|
||||
/* MDAPI requires different structures for pretty much every generation
|
||||
* (right now we have definitions for gen 7 to 12).
|
||||
|
@ -242,7 +242,7 @@ gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
|
|||
|
||||
switch (devinfo->ver) {
|
||||
case 7: {
|
||||
query = gen_perf_append_query_info(perf, 1 + 45 + 16 + 7);
|
||||
query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7);
|
||||
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
|
||||
|
||||
struct gfx7_mdapi_metrics metric_data;
|
||||
|
@ -267,7 +267,7 @@ gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
|
|||
break;
|
||||
}
|
||||
case 8: {
|
||||
query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16);
|
||||
query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16);
|
||||
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||
|
||||
struct gfx8_mdapi_metrics metric_data;
|
||||
|
@ -304,7 +304,7 @@ gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
|
|||
case 9:
|
||||
case 11:
|
||||
case 12: {
|
||||
query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
|
||||
query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
|
||||
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||
|
||||
struct gfx9_mdapi_metrics metric_data;
|
||||
|
@ -355,7 +355,7 @@ gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
|
|||
|
||||
{
|
||||
/* Accumulation buffer offsets copied from an actual query... */
|
||||
const struct gen_perf_query_info *copy_query =
|
||||
const struct intel_perf_query_info *copy_query =
|
||||
&perf->queries[0];
|
||||
|
||||
query->gpu_time_offset = copy_query->gpu_time_offset;
|
||||
|
|
|
@ -28,7 +28,7 @@

#include "dev/intel_device_info.h"

struct gen_perf_query_result;
struct intel_perf_query_result;

/* Guid has to matches with MDAPI's. */
#define GEN_PERF_QUERY_GUID_MDAPI "2f01b241-7014-42a7-9eb6-a925cad3daba"

@ -127,12 +127,12 @@ struct mdapi_pipeline_metrics {
   uint64_t Reserved1; /* Gfx10+ */
};

int gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
int intel_perf_query_result_write_mdapi(void *data, uint32_t data_size,
                                      const struct intel_device_info *devinfo,
                                      const struct gen_perf_query_info *query,
                                      const struct gen_perf_query_result *result);
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *result);

static inline void gen_perf_query_mdapi_write_marker(void *data, uint32_t data_size,
static inline void intel_perf_query_mdapi_write_marker(void *data, uint32_t data_size,
                                                     const struct intel_device_info *devinfo,
                                                     uint64_t value)
{
@ -37,11 +37,11 @@ static inline uint64_t to_const_user_pointer(const void *ptr)
}

static inline void
gen_perf_query_add_stat_reg(struct gen_perf_query_info *query, uint32_t reg,
intel_perf_query_add_stat_reg(struct intel_perf_query_info *query, uint32_t reg,
                            uint32_t numerator, uint32_t denominator,
                            const char *name, const char *description)
{
   struct gen_perf_query_counter *counter;
   struct intel_perf_query_counter *counter;

   assert(query->n_counters < query->max_counters);

@ -59,19 +59,19 @@ gen_perf_query_add_stat_reg(struct gen_perf_query_info *query, uint32_t reg,
}

static inline void
gen_perf_query_add_basic_stat_reg(struct gen_perf_query_info *query,
intel_perf_query_add_basic_stat_reg(struct intel_perf_query_info *query,
                                  uint32_t reg, const char *name)
{
   gen_perf_query_add_stat_reg(query, reg, 1, 1, name, name);
   intel_perf_query_add_stat_reg(query, reg, 1, 1, name, name);
}

static inline struct gen_perf_query_info *
gen_perf_append_query_info(struct gen_perf_config *perf, int max_counters)
static inline struct intel_perf_query_info *
intel_perf_append_query_info(struct intel_perf_config *perf, int max_counters)
{
   struct gen_perf_query_info *query;
   struct intel_perf_query_info *query;

   perf->queries = reralloc(perf, perf->queries,
                            struct gen_perf_query_info,
                            struct intel_perf_query_info,
                            ++perf->n_queries);
   query = &perf->queries[perf->n_queries - 1];
   memset(query, 0, sizeof(*query));

@ -81,15 +81,15 @@ gen_perf_append_query_info(struct gen_perf_config *perf, int max_counters)
   if (max_counters > 0) {
      query->max_counters = max_counters;
      query->counters =
         rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
         rzalloc_array(perf, struct intel_perf_query_counter, max_counters);
   }

   return query;
}

void gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
void intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg,
                                             const struct intel_device_info *devinfo);
void gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
void intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
                                        const struct intel_device_info *devinfo);
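(Not part of the commit: a hedged sketch.) The inline helpers above are the building blocks that intel_perf_register_mdapi_statistic_query() uses later in this diff; a caller could assemble a tiny pipeline-statistics query with them as shown below, assuming the IA_VERTICES_COUNT / IA_PRIMITIVES_COUNT register defines that the MDAPI code also relies on, and a made-up function name.

static void
register_minimal_stat_query(struct intel_perf_config *perf)
{
   /* Reserve room for two pipeline-statistics counters. */
   struct intel_perf_query_info *query =
      intel_perf_append_query_info(perf, 2);

   query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
   query->name = "Minimal_Pipeline_Statistics";

   /* Each call records one statistics register to snapshot at query begin/end. */
   intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
                                       "N vertices submitted");
   intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
                                       "N primitives submitted");
}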
@ -190,9 +190,9 @@ struct oa_sample_buf {
|
|||
* applications may expect to allocate enough objects to be able to
|
||||
* query around all draw calls in a frame.
|
||||
*/
|
||||
struct gen_perf_query_object
|
||||
struct intel_perf_query_object
|
||||
{
|
||||
const struct gen_perf_query_info *queryinfo;
|
||||
const struct intel_perf_query_info *queryinfo;
|
||||
|
||||
/* See query->kind to know which state below is in use... */
|
||||
union {
|
||||
|
@ -237,7 +237,7 @@ struct gen_perf_query_object
|
|||
/**
|
||||
* Accumulated OA results between begin and end of the query.
|
||||
*/
|
||||
struct gen_perf_query_result result;
|
||||
struct intel_perf_query_result result;
|
||||
} oa;
|
||||
|
||||
struct {
|
||||
|
@ -250,8 +250,8 @@ struct gen_perf_query_object
|
|||
};
|
||||
};
|
||||
|
||||
struct gen_perf_context {
|
||||
struct gen_perf_config *perf;
|
||||
struct intel_perf_context {
|
||||
struct intel_perf_config *perf;
|
||||
|
||||
void * mem_ctx; /* ralloc context */
|
||||
void * ctx; /* driver context (eg, brw_context) */
|
||||
|
@ -304,7 +304,7 @@ struct gen_perf_context {
|
|||
* These may be active, or have already ended. However, the
|
||||
* results have not been requested.
|
||||
*/
|
||||
struct gen_perf_query_object **unaccumulated;
|
||||
struct intel_perf_query_object **unaccumulated;
|
||||
int unaccumulated_elements;
|
||||
int unaccumulated_array_size;
|
||||
|
||||
|
@ -317,7 +317,7 @@ struct gen_perf_context {
|
|||
};
|
||||
|
||||
static bool
|
||||
inc_n_users(struct gen_perf_context *perf_ctx)
|
||||
inc_n_users(struct intel_perf_context *perf_ctx)
|
||||
{
|
||||
if (perf_ctx->n_oa_users == 0 &&
|
||||
intel_ioctl(perf_ctx->oa_stream_fd, I915_PERF_IOCTL_ENABLE, 0) < 0)
|
||||
|
@ -330,7 +330,7 @@ inc_n_users(struct gen_perf_context *perf_ctx)
|
|||
}
|
||||
|
||||
static void
|
||||
dec_n_users(struct gen_perf_context *perf_ctx)
|
||||
dec_n_users(struct intel_perf_context *perf_ctx)
|
||||
{
|
||||
/* Disabling the i915 perf stream will effectively disable the OA
|
||||
* counters. Note it's important to be sure there are no outstanding
|
||||
|
@ -346,22 +346,22 @@ dec_n_users(struct gen_perf_context *perf_ctx)
|
|||
}
|
||||
|
||||
static void
|
||||
gen_perf_close(struct gen_perf_context *perfquery,
|
||||
const struct gen_perf_query_info *query)
|
||||
intel_perf_close(struct intel_perf_context *perfquery,
|
||||
const struct intel_perf_query_info *query)
|
||||
{
|
||||
if (perfquery->oa_stream_fd != -1) {
|
||||
close(perfquery->oa_stream_fd);
|
||||
perfquery->oa_stream_fd = -1;
|
||||
}
|
||||
if (query->kind == GEN_PERF_QUERY_TYPE_RAW) {
|
||||
struct gen_perf_query_info *raw_query =
|
||||
(struct gen_perf_query_info *) query;
|
||||
struct intel_perf_query_info *raw_query =
|
||||
(struct intel_perf_query_info *) query;
|
||||
raw_query->oa_metrics_set_id = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
gen_perf_open(struct gen_perf_context *perf_ctx,
|
||||
intel_perf_open(struct intel_perf_context *perf_ctx,
|
||||
int metrics_set_id,
|
||||
int report_format,
|
||||
int period_exponent,
|
||||
|
@ -390,7 +390,7 @@ gen_perf_open(struct gen_perf_context *perf_ctx,
|
|||
properties[p++] = period_exponent;
|
||||
|
||||
/* SSEU configuration */
|
||||
if (gen_perf_has_global_sseu(perf_ctx->perf)) {
|
||||
if (intel_perf_has_global_sseu(perf_ctx->perf)) {
|
||||
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
|
||||
properties[p++] = to_user_pointer(&perf_ctx->perf->sseu);
|
||||
}
|
||||
|
@ -419,8 +419,8 @@ gen_perf_open(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
static uint64_t
|
||||
get_metric_id(struct gen_perf_config *perf,
|
||||
const struct gen_perf_query_info *query)
|
||||
get_metric_id(struct intel_perf_config *perf,
|
||||
const struct intel_perf_query_info *query)
|
||||
{
|
||||
/* These queries are know not to ever change, their config ID has been
|
||||
* loaded upon the first query creation. No need to look them up again.
|
||||
|
@ -441,8 +441,8 @@ get_metric_id(struct gen_perf_config *perf,
|
|||
return query->oa_metrics_set_id;
|
||||
}
|
||||
|
||||
struct gen_perf_query_info *raw_query = (struct gen_perf_query_info *)query;
|
||||
if (!gen_perf_load_metric_id(perf, query->guid,
|
||||
struct intel_perf_query_info *raw_query = (struct intel_perf_query_info *)query;
|
||||
if (!intel_perf_load_metric_id(perf, query->guid,
|
||||
&raw_query->oa_metrics_set_id)) {
|
||||
DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid);
|
||||
raw_query->oa_metrics_set_id = perf->fallback_raw_oa_metric;
|
||||
|
@ -454,7 +454,7 @@ get_metric_id(struct gen_perf_config *perf,
|
|||
}
|
||||
|
||||
static struct oa_sample_buf *
|
||||
get_free_sample_buf(struct gen_perf_context *perf_ctx)
|
||||
get_free_sample_buf(struct intel_perf_context *perf_ctx)
|
||||
{
|
||||
struct exec_node *node = exec_list_pop_head(&perf_ctx->free_sample_buffers);
|
||||
struct oa_sample_buf *buf;
|
||||
|
@ -473,7 +473,7 @@ get_free_sample_buf(struct gen_perf_context *perf_ctx)
|
|||
}
|
||||
|
||||
static void
|
||||
reap_old_sample_buffers(struct gen_perf_context *perf_ctx)
|
||||
reap_old_sample_buffers(struct intel_perf_context *perf_ctx)
|
||||
{
|
||||
struct exec_node *tail_node =
|
||||
exec_list_get_tail(&perf_ctx->sample_buffers);
|
||||
|
@ -497,7 +497,7 @@ reap_old_sample_buffers(struct gen_perf_context *perf_ctx)
|
|||
}
|
||||
|
||||
static void
|
||||
free_sample_bufs(struct gen_perf_context *perf_ctx)
|
||||
free_sample_bufs(struct intel_perf_context *perf_ctx)
|
||||
{
|
||||
foreach_list_typed_safe(struct oa_sample_buf, buf, link,
|
||||
&perf_ctx->free_sample_buffers)
|
||||
|
@ -507,13 +507,13 @@ free_sample_bufs(struct gen_perf_context *perf_ctx)
|
|||
}
|
||||
|
||||
|
||||
struct gen_perf_query_object *
|
||||
gen_perf_new_query(struct gen_perf_context *perf_ctx, unsigned query_index)
|
||||
struct intel_perf_query_object *
|
||||
intel_perf_new_query(struct intel_perf_context *perf_ctx, unsigned query_index)
|
||||
{
|
||||
const struct gen_perf_query_info *query =
|
||||
const struct intel_perf_query_info *query =
|
||||
&perf_ctx->perf->queries[query_index];
|
||||
struct gen_perf_query_object *obj =
|
||||
calloc(1, sizeof(struct gen_perf_query_object));
|
||||
struct intel_perf_query_object *obj =
|
||||
calloc(1, sizeof(struct intel_perf_query_object));
|
||||
|
||||
if (!obj)
|
||||
return NULL;
|
||||
|
@ -525,8 +525,8 @@ gen_perf_new_query(struct gen_perf_context *perf_ctx, unsigned query_index)
|
|||
}
|
||||
|
||||
int
|
||||
gen_perf_active_queries(struct gen_perf_context *perf_ctx,
|
||||
const struct gen_perf_query_info *query)
|
||||
intel_perf_active_queries(struct intel_perf_context *perf_ctx,
|
||||
const struct intel_perf_query_info *query)
|
||||
{
|
||||
assert(perf_ctx->n_active_oa_queries == 0 || perf_ctx->n_active_pipeline_stats_queries == 0);
|
||||
|
||||
|
@ -546,30 +546,30 @@ gen_perf_active_queries(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
}
|
||||
|
||||
const struct gen_perf_query_info*
|
||||
gen_perf_query_info(const struct gen_perf_query_object *query)
|
||||
const struct intel_perf_query_info*
|
||||
intel_perf_query_info(const struct intel_perf_query_object *query)
|
||||
{
|
||||
return query->queryinfo;
|
||||
}
|
||||
|
||||
struct gen_perf_context *
|
||||
gen_perf_new_context(void *parent)
|
||||
struct intel_perf_context *
|
||||
intel_perf_new_context(void *parent)
|
||||
{
|
||||
struct gen_perf_context *ctx = rzalloc(parent, struct gen_perf_context);
|
||||
struct intel_perf_context *ctx = rzalloc(parent, struct intel_perf_context);
|
||||
if (! ctx)
|
||||
fprintf(stderr, "%s: failed to alloc context\n", __func__);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
struct gen_perf_config *
|
||||
gen_perf_config(struct gen_perf_context *ctx)
|
||||
struct intel_perf_config *
|
||||
intel_perf_config(struct intel_perf_context *ctx)
|
||||
{
|
||||
return ctx->perf;
|
||||
}
|
||||
|
||||
void
|
||||
gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_config *perf_cfg,
|
||||
intel_perf_init_context(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_config *perf_cfg,
|
||||
void * mem_ctx, /* ralloc context */
|
||||
void * ctx, /* driver context (eg, brw_context) */
|
||||
void * bufmgr, /* eg brw_bufmgr */
|
||||
|
@ -586,7 +586,7 @@ gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
|||
perf_ctx->devinfo = devinfo;
|
||||
|
||||
perf_ctx->unaccumulated =
|
||||
ralloc_array(mem_ctx, struct gen_perf_query_object *, 2);
|
||||
ralloc_array(mem_ctx, struct intel_perf_query_object *, 2);
|
||||
perf_ctx->unaccumulated_elements = 0;
|
||||
perf_ctx->unaccumulated_array_size = 2;
|
||||
|
||||
|
@ -613,8 +613,8 @@ gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
|||
* MI_REPORT_PERF_COUNT has landed in query->oa.bo.
|
||||
*/
|
||||
static void
|
||||
add_to_unaccumulated_query_list(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *obj)
|
||||
add_to_unaccumulated_query_list(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *obj)
|
||||
{
|
||||
if (perf_ctx->unaccumulated_elements >=
|
||||
perf_ctx->unaccumulated_array_size)
|
||||
|
@ -622,7 +622,7 @@ add_to_unaccumulated_query_list(struct gen_perf_context *perf_ctx,
|
|||
perf_ctx->unaccumulated_array_size *= 1.5;
|
||||
perf_ctx->unaccumulated =
|
||||
reralloc(perf_ctx->mem_ctx, perf_ctx->unaccumulated,
|
||||
struct gen_perf_query_object *,
|
||||
struct intel_perf_query_object *,
|
||||
perf_ctx->unaccumulated_array_size);
|
||||
}
|
||||
|
||||
|
@ -634,16 +634,16 @@ add_to_unaccumulated_query_list(struct gen_perf_context *perf_ctx,
|
|||
* pipeline statistics for the performance query object.
|
||||
*/
|
||||
static void
|
||||
snapshot_statistics_registers(struct gen_perf_context *ctx,
|
||||
struct gen_perf_query_object *obj,
|
||||
snapshot_statistics_registers(struct intel_perf_context *ctx,
|
||||
struct intel_perf_query_object *obj,
|
||||
uint32_t offset_in_bytes)
|
||||
{
|
||||
struct gen_perf_config *perf = ctx->perf;
|
||||
const struct gen_perf_query_info *query = obj->queryinfo;
|
||||
struct intel_perf_config *perf = ctx->perf;
|
||||
const struct intel_perf_query_info *query = obj->queryinfo;
|
||||
const int n_counters = query->n_counters;
|
||||
|
||||
for (int i = 0; i < n_counters; i++) {
|
||||
const struct gen_perf_query_counter *counter = &query->counters[i];
|
||||
const struct intel_perf_query_counter *counter = &query->counters[i];
|
||||
|
||||
assert(counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
|
||||
|
||||
|
@ -654,16 +654,16 @@ snapshot_statistics_registers(struct gen_perf_context *ctx,
|
|||
}
|
||||
|
||||
static void
|
||||
snapshot_query_layout(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
snapshot_query_layout(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
bool end_snapshot)
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
const struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
const struct intel_perf_query_field_layout *layout = &perf_cfg->query_layout;
|
||||
uint32_t offset = end_snapshot ? align(layout->size, layout->alignment) : 0;
|
||||
|
||||
for (uint32_t f = 0; f < layout->n_fields; f++) {
|
||||
const struct gen_perf_query_field *field =
|
||||
const struct intel_perf_query_field *field =
|
||||
&layout->fields[end_snapshot ? f : (layout->n_fields - 1 - f)];
|
||||
|
||||
switch (field->type) {
|
||||
|
@ -688,11 +688,11 @@ snapshot_query_layout(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
bool
|
||||
gen_perf_begin_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query)
|
||||
intel_perf_begin_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query)
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
const struct gen_perf_query_info *queryinfo = query->queryinfo;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
const struct intel_perf_query_info *queryinfo = query->queryinfo;
|
||||
|
||||
/* XXX: We have to consider that the command parser unit that parses batch
|
||||
* buffer commands and is used to capture begin/end counter snapshots isn't
|
||||
|
@ -754,7 +754,7 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx,
|
|||
perf_ctx->current_oa_metrics_set_id, metric_id);
|
||||
return false;
|
||||
} else
|
||||
gen_perf_close(perf_ctx, queryinfo);
|
||||
intel_perf_close(perf_ctx, queryinfo);
|
||||
}
|
||||
|
||||
/* If the OA counters aren't already on, enable them. */
|
||||
|
@ -813,7 +813,7 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx,
|
|||
DBG("OA sampling exponent: %i ~= %"PRIu64"ms\n", period_exponent,
|
||||
prev_sample_period / 1000000ul);
|
||||
|
||||
if (!gen_perf_open(perf_ctx, metric_id, queryinfo->oa_format,
|
||||
if (!intel_perf_open(perf_ctx, metric_id, queryinfo->oa_format,
|
||||
period_exponent, perf_ctx->drm_fd,
|
||||
perf_ctx->hw_ctx))
|
||||
return false;
|
||||
|
@ -866,7 +866,7 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx,
|
|||
*/
|
||||
buf->refcount++;
|
||||
|
||||
gen_perf_query_result_clear(&query->oa.result);
|
||||
intel_perf_query_result_clear(&query->oa.result);
|
||||
query->oa.results_accumulated = false;
|
||||
|
||||
add_to_unaccumulated_query_list(perf_ctx, query);
|
||||
|
@ -899,10 +899,10 @@ gen_perf_begin_query(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_end_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query)
|
||||
intel_perf_end_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query)
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
|
||||
/* Ensure that the work associated with the queried commands will have
|
||||
* finished before taking our query end counter readings.
|
||||
|
@ -951,7 +951,7 @@ enum OaReadStatus {
|
|||
};
|
||||
|
||||
static enum OaReadStatus
|
||||
read_oa_samples_until(struct gen_perf_context *perf_ctx,
|
||||
read_oa_samples_until(struct intel_perf_context *perf_ctx,
|
||||
uint32_t start_timestamp,
|
||||
uint32_t end_timestamp)
|
||||
{
|
||||
|
@ -1022,14 +1022,14 @@ read_oa_samples_until(struct gen_perf_context *perf_ctx,
|
|||
* or an error arises.
|
||||
*/
|
||||
static bool
|
||||
read_oa_samples_for_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
read_oa_samples_for_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
void *current_batch)
|
||||
{
|
||||
uint32_t *start;
|
||||
uint32_t *last;
|
||||
uint32_t *end;
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
|
||||
/* We need the MI_REPORT_PERF_COUNT to land before we can start
|
||||
* accumulate. */
|
||||
|
@ -1068,11 +1068,11 @@ read_oa_samples_for_query(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_wait_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
intel_perf_wait_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
void *current_batch)
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
struct brw_bo *bo = NULL;
|
||||
|
||||
switch (query->queryinfo->kind) {
|
||||
|
@ -1103,11 +1103,11 @@ gen_perf_wait_query(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
bool
|
||||
gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
intel_perf_is_query_ready(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
void *current_batch)
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
|
||||
switch (query->queryinfo->kind) {
|
||||
case GEN_PERF_QUERY_TYPE_OA:
|
||||
|
@ -1137,8 +1137,8 @@ gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
|
|||
* results.
|
||||
*/
|
||||
static void
|
||||
drop_from_unaccumulated_query_list(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query)
|
||||
drop_from_unaccumulated_query_list(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query)
|
||||
{
|
||||
for (int i = 0; i < perf_ctx->unaccumulated_elements; i++) {
|
||||
if (perf_ctx->unaccumulated[i] == query) {
|
||||
|
@ -1177,10 +1177,10 @@ drop_from_unaccumulated_query_list(struct gen_perf_context *perf_ctx,
|
|||
* best with new queries.
|
||||
*/
|
||||
static void
|
||||
discard_all_queries(struct gen_perf_context *perf_ctx)
|
||||
discard_all_queries(struct intel_perf_context *perf_ctx)
|
||||
{
|
||||
while (perf_ctx->unaccumulated_elements) {
|
||||
struct gen_perf_query_object *query = perf_ctx->unaccumulated[0];
|
||||
struct intel_perf_query_object *query = perf_ctx->unaccumulated[0];
|
||||
|
||||
query->oa.results_accumulated = true;
|
||||
drop_from_unaccumulated_query_list(perf_ctx, query);
|
||||
|
@ -1219,8 +1219,8 @@ oa_report_ctx_id_valid(const struct intel_device_info *devinfo,
|
|||
* contexts running on the system.
|
||||
*/
|
||||
static void
|
||||
accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query)
|
||||
accumulate_oa_reports(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query)
|
||||
{
|
||||
const struct intel_device_info *devinfo = perf_ctx->devinfo;
|
||||
uint32_t *start;
|
||||
|
@ -1337,7 +1337,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
if (add) {
|
||||
gen_perf_query_result_accumulate(&query->oa.result,
|
||||
intel_perf_query_result_accumulate(&query->oa.result,
|
||||
query->queryinfo,
|
||||
devinfo,
|
||||
last, report);
|
||||
|
@ -1367,7 +1367,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
|||
|
||||
end:
|
||||
|
||||
gen_perf_query_result_accumulate(&query->oa.result, query->queryinfo,
|
||||
intel_perf_query_result_accumulate(&query->oa.result, query->queryinfo,
|
||||
devinfo, last, end);
|
||||
|
||||
query->oa.results_accumulated = true;
|
||||
|
@ -1382,10 +1382,10 @@ error:
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_delete_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query)
|
||||
intel_perf_delete_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query)
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
|
||||
/* We can assume that the frontend waits for a query to complete
|
||||
* before ever calling into here, so we don't have to worry about
|
||||
|
@ -1425,28 +1425,28 @@ gen_perf_delete_query(struct gen_perf_context *perf_ctx,
|
|||
*/
|
||||
if (--perf_ctx->n_query_instances == 0) {
|
||||
free_sample_bufs(perf_ctx);
|
||||
gen_perf_close(perf_ctx, query->queryinfo);
|
||||
intel_perf_close(perf_ctx, query->queryinfo);
|
||||
}
|
||||
|
||||
free(query);
|
||||
}
|
||||
|
||||
static int
|
||||
get_oa_counter_data(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
get_oa_counter_data(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
size_t data_size,
|
||||
uint8_t *data)
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
const struct gen_perf_query_info *queryinfo = query->queryinfo;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
const struct intel_perf_query_info *queryinfo = query->queryinfo;
|
||||
int n_counters = queryinfo->n_counters;
|
||||
int written = 0;
|
||||
|
||||
for (int i = 0; i < n_counters; i++) {
|
||||
const struct gen_perf_query_counter *counter = &queryinfo->counters[i];
|
||||
const struct intel_perf_query_counter *counter = &queryinfo->counters[i];
|
||||
uint64_t *out_uint64;
|
||||
float *out_float;
|
||||
size_t counter_size = gen_perf_query_counter_get_size(counter);
|
||||
size_t counter_size = intel_perf_query_counter_get_size(counter);
|
||||
|
||||
if (counter_size) {
|
||||
switch (counter->data_type) {
|
||||
|
@ -1476,14 +1476,14 @@ get_oa_counter_data(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
static int
|
||||
get_pipeline_stats_data(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
get_pipeline_stats_data(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
size_t data_size,
|
||||
uint8_t *data)
|
||||
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
const struct gen_perf_query_info *queryinfo = query->queryinfo;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
const struct intel_perf_query_info *queryinfo = query->queryinfo;
|
||||
int n_counters = queryinfo->n_counters;
|
||||
uint8_t *p = data;
|
||||
|
||||
|
@ -1491,7 +1491,7 @@ get_pipeline_stats_data(struct gen_perf_context *perf_ctx,
|
|||
uint64_t *end = start + (STATS_BO_END_OFFSET_BYTES / sizeof(uint64_t));
|
||||
|
||||
for (int i = 0; i < n_counters; i++) {
|
||||
const struct gen_perf_query_counter *counter = &queryinfo->counters[i];
|
||||
const struct intel_perf_query_counter *counter = &queryinfo->counters[i];
|
||||
uint64_t value = end[i] - start[i];
|
||||
|
||||
if (counter->pipeline_stat.numerator !=
|
||||
|
@ -1510,14 +1510,14 @@ get_pipeline_stats_data(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
intel_perf_get_query_data(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
void *current_batch,
|
||||
int data_size,
|
||||
unsigned *data,
|
||||
unsigned *bytes_written)
|
||||
{
|
||||
struct gen_perf_config *perf_cfg = perf_ctx->perf;
|
||||
struct intel_perf_config *perf_cfg = perf_ctx->perf;
|
||||
int written = 0;
|
||||
|
||||
switch (query->queryinfo->kind) {
|
||||
|
@ -1537,7 +1537,7 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
|||
|
||||
uint32_t *begin_report = query->oa.map;
|
||||
uint32_t *end_report = query->oa.map + perf_cfg->query_layout.size;
|
||||
gen_perf_query_result_accumulate_fields(&query->oa.result,
|
||||
intel_perf_query_result_accumulate_fields(&query->oa.result,
|
||||
query->queryinfo,
|
||||
perf_ctx->devinfo,
|
||||
begin_report,
|
||||
|
@ -1554,7 +1554,7 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
|||
} else {
|
||||
const struct intel_device_info *devinfo = perf_ctx->devinfo;
|
||||
|
||||
written = gen_perf_query_result_write_mdapi((uint8_t *)data, data_size,
|
||||
written = intel_perf_query_result_write_mdapi((uint8_t *)data, data_size,
|
||||
devinfo, query->queryinfo,
|
||||
&query->oa.result);
|
||||
}
|
||||
|
@ -1574,15 +1574,15 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
|||
}
|
||||
|
||||
void
|
||||
gen_perf_dump_query_count(struct gen_perf_context *perf_ctx)
|
||||
intel_perf_dump_query_count(struct intel_perf_context *perf_ctx)
|
||||
{
|
||||
DBG("Queries: (Open queries = %d, OA users = %d)\n",
|
||||
perf_ctx->n_active_oa_queries, perf_ctx->n_oa_users);
|
||||
}
|
||||
|
||||
void
|
||||
gen_perf_dump_query(struct gen_perf_context *ctx,
|
||||
struct gen_perf_query_object *obj,
|
||||
intel_perf_dump_query(struct intel_perf_context *ctx,
|
||||
struct intel_perf_query_object *obj,
|
||||
void *current_batch)
|
||||
{
|
||||
switch (obj->queryinfo->kind) {
|
||||
|
@ -1590,7 +1590,7 @@ gen_perf_dump_query(struct gen_perf_context *ctx,
|
|||
case GEN_PERF_QUERY_TYPE_RAW:
|
||||
DBG("BO: %-4s OA data: %-10s %-15s\n",
|
||||
obj->oa.bo ? "yes," : "no,",
|
||||
gen_perf_is_query_ready(ctx, obj, current_batch) ? "ready," : "not ready,",
|
||||
intel_perf_is_query_ready(ctx, obj, current_batch) ? "ready," : "not ready,",
|
||||
obj->oa.results_accumulated ? "accumulated" : "not accumulated");
|
||||
break;
|
||||
case GEN_PERF_QUERY_TYPE_PIPELINE:
|
||||
|
|
|
@ -28,14 +28,14 @@
|
|||
|
||||
struct intel_device_info;
|
||||
|
||||
struct gen_perf_config;
|
||||
struct gen_perf_context;
|
||||
struct gen_perf_query_object;
|
||||
struct intel_perf_config;
|
||||
struct intel_perf_context;
|
||||
struct intel_perf_query_object;
|
||||
|
||||
struct gen_perf_context *gen_perf_new_context(void *parent);
|
||||
struct intel_perf_context *intel_perf_new_context(void *parent);
|
||||
|
||||
void gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_config *perf_cfg,
|
||||
void intel_perf_init_context(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_config *perf_cfg,
|
||||
void * mem_ctx, /* ralloc context */
|
||||
void * ctx, /* driver context (eg, brw_context) */
|
||||
void * bufmgr, /* eg brw_bufmgr */
|
||||
|
@ -43,39 +43,39 @@ void gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
|||
uint32_t hw_ctx,
|
||||
int drm_fd);
|
||||
|
||||
const struct gen_perf_query_info* gen_perf_query_info(const struct gen_perf_query_object *);
|
||||
const struct intel_perf_query_info* intel_perf_query_info(const struct intel_perf_query_object *);
|
||||
|
||||
struct gen_perf_config *gen_perf_config(struct gen_perf_context *ctx);
|
||||
struct intel_perf_config *intel_perf_config(struct intel_perf_context *ctx);
|
||||
|
||||
int gen_perf_active_queries(struct gen_perf_context *perf_ctx,
|
||||
const struct gen_perf_query_info *query);
|
||||
int intel_perf_active_queries(struct intel_perf_context *perf_ctx,
|
||||
const struct intel_perf_query_info *query);
|
||||
|
||||
struct gen_perf_query_object *
|
||||
gen_perf_new_query(struct gen_perf_context *, unsigned query_index);
|
||||
struct intel_perf_query_object *
|
||||
intel_perf_new_query(struct intel_perf_context *, unsigned query_index);
|
||||
|
||||
|
||||
bool gen_perf_begin_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query);
|
||||
void gen_perf_end_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query);
|
||||
void gen_perf_wait_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
bool intel_perf_begin_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query);
|
||||
void intel_perf_end_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query);
|
||||
void intel_perf_wait_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
void *current_batch);
|
||||
bool gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
bool intel_perf_is_query_ready(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
void *current_batch);
|
||||
void gen_perf_delete_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query);
|
||||
void gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
void intel_perf_delete_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query);
|
||||
void intel_perf_get_query_data(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *query,
|
||||
void *current_batch,
|
||||
int data_size,
|
||||
unsigned *data,
|
||||
unsigned *bytes_written);
|
||||
|
||||
void gen_perf_dump_query_count(struct gen_perf_context *perf_ctx);
|
||||
void gen_perf_dump_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *obj,
|
||||
void intel_perf_dump_query_count(struct intel_perf_context *perf_ctx);
|
||||
void intel_perf_dump_query(struct intel_perf_context *perf_ctx,
|
||||
struct intel_perf_query_object *obj,
|
||||
void *current_batch);
|
||||
|
||||
#endif /* INTEL_PERF_QUERY_H */
|
||||
|
|
|
@ -14,13 +14,13 @@ foreach hw : gen_hw_metrics
  gen_hw_metrics_xml_files += 'oa-@0@.xml'.format(hw)
endforeach

gen_perf_sources = [
intel_perf_sources = [
  'intel_perf.c',
  'intel_perf_query.c',
  'intel_perf_mdapi.c',
]

gen_perf_sources += custom_target(
intel_perf_sources += custom_target(
  'intel-perf-sources',
  input : gen_hw_metrics_xml_files,
  output : [ 'intel_perf_metrics.c', 'intel_perf_metrics.h' ],

@ -33,7 +33,7 @@ gen_perf_sources += custom_target(

libintel_perf = static_library(
  'intel_perf',
  gen_perf_sources,
  intel_perf_sources,
  include_directories : [
    inc_include, inc_src, inc_intel,
  ],
@ -1988,7 +1988,7 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
   if (has_perf_query) {
      struct anv_query_pool *query_pool = submit->perf_query_pool;
      assert(submit->perf_query_pass < query_pool->n_passes);
      struct gen_perf_query_info *query_info =
      struct intel_perf_query_info *query_info =
         query_pool->pass_query[submit->perf_query_pass];

      /* Some performance queries just the pipeline statistic HW, no need for
@ -46,9 +46,9 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
|
|||
if (devinfo->ver < 8)
|
||||
return;
|
||||
|
||||
struct gen_perf_config *perf = gen_perf_new(NULL);
|
||||
struct intel_perf_config *perf = intel_perf_new(NULL);
|
||||
|
||||
gen_perf_init_metrics(perf, &device->info, fd, false /* pipeline statistics */);
|
||||
intel_perf_init_metrics(perf, &device->info, fd, false /* pipeline statistics */);
|
||||
|
||||
if (!perf->n_queries) {
|
||||
if (perf->platform_supported) {
|
||||
|
@ -67,7 +67,7 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
|
|||
* perf revision 2.
|
||||
*/
|
||||
if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
|
||||
if (!gen_perf_has_hold_preemption(perf))
|
||||
if (!intel_perf_has_hold_preemption(perf))
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -76,10 +76,10 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
|
|||
/* Compute the number of commands we need to implement a performance
|
||||
* query.
|
||||
*/
|
||||
const struct gen_perf_query_field_layout *layout = &perf->query_layout;
|
||||
const struct intel_perf_query_field_layout *layout = &perf->query_layout;
|
||||
device->n_perf_query_commands = 0;
|
||||
for (uint32_t f = 0; f < layout->n_fields; f++) {
|
||||
struct gen_perf_query_field *field = &layout->fields[f];
|
||||
struct intel_perf_query_field *field = &layout->fields[f];
|
||||
|
||||
switch (field->type) {
|
||||
case GEN_PERF_QUERY_FIELD_TYPE_MI_RPC:
|
||||
|
@ -140,7 +140,7 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
|
|||
* enabled we would use only half on Gfx11 because of functional
|
||||
* requirements.
|
||||
*/
|
||||
if (gen_perf_has_global_sseu(device->physical->perf)) {
|
||||
if (intel_perf_has_global_sseu(device->physical->perf)) {
|
||||
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
|
||||
properties[p++] = (uintptr_t) &device->physical->perf->sseu;
|
||||
}
|
||||
|
@ -225,7 +225,7 @@ VkResult anv_AcquirePerformanceConfigurationINTEL(
|
|||
|
||||
if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
|
||||
config->register_config =
|
||||
gen_perf_load_configuration(device->physical->perf, device->fd,
|
||||
intel_perf_load_configuration(device->physical->perf, device->fd,
|
||||
GEN_PERF_QUERY_GUID_MDAPI);
|
||||
if (!config->register_config) {
|
||||
vk_object_free(&device->vk, NULL, config);
|
||||
|
@ -233,7 +233,7 @@ VkResult anv_AcquirePerformanceConfigurationINTEL(
|
|||
}
|
||||
|
||||
int ret =
|
||||
gen_perf_store_configuration(device->physical->perf, device->fd,
|
||||
intel_perf_store_configuration(device->physical->perf, device->fd,
|
||||
config->register_config, NULL /* guid */);
|
||||
if (ret < 0) {
|
||||
ralloc_free(config->register_config);
|
||||
|
@ -303,7 +303,7 @@ void anv_UninitializePerformanceApiINTEL(
|
|||
|
||||
/* VK_KHR_performance_query */
|
||||
static const VkPerformanceCounterUnitKHR
|
||||
gen_perf_counter_unit_to_vk_unit[] = {
|
||||
intel_perf_counter_unit_to_vk_unit[] = {
|
||||
[GEN_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
|
||||
[GEN_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
|
||||
[GEN_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
|
||||
|
@ -324,7 +324,7 @@ gen_perf_counter_unit_to_vk_unit[] = {
|
|||
};
|
||||
|
||||
static const VkPerformanceCounterStorageKHR
|
||||
gen_perf_counter_data_type_to_vk_storage[] = {
|
||||
intel_perf_counter_data_type_to_vk_storage[] = {
|
||||
[GEN_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
|
||||
[GEN_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
|
||||
[GEN_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
|
||||
|
@ -340,7 +340,7 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
|
|||
VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
|
||||
struct gen_perf_config *perf = pdevice->perf;
|
||||
struct intel_perf_config *perf = pdevice->perf;
|
||||
|
||||
uint32_t desc_count = *pCounterCount;
|
||||
|
||||
|
@ -348,12 +348,12 @@ VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
|
|||
VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);
|
||||
|
||||
for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
|
||||
const struct gen_perf_query_counter *gen_counter = perf->counter_infos[c].counter;
|
||||
const struct intel_perf_query_counter *gen_counter = perf->counter_infos[c].counter;
|
||||
|
||||
vk_outarray_append(&out, counter) {
|
||||
counter->unit = gen_perf_counter_unit_to_vk_unit[gen_counter->units];
|
||||
counter->unit = intel_perf_counter_unit_to_vk_unit[gen_counter->units];
|
||||
counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
|
||||
counter->storage = gen_perf_counter_data_type_to_vk_storage[gen_counter->data_type];
|
||||
counter->storage = intel_perf_counter_data_type_to_vk_storage[gen_counter->data_type];
|
||||
|
||||
unsigned char sha1_result[20];
|
||||
_mesa_sha1_compute(gen_counter->symbol_name,
|
||||
|
@ -379,14 +379,14 @@ void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct gen_perf_config *perf = pdevice->perf;
   struct intel_perf_config *perf = pdevice->perf;

   if (!perf) {
      *pNumPasses = 0;
      return;
   }

   *pNumPasses = gen_perf_get_n_passes(perf,
   *pNumPasses = intel_perf_get_n_passes(perf,
                                       pPerformanceQueryCreateInfo->pCounterIndices,
                                       pPerformanceQueryCreateInfo->counterIndexCount,
                                       NULL);
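(Not part of the commit: an illustrative pairing.) The renamed pass helpers are typically used together: intel_perf_get_n_passes() reports how many hardware passes a counter selection needs, and intel_perf_get_counters_passes() records which pass serves each counter. The wrapper below is a made-up example; the signatures match the declarations in intel_perf.h earlier in this diff.

static uint32_t
plan_passes(struct intel_perf_config *perf,
            const uint32_t *counter_indices, uint32_t count,
            struct intel_perf_counter_pass *counter_pass /* count entries */)
{
   /* Ask how many hardware passes the requested counters need... */
   uint32_t n_passes = intel_perf_get_n_passes(perf, counter_indices, count, NULL);

   /* ...then record, per counter, the query and pass it is read from. */
   intel_perf_get_counters_passes(perf, counter_indices, count, counter_pass);

   return n_passes;
}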
@ -397,8 +397,8 @@ VkResult anv_AcquireProfilingLockKHR(
|
|||
const VkAcquireProfilingLockInfoKHR* pInfo)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
struct gen_perf_config *perf = device->physical->perf;
|
||||
struct gen_perf_query_info *first_metric_set = &perf->queries[0];
|
||||
struct intel_perf_config *perf = device->physical->perf;
|
||||
struct intel_perf_query_info *first_metric_set = &perf->queries[0];
|
||||
int fd = -1;
|
||||
|
||||
assert(device->perf_fd == -1);
|
||||
|
@ -426,13 +426,13 @@ void anv_ReleaseProfilingLockKHR(
|
|||
}
|
||||
|
||||
void
|
||||
anv_perf_write_pass_results(struct gen_perf_config *perf,
|
||||
anv_perf_write_pass_results(struct intel_perf_config *perf,
|
||||
struct anv_query_pool *pool, uint32_t pass,
|
||||
const struct gen_perf_query_result *accumulated_results,
|
||||
const struct intel_perf_query_result *accumulated_results,
|
||||
union VkPerformanceCounterResultKHR *results)
|
||||
{
|
||||
for (uint32_t c = 0; c < pool->n_counters; c++) {
|
||||
const struct gen_perf_counter_pass *counter_pass = &pool->counter_pass[c];
|
||||
const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
|
||||
|
||||
if (counter_pass->pass != pass)
|
||||
continue;
|
||||
|
|
|
@@ -84,9 +84,9 @@ struct anv_image_view;
 struct anv_instance;

 struct intel_aux_map_context;
-struct gen_perf_config;
-struct gen_perf_counter_pass;
-struct gen_perf_query_result;
+struct intel_perf_config;
+struct intel_perf_counter_pass;
+struct intel_perf_query_result;

 #include <vulkan/vulkan.h>
 #include <vulkan/vk_icd.h>
@@ -903,7 +903,7 @@ struct anv_physical_device {
    bool supports_48bit_addresses;
    struct brw_compiler * compiler;
    struct isl_device isl_dev;
-   struct gen_perf_config * perf;
+   struct intel_perf_config * perf;
    /*
     * Number of commands required to implement a performance query begin +
     * end.
@@ -4393,9 +4393,9 @@ struct anv_query_pool {
    uint32_t data_offset;
    uint32_t snapshot_size;
    uint32_t n_counters;
-   struct gen_perf_counter_pass *counter_pass;
+   struct intel_perf_counter_pass *counter_pass;
    uint32_t n_passes;
-   struct gen_perf_query_info **pass_query;
+   struct intel_perf_query_info **pass_query;
 };

 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
@@ -4444,16 +4444,16 @@ anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
 struct anv_performance_configuration_intel {
    struct vk_object_base base;

-   struct gen_perf_registers *register_config;
+   struct intel_perf_registers *register_config;

    uint64_t config_id;
 };

 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
 void anv_device_perf_init(struct anv_device *device);
-void anv_perf_write_pass_results(struct gen_perf_config *perf,
+void anv_perf_write_pass_results(struct intel_perf_config *perf,
                                  struct anv_query_pool *pool, uint32_t pass,
-                                 const struct gen_perf_query_result *accumulated_results,
+                                 const struct intel_perf_query_result *accumulated_results,
                                  union VkPerformanceCounterResultKHR *results);

 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
@@ -67,8 +67,8 @@ VkResult genX(CreateQueryPool)(
    const struct anv_physical_device *pdevice = device->physical;
 #if GFX_VER >= 8
    const VkQueryPoolPerformanceCreateInfoKHR *perf_query_info = NULL;
-   struct gen_perf_counter_pass *counter_pass;
-   struct gen_perf_query_info **pass_query;
+   struct intel_perf_counter_pass *counter_pass;
+   struct intel_perf_query_info **pass_query;
    uint32_t n_passes = 0;
 #endif
    uint32_t data_offset = 0;
@@ -118,7 +118,7 @@ VkResult genX(CreateQueryPool)(
       uint64s_per_slot = 1 + 4;
       break;
    case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
-      const struct gen_perf_query_field_layout *layout =
+      const struct intel_perf_query_field_layout *layout =
         &pdevice->perf->query_layout;

      uint64s_per_slot = 2; /* availability + marker */
@@ -132,18 +132,18 @@ VkResult genX(CreateQueryPool)(
    }
 #if GFX_VER >= 8
    case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
-      const struct gen_perf_query_field_layout *layout =
+      const struct intel_perf_query_field_layout *layout =
         &pdevice->perf->query_layout;

      perf_query_info = vk_find_struct_const(pCreateInfo->pNext,
                                             QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR);
-      n_passes = gen_perf_get_n_passes(pdevice->perf,
+      n_passes = intel_perf_get_n_passes(pdevice->perf,
                                        perf_query_info->pCounterIndices,
                                        perf_query_info->counterIndexCount,
                                        NULL);
-      vk_multialloc_add(&ma, &counter_pass, struct gen_perf_counter_pass,
+      vk_multialloc_add(&ma, &counter_pass, struct intel_perf_counter_pass,
                        perf_query_info->counterIndexCount);
-      vk_multialloc_add(&ma, &pass_query, struct gen_perf_query_info *,
+      vk_multialloc_add(&ma, &pass_query, struct intel_perf_query_info *,
                        n_passes);
      uint64s_per_slot = 4 /* availability + small batch */;
      /* Align to the requirement of the layout */
@@ -181,13 +181,13 @@ VkResult genX(CreateQueryPool)(
      pool->snapshot_size = (pool->pass_size - data_offset) / 2;
      pool->n_counters = perf_query_info->counterIndexCount;
      pool->counter_pass = counter_pass;
-      gen_perf_get_counters_passes(pdevice->perf,
+      intel_perf_get_counters_passes(pdevice->perf,
                                   perf_query_info->pCounterIndices,
                                   perf_query_info->counterIndexCount,
                                   pool->counter_pass);
      pool->n_passes = n_passes;
      pool->pass_query = pass_query;
-      gen_perf_get_n_passes(pdevice->perf,
+      intel_perf_get_n_passes(pdevice->perf,
                            perf_query_info->pCounterIndices,
                            perf_query_info->counterIndexCount,
                            pool->pass_query);
@@ -561,10 +561,10 @@ VkResult genX(GetQueryPoolResults)(
         assert((flags & (VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
                          VK_QUERY_RESULT_PARTIAL_BIT)) == 0);
         for (uint32_t p = 0; p < pool->n_passes; p++) {
-            const struct gen_perf_query_info *query = pool->pass_query[p];
-            struct gen_perf_query_result result;
-            gen_perf_query_result_clear(&result);
-            gen_perf_query_result_accumulate_fields(&result, query, &device->info,
+            const struct intel_perf_query_info *query = pool->pass_query[p];
+            struct intel_perf_query_result result;
+            intel_perf_query_result_clear(&result);
+            intel_perf_query_result_accumulate_fields(&result, query, &device->info,
                                                    pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, false),
                                                    pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, true),
                                                    false /* no_oa_accumulate */);
@@ -578,18 +578,18 @@ VkResult genX(GetQueryPoolResults)(
         if (!write_results)
            break;
         const void *query_data = query_slot(pool, firstQuery + i);
-         const struct gen_perf_query_info *query = &device->physical->perf->queries[0];
-         struct gen_perf_query_result result;
-         gen_perf_query_result_clear(&result);
-         gen_perf_query_result_accumulate_fields(&result, query, &device->info,
+         const struct intel_perf_query_info *query = &device->physical->perf->queries[0];
+         struct intel_perf_query_result result;
+         intel_perf_query_result_clear(&result);
+         intel_perf_query_result_accumulate_fields(&result, query, &device->info,
                                                 query_data + intel_perf_query_data_offset(pool, false),
                                                 query_data + intel_perf_query_data_offset(pool, true),
                                                 false /* no_oa_accumulate */);
-         gen_perf_query_result_write_mdapi(pData, stride,
+         intel_perf_query_result_write_mdapi(pData, stride,
                                           &device->info,
                                           query, &result);
         const uint64_t *marker = query_data + intel_perf_marker_offset();
-         gen_perf_query_mdapi_write_marker(pData, stride, &device->info, *marker);
+         intel_perf_query_mdapi_write_marker(pData, stride, &device->info, *marker);
         break;
      }

@@ -850,13 +850,13 @@ emit_perf_intel_query(struct anv_cmd_buffer *cmd_buffer,
                       struct anv_address query_addr,
                       bool end)
 {
-   const struct gen_perf_query_field_layout *layout =
+   const struct intel_perf_query_field_layout *layout =
       &cmd_buffer->device->physical->perf->query_layout;
    struct anv_address data_addr =
       anv_address_add(query_addr, intel_perf_query_data_offset(pool, end));

    for (uint32_t f = 0; f < layout->n_fields; f++) {
-      const struct gen_perf_query_field *field =
+      const struct intel_perf_query_field *field =
         &layout->fields[end ? f : (layout->n_fields - 1 - f)];

       switch (field->type) {
@@ -946,12 +946,12 @@ void genX(CmdBeginQueryIndexedEXT)(
         return;

      const struct anv_physical_device *pdevice = cmd_buffer->device->physical;
-      const struct gen_perf_query_field_layout *layout = &pdevice->perf->query_layout;
+      const struct intel_perf_query_field_layout *layout = &pdevice->perf->query_layout;

      uint32_t reloc_idx = 0;
      for (uint32_t end = 0; end < 2; end++) {
         for (uint32_t r = 0; r < layout->n_fields; r++) {
-            const struct gen_perf_query_field *field =
+            const struct intel_perf_query_field *field =
               &layout->fields[end ? r : (layout->n_fields - 1 - r)];
            struct mi_value reg_addr =
               mi_iadd(
@@ -999,7 +999,7 @@ void genX(CmdBeginQueryIndexedEXT)(

      cmd_buffer->perf_reloc_idx = 0;
      for (uint32_t r = 0; r < layout->n_fields; r++) {
-         const struct gen_perf_query_field *field =
+         const struct intel_perf_query_field *field =
            &layout->fields[layout->n_fields - 1 - r];
         void *dws;

@@ -1134,11 +1134,11 @@ void genX(CmdEndQueryIndexedEXT)(
         return;

      const struct anv_physical_device *pdevice = cmd_buffer->device->physical;
-      const struct gen_perf_query_field_layout *layout = &pdevice->perf->query_layout;
+      const struct intel_perf_query_field_layout *layout = &pdevice->perf->query_layout;

      void *dws;
      for (uint32_t r = 0; r < layout->n_fields; r++) {
-         const struct gen_perf_query_field *field = &layout->fields[r];
+         const struct intel_perf_query_field *field = &layout->fields[r];

         switch (field->type) {
         case GEN_PERF_QUERY_FIELD_TYPE_MI_RPC:
@@ -974,7 +974,7 @@ brw_create_context(gl_api api,
       return false;
    }
    brw->mem_ctx = ralloc_context(NULL);
-   brw->perf_ctx = gen_perf_new_context(brw->mem_ctx);
+   brw->perf_ctx = intel_perf_new_context(brw->mem_ctx);

    driContextPriv->driverPrivate = brw;
    brw->driContext = driContextPriv;
@@ -683,7 +683,7 @@ enum brw_predicate_state {
 struct shader_times;

 struct intel_l3_config;
-struct gen_perf;
+struct intel_perf;

 struct brw_uploader {
    struct brw_bufmgr *bufmgr;
@@ -1187,7 +1187,7 @@ struct brw_context
       bool supported;
    } predicate;

-   struct gen_perf_context *perf_ctx;
+   struct intel_perf_context *perf_ctx;

    int num_atoms[BRW_NUM_PIPELINES];
    const struct brw_tracked_state render_atoms[76];
@@ -89,7 +89,7 @@

 struct brw_perf_query_object {
    struct gl_perf_query_object base;
-   struct gen_perf_query_object *query;
+   struct intel_perf_query_object *query;
 };

 /** Downcasting convenience macro. */
@@ -114,23 +114,23 @@ static void
 dump_perf_query_callback(void *query_void, void *brw_void)
 {
    struct brw_context *ctx = brw_void;
-   struct gen_perf_context *perf_ctx = ctx->perf_ctx;
+   struct intel_perf_context *perf_ctx = ctx->perf_ctx;
    struct gl_perf_query_object *o = query_void;
    struct brw_perf_query_object * brw_query = brw_perf_query(o);
-   struct gen_perf_query_object *obj = brw_query->query;
+   struct intel_perf_query_object *obj = brw_query->query;

    DBG("%4d: %-6s %-8s ",
        o->Id,
        o->Used ? "Dirty," : "New,",
        o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"));
-   gen_perf_dump_query(perf_ctx, obj, &ctx->batch);
+   intel_perf_dump_query(perf_ctx, obj, &ctx->batch);
 }

 static void
 dump_perf_queries(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
-   gen_perf_dump_query_count(brw->perf_ctx);
+   intel_perf_dump_query_count(brw->perf_ctx);
    _mesa_HashWalk(ctx->PerfQuery.Objects, dump_perf_query_callback, brw);
 }

@@ -146,18 +146,18 @@ brw_get_perf_query_info(struct gl_context *ctx,
                         GLuint *n_active)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct gen_perf_context *perf_ctx = brw->perf_ctx;
-   struct gen_perf_config *perf_cfg = gen_perf_config(perf_ctx);
-   const struct gen_perf_query_info *query = &perf_cfg->queries[query_index];
+   struct intel_perf_context *perf_ctx = brw->perf_ctx;
+   struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);
+   const struct intel_perf_query_info *query = &perf_cfg->queries[query_index];

    *name = query->name;
    *data_size = query->data_size;
    *n_counters = query->n_counters;
-   *n_active = gen_perf_active_queries(perf_ctx, query);
+   *n_active = intel_perf_active_queries(perf_ctx, query);
 }

 static GLuint
-intel_counter_type_enum_to_gl_type(enum gen_perf_counter_type type)
+intel_counter_type_enum_to_gl_type(enum intel_perf_counter_type type)
 {
    switch (type) {
    case GEN_PERF_COUNTER_TYPE_EVENT: return GL_PERFQUERY_COUNTER_EVENT_INTEL;
@@ -172,7 +172,7 @@ intel_counter_type_enum_to_gl_type(enum gen_perf_counter_type type)
 }

 static GLuint
-gen_counter_data_type_to_gl_type(enum gen_perf_counter_data_type type)
+gen_counter_data_type_to_gl_type(enum intel_perf_counter_data_type type)
 {
    switch (type) {
    case GEN_PERF_COUNTER_DATA_TYPE_BOOL32: return GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL;
@@ -201,16 +201,16 @@ brw_get_perf_counter_info(struct gl_context *ctx,
                           GLuint64 *raw_max)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct gen_perf_config *perf_cfg = gen_perf_config(brw->perf_ctx);
-   const struct gen_perf_query_info *query =
+   struct intel_perf_config *perf_cfg = intel_perf_config(brw->perf_ctx);
+   const struct intel_perf_query_info *query =
       &perf_cfg->queries[query_index];
-   const struct gen_perf_query_counter *counter =
+   const struct intel_perf_query_counter *counter =
       &query->counters[counter_index];

    *name = counter->name;
    *desc = counter->desc;
    *offset = counter->offset;
-   *data_size = gen_perf_query_counter_get_size(counter);
+   *data_size = intel_perf_query_counter_get_size(counter);
    *type_enum = intel_counter_type_enum_to_gl_type(counter->type);
    *data_type_enum = gen_counter_data_type_to_gl_type(counter->data_type);
    *raw_max = counter->raw_max;
@@ -233,8 +233,8 @@ brw_begin_perf_query(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
-   struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = brw->perf_ctx;
+   struct intel_perf_query_object *obj = brw_query->query;
+   struct intel_perf_context *perf_ctx = brw->perf_ctx;

    /* We can assume the frontend hides mistaken attempts to Begin a
     * query object multiple times before its End. Similarly if an
@@ -247,7 +247,7 @@ brw_begin_perf_query(struct gl_context *ctx,

    DBG("Begin(%d)\n", o->Id);

-   bool ret = gen_perf_begin_query(perf_ctx, obj);
+   bool ret = intel_perf_begin_query(perf_ctx, obj);

    if (INTEL_DEBUG & DEBUG_PERFMON)
       dump_perf_queries(brw);
@@ -264,11 +264,11 @@ brw_end_perf_query(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
-   struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = brw->perf_ctx;
+   struct intel_perf_query_object *obj = brw_query->query;
+   struct intel_perf_context *perf_ctx = brw->perf_ctx;

    DBG("End(%d)\n", o->Id);
-   gen_perf_end_query(perf_ctx, obj);
+   intel_perf_end_query(perf_ctx, obj);
 }

 static void
@@ -276,11 +276,11 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
 {
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
-   struct gen_perf_query_object *obj = brw_query->query;
+   struct intel_perf_query_object *obj = brw_query->query;

    assert(!o->Ready);

-   gen_perf_wait_query(brw->perf_ctx, obj, &brw->batch);
+   intel_perf_wait_query(brw->perf_ctx, obj, &brw->batch);
 }

 static bool
@@ -289,12 +289,12 @@ brw_is_perf_query_ready(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
-   struct gen_perf_query_object *obj = brw_query->query;
+   struct intel_perf_query_object *obj = brw_query->query;

    if (o->Ready)
       return true;

-   return gen_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch);
+   return intel_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch);
 }

 /**
@@ -309,7 +309,7 @@ brw_get_perf_query_data(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
-   struct gen_perf_query_object *obj = brw_query->query;
+   struct intel_perf_query_object *obj = brw_query->query;

    assert(brw_is_perf_query_ready(ctx, o));

@@ -323,7 +323,7 @@ brw_get_perf_query_data(struct gl_context *ctx,
     */
    assert(o->Ready);

-   gen_perf_get_query_data(brw->perf_ctx, obj, &brw->batch,
+   intel_perf_get_query_data(brw->perf_ctx, obj, &brw->batch,
                            data_size, data, bytes_written);
 }

@@ -331,14 +331,14 @@ static struct gl_perf_query_object *
 brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct gen_perf_context *perf_ctx = brw->perf_ctx;
-   struct gen_perf_query_object * obj = gen_perf_new_query(perf_ctx, query_index);
+   struct intel_perf_context *perf_ctx = brw->perf_ctx;
+   struct intel_perf_query_object * obj = intel_perf_new_query(perf_ctx, query_index);
    if (unlikely(!obj))
       return NULL;

    struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object));
    if (unlikely(!brw_query)) {
-      gen_perf_delete_query(perf_ctx, obj);
+      intel_perf_delete_query(perf_ctx, obj);
       return NULL;
    }

@@ -355,8 +355,8 @@ brw_delete_perf_query(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
-   struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = brw->perf_ctx;
+   struct intel_perf_query_object *obj = brw_query->query;
+   struct intel_perf_context *perf_ctx = brw->perf_ctx;

    /* We can assume that the frontend waits for a query to complete
     * before ever calling into here, so we don't have to worry about
@@ -367,7 +367,7 @@ brw_delete_perf_query(struct gl_context *ctx,

    DBG("Delete(%d)\n", o->Id);

-   gen_perf_delete_query(perf_ctx, obj);
+   intel_perf_delete_query(perf_ctx, obj);
    free(brw_query);
 }

@@ -478,8 +478,8 @@ brw_init_perf_query_info(struct gl_context *ctx)
    struct brw_context *brw = brw_context(ctx);
    const struct intel_device_info *devinfo = &brw->screen->devinfo;

-   struct gen_perf_context *perf_ctx = brw->perf_ctx;
-   struct gen_perf_config *perf_cfg = gen_perf_config(perf_ctx);
+   struct intel_perf_context *perf_ctx = brw->perf_ctx;
+   struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);

    if (perf_cfg)
       return perf_cfg->n_queries;
@@ -487,7 +487,7 @@ brw_init_perf_query_info(struct gl_context *ctx)
    if (!oa_metrics_kernel_support(brw->screen->fd, devinfo))
       return 0;

-   perf_cfg = gen_perf_new(brw->mem_ctx);
+   perf_cfg = intel_perf_new(brw->mem_ctx);

    perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc;
    perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference;
@@ -504,9 +504,9 @@ brw_init_perf_query_info(struct gl_context *ctx)
    perf_cfg->vtbl.bo_wait_rendering = (bo_wait_rendering_t)brw_bo_wait_rendering;
    perf_cfg->vtbl.bo_busy = (bo_busy_t)brw_bo_busy;

-   gen_perf_init_context(perf_ctx, perf_cfg, brw->mem_ctx, brw, brw->bufmgr,
+   intel_perf_init_context(perf_ctx, perf_cfg, brw->mem_ctx, brw, brw->bufmgr,
                          devinfo, brw->hw_ctx, brw->screen->fd);
-   gen_perf_init_metrics(perf_cfg, devinfo, brw->screen->fd,
+   intel_perf_init_metrics(perf_cfg, devinfo, brw->screen->fd,
                          true /* pipeline stats */);

    return perf_cfg->n_queries;