From 43fcb72d2c288d10261ca51a909beabdee1ee761 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 7 Mar 2018 10:46:58 +0000 Subject: [PATCH] i965: perf: snapshot RPSTAT register This register contains the current/previous frequency of the GT, it's one of the values GPA would like to have as part of their queries. v2: Don't use this register on baytrail/cherryview (Ken) Use GET_FIELD() macro (Ken) Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h | 12 +++++ .../drivers/dri/i965/brw_performance_query.c | 50 +++++++++++++++++++ .../drivers/dri/i965/brw_performance_query.h | 5 ++ 3 files changed, 67 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 8bf6f68b67c..855f1c7d744 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1656,6 +1656,18 @@ enum brw_pixel_shader_coverage_mask_mode { #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) +#define GEN7_RPSTAT1 0xA01C +#define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 +#define GEN7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7) +#define GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT 0 +#define GEN7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0) + +#define GEN9_RPSTAT0 0xA01C +#define GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT 23 +#define GEN9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23) +#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0 +#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0) + #define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gen9+ */ # define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7) # define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 44cac85c6e6..32cf96a333d 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -216,6 +216,8 @@ brw_perf_query(struct
gl_perf_query_object *o) #define MI_RPC_BO_SIZE 4096 #define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2) +#define MI_FREQ_START_OFFSET_BYTES (3072) +#define MI_FREQ_END_OFFSET_BYTES (3076) /******************************************************************************/ @@ -946,6 +948,21 @@ close_perf(struct brw_context *brw) } } +static void +capture_frequency_stat_register(struct brw_context *brw, + struct brw_bo *bo, + uint32_t bo_offset) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + + if (devinfo->gen >= 7 && devinfo->gen <= 8 && + !devinfo->is_baytrail && !devinfo->is_cherryview) { + brw_store_register_mem32(brw, bo, GEN7_RPSTAT1, bo_offset); + } else if (devinfo->gen >= 9) { + brw_store_register_mem32(brw, bo, GEN9_RPSTAT0, bo_offset); + } +} + /** * Driver hook for glBeginPerfQueryINTEL(). */ @@ -1138,6 +1155,8 @@ brw_begin_perf_query(struct gl_context *ctx, /* Take a starting OA counter snapshot. */ brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0, obj->oa.begin_report_id); + capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_START_OFFSET_BYTES); + ++brw->perfquery.n_active_oa_queries; /* No already-buffered samples can possibly be associated with this query @@ -1221,6 +1240,7 @@ brw_end_perf_query(struct gl_context *ctx, */ if (!obj->oa.results_accumulated) { /* Take an ending OA counter snapshot. 
*/ + capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_END_OFFSET_BYTES); brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, MI_RPC_BO_END_OFFSET_BYTES, obj->oa.begin_report_id + 1); @@ -1321,6 +1341,35 @@ brw_is_perf_query_ready(struct gl_context *ctx, return false; } +static void +read_gt_frequency(struct brw_context *brw, + struct brw_perf_query_object *obj) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)), + end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES)); + + switch (devinfo->gen) { + case 7: + case 8: + obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL; + obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL; + break; + case 9: + case 10: + case 11: + obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL; + obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL; + break; + default: + unreachable("unexpected gen"); + } + + /* Put the numbers into Hz. */ + obj->oa.gt_frequency[0] *= 1000000ULL; + obj->oa.gt_frequency[1] *= 1000000ULL; +} + static int get_oa_counter_data(struct brw_context *brw, struct brw_perf_query_object *obj, @@ -1332,6 +1381,7 @@ get_oa_counter_data(struct brw_context *brw, int written = 0; if (!obj->oa.results_accumulated) { + read_gt_frequency(brw, obj); accumulate_oa_reports(brw, obj); assert(obj->oa.results_accumulated); diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h index f62786f7f1c..f8732738b4e 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.h +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h @@ -113,6 +113,11 @@ struct brw_perf_query_object * Number of reports accumulated to produce the results. */ uint32_t reports_accumulated; + + /** + * Frequency of the GT at begin and end of the query. 
+ */ + uint64_t gt_frequency[2]; } oa; struct {