i965: perf: snapshot RPSTAT register
This register contains the current and previous frequency of the GT; it is one of the values GPA would like to have as part of its queries. v2: Don't use this register on Baytrail/Cherryview (Ken). Use the GET_FIELD() macro (Ken). Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
d71b442416
commit
43fcb72d2c
|
@ -1656,6 +1656,18 @@ enum brw_pixel_shader_coverage_mask_mode {
|
||||||
#define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
|
#define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
|
||||||
# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
|
# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
|
||||||
|
|
||||||
|
/* RPSTAT1: the register snapshotted by capture_frequency_stat_register()
 * on gen 7-8 (Baytrail/Cherryview excluded). Per the commit message it
 * holds the current and previous GT frequency.
 *
 * The *_SHIFT / *_MASK pairs describe the two fields for GET_FIELD();
 * presumably CURR occupies bits 13:7 and PREV bits 6:0 (INTEL_MASK is
 * defined elsewhere -- confirm the (high, low) argument order).
 */
#define GEN7_RPSTAT1 0xA01C
|
||||||
|
#define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7
|
||||||
|
#define GEN7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7)
|
||||||
|
#define GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT 0
|
||||||
|
#define GEN7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0)
|
||||||
|
|
||||||
|
/* RPSTAT0: the register snapshotted by capture_frequency_stat_register()
 * on gen 9 and later. It has the same numeric address as GEN7_RPSTAT1
 * but a different field layout.
 *
 * The *_SHIFT / *_MASK pairs describe the two fields for GET_FIELD();
 * presumably CURR occupies bits 31:23 and PREV bits 8:0 (INTEL_MASK is
 * defined elsewhere -- confirm the (high, low) argument order).
 */
#define GEN9_RPSTAT0 0xA01C
|
||||||
|
#define GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT 23
|
||||||
|
#define GEN9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23)
|
||||||
|
#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0
|
||||||
|
#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0)
|
||||||
|
|
||||||
#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gen9+ */
|
#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gen9+ */
|
||||||
# define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7)
|
# define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7)
|
||||||
# define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
|
# define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
|
||||||
|
|
|
@ -216,6 +216,8 @@ brw_perf_query(struct gl_perf_query_object *o)
|
||||||
|
|
||||||
#define MI_RPC_BO_SIZE 4096
|
#define MI_RPC_BO_SIZE 4096
|
||||||
#define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2)
|
#define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2)
|
||||||
|
/* Byte offsets into a perf query's OA buffer object where the 32-bit
 * frequency register snapshot is stored: START is written by
 * brw_begin_perf_query(), END by brw_end_perf_query(). Both lie in the
 * upper part of the MI_RPC_BO_SIZE (4096 byte) buffer.
 */
#define MI_FREQ_START_OFFSET_BYTES (3072)
|
||||||
|
#define MI_FREQ_END_OFFSET_BYTES (3076)
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
|
@ -946,6 +948,21 @@ close_perf(struct brw_context *brw)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
capture_frequency_stat_register(struct brw_context *brw,
|
||||||
|
struct brw_bo *bo,
|
||||||
|
uint32_t bo_offset)
|
||||||
|
{
|
||||||
|
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||||
|
|
||||||
|
if (devinfo->gen >= 7 && devinfo->gen <= 8 &&
|
||||||
|
!devinfo->is_baytrail && !devinfo->is_cherryview) {
|
||||||
|
brw_store_register_mem32(brw, bo, GEN7_RPSTAT1, bo_offset);
|
||||||
|
} else if (devinfo->gen >= 9) {
|
||||||
|
brw_store_register_mem32(brw, bo, GEN9_RPSTAT0, bo_offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Driver hook for glBeginPerfQueryINTEL().
|
* Driver hook for glBeginPerfQueryINTEL().
|
||||||
*/
|
*/
|
||||||
|
@ -1138,6 +1155,8 @@ brw_begin_perf_query(struct gl_context *ctx,
|
||||||
/* Take a starting OA counter snapshot. */
|
/* Take a starting OA counter snapshot. */
|
||||||
brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
|
brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
|
||||||
obj->oa.begin_report_id);
|
obj->oa.begin_report_id);
|
||||||
|
capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_START_OFFSET_BYTES);
|
||||||
|
|
||||||
++brw->perfquery.n_active_oa_queries;
|
++brw->perfquery.n_active_oa_queries;
|
||||||
|
|
||||||
/* No already-buffered samples can possibly be associated with this query
|
/* No already-buffered samples can possibly be associated with this query
|
||||||
|
@ -1221,6 +1240,7 @@ brw_end_perf_query(struct gl_context *ctx,
|
||||||
*/
|
*/
|
||||||
if (!obj->oa.results_accumulated) {
|
if (!obj->oa.results_accumulated) {
|
||||||
/* Take an ending OA counter snapshot. */
|
/* Take an ending OA counter snapshot. */
|
||||||
|
capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_END_OFFSET_BYTES);
|
||||||
brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
|
brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
|
||||||
MI_RPC_BO_END_OFFSET_BYTES,
|
MI_RPC_BO_END_OFFSET_BYTES,
|
||||||
obj->oa.begin_report_id + 1);
|
obj->oa.begin_report_id + 1);
|
||||||
|
@ -1321,6 +1341,35 @@ brw_is_perf_query_ready(struct gl_context *ctx,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
read_gt_frequency(struct brw_context *brw,
|
||||||
|
struct brw_perf_query_object *obj)
|
||||||
|
{
|
||||||
|
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||||
|
uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)),
|
||||||
|
end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
|
||||||
|
|
||||||
|
switch (devinfo->gen) {
|
||||||
|
case 7:
|
||||||
|
case 8:
|
||||||
|
obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
|
||||||
|
obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
case 10:
|
||||||
|
case 11:
|
||||||
|
obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
|
||||||
|
obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("unexpected gen");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Put the numbers into Hz. */
|
||||||
|
obj->oa.gt_frequency[0] *= 1000000ULL;
|
||||||
|
obj->oa.gt_frequency[1] *= 1000000ULL;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
get_oa_counter_data(struct brw_context *brw,
|
get_oa_counter_data(struct brw_context *brw,
|
||||||
struct brw_perf_query_object *obj,
|
struct brw_perf_query_object *obj,
|
||||||
|
@ -1332,6 +1381,7 @@ get_oa_counter_data(struct brw_context *brw,
|
||||||
int written = 0;
|
int written = 0;
|
||||||
|
|
||||||
if (!obj->oa.results_accumulated) {
|
if (!obj->oa.results_accumulated) {
|
||||||
|
read_gt_frequency(brw, obj);
|
||||||
accumulate_oa_reports(brw, obj);
|
accumulate_oa_reports(brw, obj);
|
||||||
assert(obj->oa.results_accumulated);
|
assert(obj->oa.results_accumulated);
|
||||||
|
|
||||||
|
|
|
@ -113,6 +113,11 @@ struct brw_perf_query_object
|
||||||
* Number of reports accumulated to produce the results.
|
* Number of reports accumulated to produce the results.
|
||||||
*/
|
*/
|
||||||
uint32_t reports_accumulated;
|
uint32_t reports_accumulated;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Frequency of the GT at begin and end of the query.
|
||||||
|
*/
|
||||||
|
uint64_t gt_frequency[2];
|
||||||
} oa;
|
} oa;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
|
Loading…
Reference in New Issue