freedreno/pps: Expose same counters as blob

Expose most of the counters that the blob driver exposes. By faking the
values of the counters returned from kgsl, I identified the exact underlying
counters and the constant coefficients being used.

Note: the coefficients for counters that depend on time are NOT verified.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14323>
This commit is contained in:
Danylo Piliaiev 2021-12-28 21:44:55 +02:00 committed by Marge Bot
parent 03ab9d895e
commit b84f059680
1 changed files with 319 additions and 14 deletions

View File

@ -16,6 +16,27 @@
namespace pps
{
/* Floating-point quotient of two unsigned 64-bit values.
 *
 * A zero divisor yields 0 rather than inf/NaN; otherwise the division
 * is carried out in double precision.
 */
double
safe_div(uint64_t a, uint64_t b)
{
   const double numerator = static_cast<double>(a);
   const double denominator = static_cast<double>(b);

   return (b != 0) ? numerator / denominator : 0.0;
}
/* Express a / b as a percentage.
 *
 * The inputs are occasionally bogus. So that the timeline never shows
 * values above 100%, any sample where a exceeds b is reported as 0,
 * and so is any sample with a zero b.
 */
float
percent(uint64_t a, uint64_t b)
{
   const bool plausible = (b != 0) && (a <= b);
   if (!plausible)
      return 0.0f;

   /* Compute in double precision, narrow to float only on return. */
   const double fraction = static_cast<double>(a) / static_cast<double>(b);
   return static_cast<float>(100.0 * fraction);
}
uint64_t
FreedrenoDriver::get_min_sampling_period_ns()
{
@ -45,14 +66,58 @@ FreedrenoDriver::setup_a6xx_counters()
auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");
auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");
auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
auto PERF_TP_OUTPUT_PIXELS = countable("PERF_TP_OUTPUT_PIXELS");
auto PERF_TP_OUTPUT_PIXELS_ANISO = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
auto PERF_TSE_INPUT_PRIM = countable("PERF_TSE_INPUT_PRIM");
auto PERF_TSE_CLIPPED_PRIM = countable("PERF_TSE_CLIPPED_PRIM");
auto PERF_TSE_TRIVAL_REJ_PRIM = countable("PERF_TSE_TRIVAL_REJ_PRIM");
auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");
auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");
auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
/* TODO: resolve() reports that there is no PERF_CMPDECMP_VBIF_READ_DATA */
// auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
/*
* And then setup the derived counters that we are exporting to
* pps based on the captured countable values
* pps based on the captured countable values.
*
* We try to expose the same counters as blob:
* https://gpuinspector.dev/docs/gpu-counters/qualcomm
*/
counter("GPU Frequency", Counter::Units::Hertz, [=]() {
@ -61,14 +126,7 @@ FreedrenoDriver::setup_a6xx_counters()
);
counter("GPU % Utilization", Counter::Units::Percent, [=]() {
return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq;
}
);
// This one is a bit of a guess, but seems plausible..
counter("ALU / Fragment", Counter::Units::None, [=]() {
return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / PERF_RB_3D_PIXELS;
return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
}
);
@ -78,12 +136,259 @@ FreedrenoDriver::setup_a6xx_counters()
);
counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores);
return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
}
);
// TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm
// for what blob exposes
/* TODO: verify */
counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
}
);
/* TODO: verify */
counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
}
);
counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
}
);
counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
}
);
counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
}
);
/* TODO: verify */
counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
}
);
counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
return PERF_TSE_INPUT_PRIM * (1.f / time);
}
);
counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
}
);
counter("% Prims Clipped", Counter::Units::Percent, [=]() {
return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
}
);
counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM;
}
);
counter("Reused Vertices / Second", Counter::Units::None, [=]() {
return PERF_PC_VERTEX_HITS * (1.f / time);
}
);
counter("Average Polygon Area", Counter::Units::None, [=]() {
return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
}
);
/* TODO: find formula */
// counter("% Shaders Busy", Counter::Units::Percent, [=]() {
// return 100.0 * 0;
// }
// );
counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
return PERF_PC_VS_INVOCATIONS * (1.f / time);
}
);
counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
return PERF_HLSQ_QUADS * 4 * (1.f / time);
}
);
counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
}
);
counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
}
);
counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
}
);
counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
}
);
counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
}
);
counter("Textures / Vertex", Counter::Units::None, [=]() {
return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
}
);
counter("Textures / Fragment", Counter::Units::None, [=]() {
return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
}
);
counter("ALU / Vertex", Counter::Units::None, [=]() {
return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
}
);
counter("EFU / Vertex", Counter::Units::None, [=]() {
return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
}
);
counter("ALU / Fragment", Counter::Units::None, [=]() {
return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
}
);
counter("EFU / Fragment", Counter::Units::None, [=]() {
return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
}
);
counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
(PERF_SP_ANY_EU_WORKING_VS_STAGE +
PERF_SP_ANY_EU_WORKING_FS_STAGE +
PERF_SP_ANY_EU_WORKING_CS_STAGE));
}
);
counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
(PERF_SP_ANY_EU_WORKING_VS_STAGE +
PERF_SP_ANY_EU_WORKING_FS_STAGE +
PERF_SP_ANY_EU_WORKING_CS_STAGE));
}
);
counter("% Time Compute", Counter::Units::Percent, [=]() {
return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
(PERF_SP_ANY_EU_WORKING_VS_STAGE +
PERF_SP_ANY_EU_WORKING_FS_STAGE +
PERF_SP_ANY_EU_WORKING_CS_STAGE));
}
);
counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
PERF_SP_BUSY_CYCLES);
}
);
counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
}
);
counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
}
);
counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
}
);
counter("% Linear Filtered", Counter::Units::Percent, [=]() {
return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
}
);
counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
}
);
counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
}
);
/* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
// counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
// return * (1.f / time);
// }
// );
/* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
// counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
// return * (1.f / time);
// }
// );
/* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
// counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
// return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
// }
// );
/* TODO: verify */
counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
}
);
/* TODO: verify */
counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
}
);
counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
}
);
counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
}
);
counter("Preemptions / second", Counter::Units::None, [=]() {
return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
}
);
counter("Avg Preemption Delay", Counter::Units::None, [=]() {
return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
}
);
}
/**