pan/bi: Report cycle counts

Based on analysis of results from the Mali Offline Compiler. I am
uncertain how well these translate to real life, and they are
normalized counts only...

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11733>
This commit is contained in:
Alyssa Rosenzweig 2021-06-24 11:04:25 -04:00 committed by Marge Bot
parent a38ad8ec65
commit b7ca125278
3 changed files with 88 additions and 13 deletions

View File

@ -2882,29 +2882,100 @@ emit_cf_list(bi_context *ctx, struct exec_list *list)
/* shader-db stuff */
struct bi_stats {
        /* Overall shader size: clauses, tuples, and instructions */
        unsigned nr_clauses;
        unsigned nr_tuples;
        unsigned nr_ins;

        /* Work accumulated per execution unit. These are the inputs to the
         * normalized cycle estimates computed in bi_print_stats. */
        unsigned nr_arith;
        unsigned nr_texture;
        unsigned nr_varying;
        unsigned nr_ldst;
};
/*
 * Fold a single tuple of `clause` into the running totals in `stats`.
 *
 * Every occupied slot (FMA and/or ADD) counts as one instruction. The tuple
 * is then attributed to the arithmetic unit, to one of the message-passing
 * units, or to both, depending on whether its ADD slot holds the clause's
 * message instruction.
 */
static void
bi_count_tuple_stats(bi_clause *clause, bi_tuple *tuple, struct bi_stats *stats)
{
        /* One instruction per occupied slot */
        if (tuple->fma)
                stats->nr_ins++;

        if (tuple->add)
                stats->nr_ins++;

        /* If the ADD slot is not the clause's message-passing instruction,
         * the whole tuple is treated as arithmetic and we are done */
        if (tuple->add != clause->message) {
                stats->nr_arith++;
                return;
        }

        /* A message tuple that also has an FMA instruction is counted twice:
         * once as arithmetic, once against the message unit below */
        stats->nr_arith += tuple->fma ? 1 : 0;

        switch (clause->message_type) {
        case BIFROST_MESSAGE_VARYING: {
                /* Cost scales with the components interpolated, doubled when
                 * the register format is not 16-bit */
                unsigned components = clause->message->vecsize + 1;
                unsigned weight =
                        bi_is_regfmt_16(clause->message->register_format) ? 1 : 2;

                stats->nr_varying += components * weight;
                break;
        }

        case BIFROST_MESSAGE_VARTEX:
                /* Implicit varying fetch of 2 coordinates, fp32 each, on top
                 * of the texture message itself */
                stats->nr_varying += (2 * 2);
                stats->nr_texture++;
                break;

        case BIFROST_MESSAGE_TEX:
                stats->nr_texture++;
                break;

        case BIFROST_MESSAGE_ATTRIBUTE:
        case BIFROST_MESSAGE_LOAD:
        case BIFROST_MESSAGE_STORE:
        case BIFROST_MESSAGE_ATOMIC:
                stats->nr_ldst++;
                break;

        /* Remaining message types are not charged to any unit. They are
         * listed explicitly rather than via a default label. */
        case BIFROST_MESSAGE_NONE:
        case BIFROST_MESSAGE_BARRIER:
        case BIFROST_MESSAGE_BLEND:
        case BIFROST_MESSAGE_TILE:
        case BIFROST_MESSAGE_Z_STENCIL:
        case BIFROST_MESSAGE_ATEST:
        case BIFROST_MESSAGE_JOB:
        case BIFROST_MESSAGE_64BIT:
                break;
        }
}
static void
bi_print_stats(bi_context *ctx, unsigned size, FILE *fp)
{
unsigned nr_clauses = 0, nr_tuples = 0, nr_ins = 0;
struct bi_stats stats = { 0 };
/* Count instructions, clauses, and tuples. Also attempt to construct
* normalized execution engine cycle counts, using the following ratio:
*
* 24 arith tuples/cycle
* 2 texture messages/cycle
* 16 x 16-bit varying channels interpolated/cycle
* 1 load store message/cycle
*
* These numbers seem to match Arm Mobile Studio's heuristic. The real
* cycle counts are surely more complicated.
*/
/* Count instructions, clauses, and tuples */
bi_foreach_block(ctx, _block) {
bi_block *block = (bi_block *) _block;
bi_foreach_clause_in_block(block, clause) {
nr_clauses++;
nr_tuples += clause->tuple_count;
stats.nr_clauses++;
stats.nr_tuples += clause->tuple_count;
for (unsigned i = 0; i < clause->tuple_count; ++i) {
if (clause->tuples[i].fma)
nr_ins++;
if (clause->tuples[i].add)
nr_ins++;
}
for (unsigned i = 0; i < clause->tuple_count; ++i)
bi_count_tuple_stats(clause, &clause->tuples[i], &stats);
}
}
float cycles_arith = ((float) stats.nr_arith) / 24.0;
float cycles_texture = ((float) stats.nr_texture) / 2.0;
float cycles_varying = ((float) stats.nr_varying) / 16.0;
float cycles_ldst = ((float) stats.nr_ldst) / 1.0;
float cycles_message = MAX3(cycles_texture, cycles_varying, cycles_ldst);
float cycles_bound = MAX2(cycles_arith, cycles_message);
/* Thread count and register pressure are traded off only on v7 */
bool full_threads = (ctx->arch == 7 && ctx->info->work_reg_count <= 32);
unsigned nr_threads = full_threads ? 2 : 1;
@ -2913,12 +2984,15 @@ bi_print_stats(bi_context *ctx, unsigned size, FILE *fp)
fprintf(stderr, "%s - %s shader: "
"%u inst, %u tuples, %u clauses, "
"%f cycles, %f arith, %f texture, %f vary, %f ldst, "
"%u quadwords, %u threads, %u loops, "
"%u:%u spills:fills\n",
ctx->nir->info.label ?: "",
ctx->inputs->is_blend ? "PAN_SHADER_BLEND" :
gl_shader_stage_name(ctx->stage),
nr_ins, nr_tuples, nr_clauses,
stats.nr_ins, stats.nr_tuples, stats.nr_clauses,
cycles_bound, cycles_arith, cycles_texture,
cycles_varying, cycles_ldst,
size / 16, nr_threads,
ctx->loop_count,
ctx->spills, ctx->fills);

View File

@ -44,7 +44,7 @@ bi_has_arg(bi_instr *ins, bi_index arg)
* 32-bit. Note auto reads to 32-bit registers even if the memory format is
* 16-bit, so is considered as such here */
static bool
bool
bi_is_regfmt_16(enum bi_register_format fmt)
{
switch (fmt) {

View File

@ -768,6 +768,7 @@ pan_next_block(pan_block *block)
bool bi_has_arg(bi_instr *ins, bi_index arg);
unsigned bi_count_read_registers(bi_instr *ins, unsigned src);
unsigned bi_count_write_registers(bi_instr *ins, unsigned dest);
bool bi_is_regfmt_16(enum bi_register_format fmt);
unsigned bi_writemask(bi_instr *ins, unsigned dest);
bi_clause * bi_next_clause(bi_context *ctx, pan_block *block, bi_clause *clause);
bool bi_side_effects(enum bi_opcode op);