aco: add various compiler statistics
Adds these statistics:
- hash of code and constant data
- number of instructions
- number of copies from pseudo-instructions
- number of branches
- estimate of cycles spent not waiting in s_waitcnt
- number of vmem/smem "clauses"
- sgpr/vgpr usage before scheduling

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2965>
This commit is contained in:
parent
ad2703653f
commit
b1544352c0
|
@ -54,6 +54,18 @@ static void init()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static radv_compiler_statistic_info statistic_infos[] = {
|
||||||
|
[aco::statistic_hash] = {"Hash", "CRC32 hash of code and constant data"},
|
||||||
|
[aco::statistic_instructions] = {"Instructions", "Instruction count"},
|
||||||
|
[aco::statistic_copies] = {"Copies", "Copy instructions created for pseudo-instructions"},
|
||||||
|
[aco::statistic_branches] = {"Branches", "Branch instructions"},
|
||||||
|
[aco::statistic_cycles] = {"Busy Cycles", "Estimate of busy cycles"},
|
||||||
|
[aco::statistic_vmem_clauses] = {"VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"},
|
||||||
|
[aco::statistic_smem_clauses] = {"SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"},
|
||||||
|
[aco::statistic_sgpr_presched] = {"Pre-Sched SGPRs", "SGPR usage before scheduling"},
|
||||||
|
[aco::statistic_vgpr_presched] = {"Pre-Sched VGPRs", "VGPR usage before scheduling"},
|
||||||
|
};
|
||||||
|
|
||||||
void aco_compile_shader(unsigned shader_count,
|
void aco_compile_shader(unsigned shader_count,
|
||||||
struct nir_shader *const *shaders,
|
struct nir_shader *const *shaders,
|
||||||
struct radv_shader_binary **binary,
|
struct radv_shader_binary **binary,
|
||||||
|
@ -64,6 +76,10 @@ void aco_compile_shader(unsigned shader_count,
|
||||||
ac_shader_config config = {0};
|
ac_shader_config config = {0};
|
||||||
std::unique_ptr<aco::Program> program{new aco::Program};
|
std::unique_ptr<aco::Program> program{new aco::Program};
|
||||||
|
|
||||||
|
program->collect_statistics = args->options->record_ir;
|
||||||
|
if (program->collect_statistics)
|
||||||
|
memset(program->statistics, 0, sizeof(program->statistics));
|
||||||
|
|
||||||
/* Instruction Selection */
|
/* Instruction Selection */
|
||||||
if (args->is_gs_copy_shader)
|
if (args->is_gs_copy_shader)
|
||||||
aco::select_gs_copy_shader(program.get(), shaders[0], &config, args);
|
aco::select_gs_copy_shader(program.get(), shaders[0], &config, args);
|
||||||
|
@ -94,6 +110,9 @@ void aco_compile_shader(unsigned shader_count,
|
||||||
aco::live live_vars = aco::live_var_analysis(program.get(), args->options);
|
aco::live live_vars = aco::live_var_analysis(program.get(), args->options);
|
||||||
aco::spill(program.get(), live_vars, args->options);
|
aco::spill(program.get(), live_vars, args->options);
|
||||||
|
|
||||||
|
if (program->collect_statistics)
|
||||||
|
aco::collect_presched_stats(program.get());
|
||||||
|
|
||||||
//std::cerr << "Before Schedule:\n";
|
//std::cerr << "Before Schedule:\n";
|
||||||
//aco_print_program(program.get(), stderr);
|
//aco_print_program(program.get(), stderr);
|
||||||
aco::schedule_program(program.get(), live_vars);
|
aco::schedule_program(program.get(), live_vars);
|
||||||
|
@ -139,10 +158,16 @@ void aco_compile_shader(unsigned shader_count,
|
||||||
//std::cerr << "After Insert-Waitcnt:\n";
|
//std::cerr << "After Insert-Waitcnt:\n";
|
||||||
//aco_print_program(program.get(), stderr);
|
//aco_print_program(program.get(), stderr);
|
||||||
|
|
||||||
|
if (program->collect_statistics)
|
||||||
|
aco::collect_preasm_stats(program.get());
|
||||||
|
|
||||||
/* Assembly */
|
/* Assembly */
|
||||||
std::vector<uint32_t> code;
|
std::vector<uint32_t> code;
|
||||||
unsigned exec_size = aco::emit_program(program.get(), code);
|
unsigned exec_size = aco::emit_program(program.get(), code);
|
||||||
|
|
||||||
|
if (program->collect_statistics)
|
||||||
|
aco::collect_postasm_stats(program.get(), code);
|
||||||
|
|
||||||
bool get_disasm = args->options->dump_shader || args->options->record_ir;
|
bool get_disasm = args->options->dump_shader || args->options->record_ir;
|
||||||
|
|
||||||
size_t size = llvm_ir.size();
|
size_t size = llvm_ir.size();
|
||||||
|
@ -156,6 +181,11 @@ void aco_compile_shader(unsigned shader_count,
|
||||||
size += disasm.size();
|
size += disasm.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t stats_size = 0;
|
||||||
|
if (program->collect_statistics)
|
||||||
|
stats_size = sizeof(radv_compiler_statistics) + aco::num_statistics * sizeof(uint32_t);
|
||||||
|
size += stats_size;
|
||||||
|
|
||||||
size += code.size() * sizeof(uint32_t) + sizeof(radv_shader_binary_legacy);
|
size += code.size() * sizeof(uint32_t) + sizeof(radv_shader_binary_legacy);
|
||||||
/* We need to calloc to prevent uninitialized data because this will be used
|
/* We need to calloc to prevent uninitialized data because this will be used
|
||||||
* directly for the disk cache. Uninitialized data can appear because of
|
* directly for the disk cache. Uninitialized data can appear because of
|
||||||
|
@ -168,9 +198,15 @@ void aco_compile_shader(unsigned shader_count,
|
||||||
legacy_binary->base.is_gs_copy_shader = args->is_gs_copy_shader;
|
legacy_binary->base.is_gs_copy_shader = args->is_gs_copy_shader;
|
||||||
legacy_binary->base.total_size = size;
|
legacy_binary->base.total_size = size;
|
||||||
|
|
||||||
legacy_binary->stats_size = 0;
|
if (program->collect_statistics) {
|
||||||
|
radv_compiler_statistics *statistics = (radv_compiler_statistics *)legacy_binary->data;
|
||||||
|
statistics->count = aco::num_statistics;
|
||||||
|
statistics->infos = statistic_infos;
|
||||||
|
memcpy(statistics->values, program->statistics, aco::num_statistics * sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
legacy_binary->stats_size = stats_size;
|
||||||
|
|
||||||
memcpy(legacy_binary->data, code.data(), code.size() * sizeof(uint32_t));
|
memcpy(legacy_binary->data + legacy_binary->stats_size, code.data(), code.size() * sizeof(uint32_t));
|
||||||
legacy_binary->exec_size = exec_size;
|
legacy_binary->exec_size = exec_size;
|
||||||
legacy_binary->code_size = code.size() * sizeof(uint32_t);
|
legacy_binary->code_size = code.size() * sizeof(uint32_t);
|
||||||
|
|
||||||
|
@ -178,10 +214,10 @@ void aco_compile_shader(unsigned shader_count,
|
||||||
legacy_binary->disasm_size = 0;
|
legacy_binary->disasm_size = 0;
|
||||||
legacy_binary->ir_size = llvm_ir.size();
|
legacy_binary->ir_size = llvm_ir.size();
|
||||||
|
|
||||||
llvm_ir.copy((char*) legacy_binary->data + legacy_binary->code_size, llvm_ir.size());
|
llvm_ir.copy((char*) legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size, llvm_ir.size());
|
||||||
|
|
||||||
if (get_disasm) {
|
if (get_disasm) {
|
||||||
disasm.copy((char*) legacy_binary->data + legacy_binary->code_size + llvm_ir.size(), disasm.size());
|
disasm.copy((char*) legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size + llvm_ir.size(), disasm.size());
|
||||||
legacy_binary->disasm_size = disasm.size();
|
legacy_binary->disasm_size = disasm.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1219,6 +1219,19 @@ static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
|
||||||
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
|
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
|
||||||
static constexpr Stage geometry_gs = sw_gs | hw_gs;
|
static constexpr Stage geometry_gs = sw_gs | hw_gs;
|
||||||
|
|
||||||
|
/* Identifies one compiler statistic. The enumerators are used as indices into
 * Program::statistics, and num_statistics (always last) is the array length.
 */
enum statistic {
   statistic_hash = 0,      /* CRC32 hash of code and constant data */
   statistic_instructions,  /* instruction count */
   statistic_copies,        /* copy instructions created for pseudo-instructions */
   statistic_branches,      /* branch instructions */
   statistic_cycles,        /* estimate of busy cycles */
   statistic_vmem_clauses,  /* number of VMEM clauses (includes 1-sized clauses) */
   statistic_smem_clauses,  /* number of SMEM clauses (includes 1-sized clauses) */
   statistic_sgpr_presched, /* SGPR usage before scheduling */
   statistic_vgpr_presched, /* VGPR usage before scheduling */
   num_statistics
};
|
||||||
|
|
||||||
class Program final {
|
class Program final {
|
||||||
public:
|
public:
|
||||||
float_mode next_fp_mode;
|
float_mode next_fp_mode;
|
||||||
|
@ -1257,6 +1270,9 @@ public:
|
||||||
bool needs_vcc = false;
|
bool needs_vcc = false;
|
||||||
bool needs_flat_scr = false;
|
bool needs_flat_scr = false;
|
||||||
|
|
||||||
|
bool collect_statistics = false;
|
||||||
|
uint32_t statistics[num_statistics];
|
||||||
|
|
||||||
uint32_t allocateId()
|
uint32_t allocateId()
|
||||||
{
|
{
|
||||||
assert(allocationID <= 16777215);
|
assert(allocationID <= 16777215);
|
||||||
|
@ -1337,6 +1353,10 @@ void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
|
||||||
#define perfwarn(program, cond, msg, ...) do {} while(0)
|
#define perfwarn(program, cond, msg, ...) do {} while(0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Fills in statistic_sgpr_presched and statistic_vgpr_presched from the
 * register demand of the program's blocks. */
void collect_presched_stats(Program *program);
|
||||||
|
/* Accumulates the instruction, branch, VMEM/SMEM clause and cycle statistics
 * by walking every instruction of every block. */
void collect_preasm_stats(Program *program);
|
||||||
|
/* Stores the CRC32 of the words in `code` as statistic_hash. */
void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
|
||||||
|
|
||||||
void aco_print_instr(Instruction *instr, FILE *output);
|
void aco_print_instr(Instruction *instr, FILE *output);
|
||||||
void aco_print_program(Program *program, FILE *output);
|
void aco_print_program(Program *program, FILE *output);
|
||||||
|
|
||||||
|
|
|
@ -784,6 +784,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
||||||
|
|
||||||
copy_map.erase(it);
|
copy_map.erase(it);
|
||||||
it = copy_map.begin();
|
it = copy_map.begin();
|
||||||
|
ctx->program->statistics[statistic_copies]++;
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
/* the target reg is used as operand, check the next entry */
|
/* the target reg is used as operand, check the next entry */
|
||||||
|
@ -813,6 +814,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
||||||
Definition op_as_def = Definition(swap.op.physReg(), swap.op.regClass());
|
Definition op_as_def = Definition(swap.op.physReg(), swap.op.regClass());
|
||||||
if (chip_class >= GFX9 && swap.def.getTemp().type() == RegType::vgpr) {
|
if (chip_class >= GFX9 && swap.def.getTemp().type() == RegType::vgpr) {
|
||||||
bld.vop1(aco_opcode::v_swap_b32, swap.def, op_as_def, swap.op, def_as_op);
|
bld.vop1(aco_opcode::v_swap_b32, swap.def, op_as_def, swap.op, def_as_op);
|
||||||
|
ctx->program->statistics[statistic_copies]++;
|
||||||
} else if (swap.op.physReg() == scc || swap.def.physReg() == scc) {
|
} else if (swap.op.physReg() == scc || swap.def.physReg() == scc) {
|
||||||
/* we need to swap scc and another sgpr */
|
/* we need to swap scc and another sgpr */
|
||||||
assert(!preserve_scc);
|
assert(!preserve_scc);
|
||||||
|
@ -822,6 +824,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
||||||
bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), Operand(scc, s1));
|
bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), Operand(scc, s1));
|
||||||
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(other, s1), Operand(0u));
|
bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(other, s1), Operand(0u));
|
||||||
bld.sop1(aco_opcode::s_mov_b32, Definition(other, s1), Operand(pi->scratch_sgpr, s1));
|
bld.sop1(aco_opcode::s_mov_b32, Definition(other, s1), Operand(pi->scratch_sgpr, s1));
|
||||||
|
ctx->program->statistics[statistic_copies] += 3;
|
||||||
} else if (swap.def.getTemp().type() == RegType::sgpr) {
|
} else if (swap.def.getTemp().type() == RegType::sgpr) {
|
||||||
if (preserve_scc) {
|
if (preserve_scc) {
|
||||||
bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), swap.op);
|
bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), swap.op);
|
||||||
|
@ -832,10 +835,12 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
||||||
bld.sop2(aco_opcode::s_xor_b32, swap.def, Definition(scc, s1), swap.op, def_as_op);
|
bld.sop2(aco_opcode::s_xor_b32, swap.def, Definition(scc, s1), swap.op, def_as_op);
|
||||||
bld.sop2(aco_opcode::s_xor_b32, op_as_def, Definition(scc, s1), swap.op, def_as_op);
|
bld.sop2(aco_opcode::s_xor_b32, op_as_def, Definition(scc, s1), swap.op, def_as_op);
|
||||||
}
|
}
|
||||||
|
ctx->program->statistics[statistic_copies] += 3;
|
||||||
} else {
|
} else {
|
||||||
bld.vop2(aco_opcode::v_xor_b32, op_as_def, swap.op, def_as_op);
|
bld.vop2(aco_opcode::v_xor_b32, op_as_def, swap.op, def_as_op);
|
||||||
bld.vop2(aco_opcode::v_xor_b32, swap.def, swap.op, def_as_op);
|
bld.vop2(aco_opcode::v_xor_b32, swap.def, swap.op, def_as_op);
|
||||||
bld.vop2(aco_opcode::v_xor_b32, op_as_def, swap.op, def_as_op);
|
bld.vop2(aco_opcode::v_xor_b32, op_as_def, swap.op, def_as_op);
|
||||||
|
ctx->program->statistics[statistic_copies] += 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* change the operand reg of the target's use */
|
/* change the operand reg of the target's use */
|
||||||
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2020 Valve Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "aco_ir.h"
|
||||||
|
#include "util/crc32.h"
|
||||||
|
|
||||||
|
namespace aco {
|
||||||
|
|
||||||
|
/* sgpr_presched/vgpr_presched */
|
||||||
|
void collect_presched_stats(Program *program)
|
||||||
|
{
|
||||||
|
RegisterDemand presched_demand;
|
||||||
|
for (Block& block : program->blocks)
|
||||||
|
presched_demand.update(block.register_demand);
|
||||||
|
program->statistics[statistic_sgpr_presched] = presched_demand.sgpr;
|
||||||
|
program->statistics[statistic_vgpr_presched] = presched_demand.vgpr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* instructions/branches/vmem_clauses/smem_clauses/cycles */
|
||||||
|
void collect_preasm_stats(Program *program)
|
||||||
|
{
|
||||||
|
for (Block& block : program->blocks) {
|
||||||
|
std::set<Temp> vmem_clause_res;
|
||||||
|
std::set<Temp> smem_clause_res;
|
||||||
|
|
||||||
|
program->statistics[statistic_instructions] += block.instructions.size();
|
||||||
|
|
||||||
|
for (aco_ptr<Instruction>& instr : block.instructions) {
|
||||||
|
if (instr->format == Format::SOPP && static_cast<SOPP_instruction*>(instr.get())->block != -1)
|
||||||
|
program->statistics[statistic_branches]++;
|
||||||
|
|
||||||
|
if (instr->opcode == aco_opcode::p_constaddr)
|
||||||
|
program->statistics[statistic_instructions] += 2;
|
||||||
|
|
||||||
|
if (instr->isVMEM() && !instr->operands.empty()) {
|
||||||
|
vmem_clause_res.insert(instr->operands[0].getTemp());
|
||||||
|
} else {
|
||||||
|
program->statistics[statistic_vmem_clauses] += vmem_clause_res.size();
|
||||||
|
vmem_clause_res.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (instr->format == Format::SMEM && !instr->operands.empty()) {
|
||||||
|
if (instr->operands[0].size() == 2)
|
||||||
|
smem_clause_res.insert(Temp(0, s2));
|
||||||
|
else
|
||||||
|
smem_clause_res.insert(instr->operands[0].getTemp());
|
||||||
|
} else {
|
||||||
|
program->statistics[statistic_smem_clauses] += smem_clause_res.size();
|
||||||
|
smem_clause_res.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: this incorrectly assumes instructions always take 4 cycles */
|
||||||
|
/* assume loops execute 4 times (TODO: it would be nice to be able to consider loop unrolling) */
|
||||||
|
unsigned iter = 1 << (block.loop_nest_depth * 2);
|
||||||
|
program->statistics[statistic_cycles] += 4 * iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
program->statistics[statistic_vmem_clauses] += vmem_clause_res.size();
|
||||||
|
program->statistics[statistic_smem_clauses] += smem_clause_res.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Collects the hash statistic.
 *
 * Computes a CRC32 over all 32-bit words in `code` with util_hash_crc32()
 * and stores it in program->statistics[statistic_hash].
 */
void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code)
{
   /* each element of `code` is one 4-byte word */
   const size_t num_bytes = code.size() * 4;
   program->statistics[statistic_hash] = util_hash_crc32(code.data(), num_bytes);
}
|
||||||
|
|
||||||
|
}
|
|
@ -76,8 +76,9 @@ libaco_files = files(
|
||||||
'aco_print_asm.cpp',
|
'aco_print_asm.cpp',
|
||||||
'aco_print_ir.cpp',
|
'aco_print_ir.cpp',
|
||||||
'aco_scheduler.cpp',
|
'aco_scheduler.cpp',
|
||||||
'aco_ssa_elimination.cpp',
|
|
||||||
'aco_spill.cpp',
|
'aco_spill.cpp',
|
||||||
|
'aco_ssa_elimination.cpp',
|
||||||
|
'aco_statistics.cpp',
|
||||||
'aco_util.h',
|
'aco_util.h',
|
||||||
'aco_validate.cpp',
|
'aco_validate.cpp',
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue