aco/ra: refactor affinity coalescing

Also adds v_interp_p2_f32 to the list of
affinity-related instructions.

Totals from 68 (0.05% of 149839) affected shaders (GFX10.3):
CodeSize: 792928 -> 792056 (-0.11%)
Instrs: 152843 -> 152625 (-0.14%)
Latency: 1235353 -> 1235278 (-0.01%)
InvThroughput: 224087 -> 224049 (-0.02%)
Copies: 9218 -> 9000 (-2.36%)

Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8764>
This commit is contained in:
Daniel Schürmann 2021-01-26 18:49:58 +01:00
parent 3a98f484d1
commit 09b99f1b7c
1 changed file with 25 additions and 6 deletions

View File

@@ -2095,14 +2095,33 @@ void get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
phi_ressources[it->second][0] = def.getTemp();
/* try to coalesce phi affinities with parallelcopies */
Operand op = Operand();
if (!def.isFixed() && instr->opcode == aco_opcode::p_parallelcopy)
switch (instr->opcode) {
case aco_opcode::p_parallelcopy:
op = instr->operands[i];
else if ((instr->opcode == aco_opcode::v_mad_f32 ||
(instr->opcode == aco_opcode::v_fma_f32 && ctx.program->chip_class >= GFX10) ||
instr->opcode == aco_opcode::v_mad_f16 ||
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
(instr->opcode == aco_opcode::v_fma_f16 && ctx.program->chip_class >= GFX10)) && !instr->usesModifiers())
break;
case aco_opcode::v_interp_p2_f32:
case aco_opcode::v_writelane_b32:
case aco_opcode::v_writelane_b32_e64:
op = instr->operands[2];
break;
case aco_opcode::v_fma_f32:
case aco_opcode::v_fma_f16:
case aco_opcode::v_pk_fma_f16:
if (ctx.program->chip_class < GFX10)
continue;
FALLTHROUGH;
case aco_opcode::v_mad_f32:
case aco_opcode::v_mad_f16:
if (instr->usesModifiers())
continue;
op = instr->operands[2];
break;
default:
continue;
}
if (op.isTemp() && op.isFirstKillBeforeDef() && def.regClass() == op.regClass()) {
phi_ressources[it->second].emplace_back(op.getTemp());