aco/ra: refactor affinity coalescing
Also adds v_interp_p2_f32 to the list of affinity-related instructions.

Totals from 68 (0.05% of 149839) affected shaders (GFX10.3):
CodeSize: 792928 -> 792056 (-0.11%)
Instrs: 152843 -> 152625 (-0.14%)
Latency: 1235353 -> 1235278 (-0.01%)
InvThroughput: 224087 -> 224049 (-0.02%)
Copies: 9218 -> 9000 (-2.36%)

Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8764>
This commit is contained in:
parent
3a98f484d1
commit
09b99f1b7c
|
@ -2095,14 +2095,33 @@ void get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
|
|||
phi_ressources[it->second][0] = def.getTemp();
|
||||
/* try to coalesce phi affinities with parallelcopies */
|
||||
Operand op = Operand();
|
||||
if (!def.isFixed() && instr->opcode == aco_opcode::p_parallelcopy)
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::p_parallelcopy:
|
||||
op = instr->operands[i];
|
||||
else if ((instr->opcode == aco_opcode::v_mad_f32 ||
|
||||
(instr->opcode == aco_opcode::v_fma_f32 && ctx.program->chip_class >= GFX10) ||
|
||||
instr->opcode == aco_opcode::v_mad_f16 ||
|
||||
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
|
||||
(instr->opcode == aco_opcode::v_fma_f16 && ctx.program->chip_class >= GFX10)) && !instr->usesModifiers())
|
||||
break;
|
||||
|
||||
case aco_opcode::v_interp_p2_f32:
|
||||
case aco_opcode::v_writelane_b32:
|
||||
case aco_opcode::v_writelane_b32_e64:
|
||||
op = instr->operands[2];
|
||||
break;
|
||||
|
||||
case aco_opcode::v_fma_f32:
|
||||
case aco_opcode::v_fma_f16:
|
||||
case aco_opcode::v_pk_fma_f16:
|
||||
if (ctx.program->chip_class < GFX10)
|
||||
continue;
|
||||
FALLTHROUGH;
|
||||
case aco_opcode::v_mad_f32:
|
||||
case aco_opcode::v_mad_f16:
|
||||
if (instr->usesModifiers())
|
||||
continue;
|
||||
op = instr->operands[2];
|
||||
break;
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (op.isTemp() && op.isFirstKillBeforeDef() && def.regClass() == op.regClass()) {
|
||||
phi_ressources[it->second].emplace_back(op.getTemp());
|
||||
|
|
Loading…
Reference in New Issue