nv50/ir: swap the least-ref'd source into src1 when both const/imm
The whole point of inlining sources is to reduce loads. We can end up in a situation where one value is used a lot of times, and one value is used only once per instruction. The once-per-instruction one is the one that should get inlined, but with the previous algorithm, it was given no preference.

This flips things around to prefer putting less-referenced values into src1, which increases the likelihood of them being inlined.

While we're at it, adjust the heuristic to not treat 0 as an immediate, as well as (effectively) check for situations where LIMMs can't be loaded.

All this yields improvements on nvc0:

total instructions in shared programs : 6261157 -> 6255985 (-0.08%)
total gprs used in shared programs : 945082 -> 943417 (-0.18%)
total local used in shared programs : 30372 -> 30288 (-0.28%)
total bytes used in shared programs : 50089256 -> 50047880 (-0.08%)

          local   gpr   inst   bytes
helped       21   822   3332    3332
hurt          0   278    565     565

And more importantly, it avoids generating really bad code with SSBOs, where we end up checking a lot of different values (usually immediates) against the length.

On nv50 we get comparable results, and even improve packing (bytes went down more than instructions):

total instructions in shared programs : 6346564 -> 6341277 (-0.08%)
total gprs used in shared programs : 728719 -> 725131 (-0.49%)
total local used in shared programs : 3552 -> 3552 (0.00%)
total bytes used in shared programs : 43995688 -> 43932928 (-0.14%)

          local   gpr   inst   bytes
helped        0  1380   3252    3774
hurt          0   287   1710    1365

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
af686e7de3
commit
a31819cff8
|
@ -171,7 +171,10 @@ LoadPropagation::isImmdLoad(Instruction *ld)
|
|||
if (!ld || (ld->op != OP_MOV) ||
|
||||
((typeSizeof(ld->dType) != 4) && (typeSizeof(ld->dType) != 8)))
|
||||
return false;
|
||||
return ld->src(0).getFile() == FILE_IMMEDIATE;
|
||||
|
||||
// A 0 can be replaced with a register, so it doesn't count as an immediate.
|
||||
ImmediateValue val;
|
||||
return ld->src(0).getImmediate(val) && !val.isInteger(0);
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -187,7 +190,8 @@ LoadPropagation::isAttribOrSharedLoad(Instruction *ld)
|
|||
void
|
||||
LoadPropagation::checkSwapSrc01(Instruction *insn)
|
||||
{
|
||||
if (!prog->getTarget()->getOpInfo(insn).commutative)
|
||||
const Target *targ = prog->getTarget();
|
||||
if (!targ->getOpInfo(insn).commutative)
|
||||
if (insn->op != OP_SET && insn->op != OP_SLCT)
|
||||
return;
|
||||
if (insn->src(1).getFile() != FILE_GPR)
|
||||
|
@ -196,14 +200,15 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
|
|||
Instruction *i0 = insn->getSrc(0)->getInsn();
|
||||
Instruction *i1 = insn->getSrc(1)->getInsn();
|
||||
|
||||
if (isCSpaceLoad(i0)) {
|
||||
if (!isCSpaceLoad(i1))
|
||||
insn->swapSources(0, 1);
|
||||
else
|
||||
return;
|
||||
} else
|
||||
if (isImmdLoad(i0)) {
|
||||
if (!isCSpaceLoad(i1) && !isImmdLoad(i1))
|
||||
// Swap sources to inline the less frequently used source. That way,
|
||||
// optimistically, it will eventually be able to remove the instruction.
|
||||
int i0refs = insn->getSrc(0)->refCount();
|
||||
int i1refs = insn->getSrc(1)->refCount();
|
||||
|
||||
if ((isCSpaceLoad(i0) || isImmdLoad(i0)) && targ->insnCanLoad(insn, 1, i0)) {
|
||||
if ((!isImmdLoad(i1) && !isCSpaceLoad(i1)) ||
|
||||
!targ->insnCanLoad(insn, 1, i1) ||
|
||||
i0refs < i1refs)
|
||||
insn->swapSources(0, 1);
|
||||
else
|
||||
return;
|
||||
|
|
Loading…
Reference in New Issue