nv50/ir: optimize ADD(SHL(a, b), c) to SHLADD(a, b, c)

total instructions in shared programs :2286901 -> 2284473 (-0.11%)
total gprs used in shared programs    :335256 -> 335273 (0.01%)
total local used in shared programs   :31968 -> 31968 (0.00%)

                local        gpr       inst      bytes
    helped           0          41         852         852
      hurt           0          44          23          23

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Samuel Pitoiset 2016-10-07 01:16:24 +02:00
parent 85ba409967
commit 87b06cab14
1 changed files with 87 additions and 0 deletions

View File

@ -2132,6 +2132,92 @@ AlgebraicOpt::visit(BasicBlock *bb)
// =============================================================================
// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
// Pass that rewrites ADD instructions fed by a SHL into a single SHLADD.
class LateAlgebraicOpt : public Pass
{
private:
   // Per-instruction hook of the pass; dispatches on the opcode.
   virtual bool visit(Instruction *insn);

   // Filters OP_ADD candidates before attempting the SHLADD rewrite.
   void handleADD(Instruction *add);

   // Performs ADD(SHL(a, b), c) -> SHLADD(a, b, c) in place; returns true
   // when the instruction was rewritten.
   bool tryADDToSHLADD(Instruction *add);
};
// Handles an OP_ADD instruction: when both of its sources live in the GPR
// file and the target supports OP_SHLADD for the add's data type, try to fold
// a feeding SHL into it.
void
LateAlgebraicOpt::handleADD(Instruction *add)
{
   Value *const lhs = add->getSrc(0);
   Value *const rhs = add->getSrc(1);

   // Only plain register operands are considered.
   const bool bothInGPR =
      lhs->reg.file == FILE_GPR && rhs->reg.file == FILE_GPR;
   if (!bothInGPR)
      return;

   // Nothing to do if the target cannot encode SHLADD for this type.
   if (!prog->getTarget()->isOpSupported(OP_SHLADD, add->dType))
      return;

   tryADDToSHLADD(add);
}
// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
//
// Rewrites @add in place into an OP_SHLADD when one of its two sources is
// uniquely defined by an OP_SHL that
//   - lives in the same basic block as @add,
//   - neither uses flags nor carries a subOp, and
//   - has an immediate shift amount.
// The add itself must not saturate, must not use flags, must not be 8 bytes
// wide and must not be a floating point operation.
//
// Returns true if @add was rewritten, false if it was left untouched.
bool
LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
{
   Value *src0 = add->getSrc(0);
   Value *src1 = add->getSrc(1);
   ImmediateValue imm;
   Instruction *shl;
   int s; // index of the ADD source that is produced by the SHL

   if (add->saturate || add->usesFlags() || typeSizeof(add->dType) == 8
       || isFloatType(add->dType))
      return false;

   if (src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_SHL)
      s = 0;
   else
   if (src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_SHL)
      s = 1;
   else
      return false;

   shl = add->getSrc(s)->getUniqueInsn();

   if (shl->bb != add->bb || shl->usesFlags() || shl->subOp)
      return false;

   if (!shl->src(1).getImmediate(imm))
      return false;

   // Snapshot the ADD's source modifiers before the sources get shuffled:
   // shiftedMod was applied to the SHL result, addendMod to the other operand.
   const Modifier shiftedMod = add->src(s).mod;
   const Modifier addendMod = add->src(!s).mod;

   add->op = OP_SHLADD;

   // c: the operand that was not the SHL result keeps its own modifier.
   add->setSrc(2, add->src(!s));
   add->src(2).mod = addendMod;

   // a: the shifted value inherits the modifier that sat on the SHL result,
   // regardless of which ADD slot the SHL result used to occupy.
   // NOTE(review): this is only exact for modifiers that commute with a left
   // shift (e.g. integer negation) -- confirm no other modifier can reach here.
   add->setSrc(0, shl->getSrc(0));
   add->src(0).mod = shiftedMod;

   // b: the immediate shift amount never carries a modifier.
   add->setSrc(1, new_ImmediateValue(shl->bb->getProgram(), imm.reg.data.u32));
   add->src(1).mod = Modifier(0);

   return true;
}
// Instruction visitor: forwards OP_ADD instructions to handleADD() and
// ignores every other opcode. Always returns true.
bool
LateAlgebraicOpt::visit(Instruction *i)
{
   if (i->op == OP_ADD)
      handleADD(i);

   return true;
}
// =============================================================================
static inline void
updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
{
@ -3436,6 +3522,7 @@ Program::optimizeSSA(int level)
RUN_PASS(2, AlgebraicOpt, run);
RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
RUN_PASS(1, ConstantFolding, foldAll);
RUN_PASS(2, LateAlgebraicOpt, run);
RUN_PASS(1, LoadPropagation, run);
RUN_PASS(1, IndirectPropagation, run);
RUN_PASS(2, MemoryOpt, run);