nv50/ir/opt: improve post-multiply and check target for support
This commit is contained in:
parent
56cf2da022
commit
55f9bdb64e
|
@ -216,6 +216,8 @@ private:
|
|||
|
||||
void unary(Instruction *, const ImmediateValue&);
|
||||
|
||||
void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
|
||||
|
||||
// TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
|
||||
CmpInstruction *findOriginForTestWithZero(Value *);
|
||||
|
||||
|
@ -527,6 +529,73 @@ ConstantFolding::unary(Instruction *i, const ImmediateValue &imm)
|
|||
i->src[0].mod = Modifier(0);
|
||||
}
|
||||
|
||||
void
|
||||
ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
|
||||
const int s, ImmediateValue& imm2)
|
||||
{
|
||||
const int t = s ? 0 : 1;
|
||||
Instruction *insn;
|
||||
Instruction *mul1 = NULL; // mul1 before mul2
|
||||
int e = 0;
|
||||
float f = imm2.reg.data.f32;
|
||||
|
||||
assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);
|
||||
|
||||
if (mul2->getSrc(t)->refCount() == 1) {
|
||||
insn = mul2->getSrc(t)->getInsn();
|
||||
if (insn->op == OP_MUL && insn->dType == TYPE_F32)
|
||||
mul1 = insn;
|
||||
if (mul1) {
|
||||
int s1 = 0;
|
||||
ImmediateValue *imm = mul1->src[s1].getImmediate();
|
||||
if (!imm) {
|
||||
s1 = 1;
|
||||
imm = mul1->src[s1].getImmediate();
|
||||
}
|
||||
if (imm) {
|
||||
bld.setPosition(mul1, false);
|
||||
// a = mul r, imm1
|
||||
// d = mul a, imm2 -> d = mul r, (imm1 * imm2)
|
||||
ImmediateValue imm1(mul1->src[s1].getImmediate(), TYPE_F32);
|
||||
mul1->src[s1].mod.applyTo(imm1);
|
||||
mul1->src[s1].mod = Modifier(0);
|
||||
mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
|
||||
mul2->def[0].replace(mul1->getDef(0), false);
|
||||
} else
|
||||
if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
|
||||
// c = mul a, b
|
||||
// d = mul c, imm -> d = mul_x_imm a, b
|
||||
mul1->postFactor = e;
|
||||
mul2->def[0].replace(mul1->getDef(0), false);
|
||||
if (f < 0)
|
||||
mul1->src[0].mod = mul1->src[0].mod ^ Modifier(NV50_IR_MOD_NEG);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mul2->getDef(0)->refCount() == 1) {
|
||||
// b = mul a, imm
|
||||
// d = mul b, c -> d = mul_x_imm a, c
|
||||
int s2, t2;
|
||||
insn = mul2->getDef(0)->uses->getInsn();
|
||||
if (!insn)
|
||||
return;
|
||||
mul1 = mul2;
|
||||
mul2 = NULL;
|
||||
s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1;
|
||||
t2 = s2 ? 0 : 1;
|
||||
if (insn->op == OP_MUL && insn->dType == TYPE_F32)
|
||||
if (!insn->src[t2].getImmediate())
|
||||
mul2 = insn;
|
||||
if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
|
||||
mul2->postFactor = e;
|
||||
mul2->setSrc(s2, mul1->src[t]);
|
||||
if (f < 0)
|
||||
mul2->src[s2].mod = mul2->src[s2].mod ^ Modifier(NV50_IR_MOD_NEG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ConstantFolding::opnd(Instruction *i, ImmediateValue *src, int s)
|
||||
{
|
||||
|
@ -539,41 +608,9 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue *src, int s)
|
|||
|
||||
switch (i->op) {
|
||||
case OP_MUL:
|
||||
if (i->dType == TYPE_F32 && i->getSrc(t)->refCount() == 1) {
|
||||
Instruction *si = i->getSrc(t)->getUniqueInsn();
|
||||
if (i->dType == TYPE_F32)
|
||||
tryCollapseChainedMULs(i, s, imm);
|
||||
|
||||
if (si && si->op == OP_MUL) {
|
||||
float f = imm.reg.data.f32;
|
||||
|
||||
if (si->src[1].getImmediate()) {
|
||||
f *= si->src[1].getImmediate()->reg.data.f32;
|
||||
si->setSrc(1, new_ImmediateValue(prog, f));
|
||||
i->def[0].replace(i->getSrc(t), false);
|
||||
break;
|
||||
} else {
|
||||
int fac;
|
||||
if (f == 0.125f) fac = -3;
|
||||
else
|
||||
if (f == 0.250f) fac = -2;
|
||||
else
|
||||
if (f == 0.500f) fac = -1;
|
||||
else
|
||||
if (f == 2.000f) fac = +1;
|
||||
else
|
||||
if (f == 4.000f) fac = +2;
|
||||
else
|
||||
if (f == 8.000f) fac = +3;
|
||||
else
|
||||
fac = 0;
|
||||
if (fac) {
|
||||
// FIXME: allowed & modifier
|
||||
si->postFactor = fac;
|
||||
i->def[0].replace(i->getSrc(t), false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (imm.isInteger(0)) {
|
||||
i->op = OP_MOV;
|
||||
i->setSrc(0, i->getSrc(s));
|
||||
|
@ -905,6 +942,9 @@ AlgebraicOpt::handleADD(Instruction *add)
|
|||
|
||||
src = add->getSrc(s);
|
||||
|
||||
if (src->getInsn()->postFactor)
|
||||
return;
|
||||
|
||||
mod[0] = add->src[0].mod;
|
||||
mod[1] = add->src[1].mod;
|
||||
mod[2] = src->getUniqueInsn()->src[0].mod;
|
||||
|
|
|
@ -142,6 +142,8 @@ public:
|
|||
virtual bool isModSupported(const Instruction *,
|
||||
int s, Modifier) const = 0;
|
||||
virtual bool isSatSupported(const Instruction *) const = 0;
|
||||
virtual bool isPostMultiplySupported(operation op, float f,
|
||||
int& e) const { return false; }
|
||||
virtual bool mayPredicate(const Instruction *,
|
||||
const Value *) const = 0;
|
||||
|
||||
|
|
|
@ -516,6 +516,18 @@ TargetNVC0::isSatSupported(const Instruction *insn) const
|
|||
return insn->dType == TYPE_F32;
|
||||
}
|
||||
|
||||
bool
|
||||
TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const
|
||||
{
|
||||
if (op != OP_MUL)
|
||||
return false;
|
||||
f = fabsf(f);
|
||||
e = static_cast<int>(log2f(f));
|
||||
if (e < -3 || e > 3)
|
||||
return false;
|
||||
return f == exp2f(static_cast<float>(e));
|
||||
}
|
||||
|
||||
// TODO: better values
|
||||
int TargetNVC0::getLatency(const Instruction *i) const
|
||||
{
|
||||
|
|
|
@ -48,6 +48,7 @@ public:
|
|||
virtual bool isAccessSupported(DataFile, DataType) const;
|
||||
virtual bool isModSupported(const Instruction *, int s, Modifier) const;
|
||||
virtual bool isSatSupported(const Instruction *) const;
|
||||
virtual bool isPostMultiplySupported(operation, float, int& e) const;
|
||||
virtual bool mayPredicate(const Instruction *, const Value *) const;
|
||||
|
||||
virtual int getLatency(const Instruction *) const;
|
||||
|
|
Loading…
Reference in New Issue