nvc0/ir: detect i2f/i2i which operate on specific bytes/words
Some Unigine shaders have been observed to unpack bytes out of 32-bit integers and convert them to floats. I2F/I2I can handle this sort of thing directly. Detect the handleable situations. This misses 16-bit word capabilities in nv50, but I haven't seen shaders that would actually make use of that. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
51499bb5ff
commit
63cb85e567
|
@ -933,6 +933,7 @@ CodeEmitterGK110::emitCVT(const Instruction *i)
|
|||
|
||||
code[0] |= typeSizeofLog2(dType) << 10;
|
||||
code[0] |= typeSizeofLog2(i->sType) << 12;
|
||||
code[1] |= i->subOp << 12;
|
||||
|
||||
if (isSignedIntType(dType))
|
||||
code[0] |= 0x4000;
|
||||
|
|
|
@ -818,6 +818,7 @@ CodeEmitterGM107::emitI2F()
|
|||
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
|
||||
emitCC (0x2f);
|
||||
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
|
||||
emitField(0x29, 2, insn->subOp);
|
||||
emitRND (0x27, rnd, -1);
|
||||
emitField(0x0d, 1, isSignedType(insn->sType));
|
||||
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
|
||||
|
@ -850,6 +851,7 @@ CodeEmitterGM107::emitI2I()
|
|||
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
|
||||
emitCC (0x2f);
|
||||
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
|
||||
emitField(0x29, 2, insn->subOp);
|
||||
emitField(0x0d, 1, isSignedType(insn->sType));
|
||||
emitField(0x0c, 1, isSignedType(insn->dType));
|
||||
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
|
||||
|
|
|
@ -1020,6 +1020,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i)
|
|||
code[0] |= util_logbase2(typeSizeof(dType)) << 20;
|
||||
code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
|
||||
|
||||
// for 8/16 source types, the byte/word is in subOp. word 1 is
|
||||
// represented as 2.
|
||||
code[1] |= i->subOp << 0x17;
|
||||
|
||||
if (sat)
|
||||
code[0] |= 0x20;
|
||||
if (abs)
|
||||
|
|
|
@ -1238,7 +1238,8 @@ private:
|
|||
void handleRCP(Instruction *);
|
||||
void handleSLCT(Instruction *);
|
||||
void handleLOGOP(Instruction *);
|
||||
void handleCVT(Instruction *);
|
||||
void handleCVT_NEG(Instruction *);
|
||||
void handleCVT_EXTBF(Instruction *);
|
||||
void handleSUCLAMP(Instruction *);
|
||||
|
||||
BuildUtil bld;
|
||||
|
@ -1489,12 +1490,12 @@ AlgebraicOpt::handleLOGOP(Instruction *logop)
|
|||
// nv50:
|
||||
// F2I(NEG(I2F(ABS(SET))))
|
||||
void
|
||||
AlgebraicOpt::handleCVT(Instruction *cvt)
|
||||
AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
|
||||
{
|
||||
Instruction *insn = cvt->getSrc(0)->getInsn();
|
||||
if (cvt->sType != TYPE_F32 ||
|
||||
cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0))
|
||||
return;
|
||||
Instruction *insn = cvt->getSrc(0)->getInsn();
|
||||
if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32)
|
||||
return;
|
||||
if (insn->src(0).mod != Modifier(0))
|
||||
|
@ -1524,6 +1525,74 @@ AlgebraicOpt::handleCVT(Instruction *cvt)
|
|||
delete_Instruction(prog, cvt);
|
||||
}
|
||||
|
||||
// Some shaders extract packed bytes out of words and convert them to
|
||||
// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
|
||||
// nv50 for word sizes.
|
||||
//
|
||||
// CVT(EXTBF(x, byte/word))
|
||||
// CVT(AND(bytemask, x))
|
||||
// CVT(AND(bytemask, SHR(x, 8/16/24)))
|
||||
void
|
||||
AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
|
||||
{
|
||||
Instruction *insn = cvt->getSrc(0)->getInsn();
|
||||
ImmediateValue imm0, imm1;
|
||||
Value *arg = NULL;
|
||||
unsigned width, offset;
|
||||
if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn)
|
||||
return;
|
||||
if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm0)) {
|
||||
width = (imm0.reg.data.u32 >> 8) & 0xff;
|
||||
offset = imm0.reg.data.u32 & 0xff;
|
||||
arg = insn->getSrc(0);
|
||||
|
||||
if (width != 8 && width != 16)
|
||||
return;
|
||||
if (width == 8 && offset & 0x7)
|
||||
return;
|
||||
if (width == 16 && offset & 0xf)
|
||||
return;
|
||||
} else if (insn->op == OP_AND) {
|
||||
int s;
|
||||
if (insn->src(0).getImmediate(imm0))
|
||||
s = 0;
|
||||
else if (insn->src(1).getImmediate(imm0))
|
||||
s = 1;
|
||||
else
|
||||
return;
|
||||
|
||||
if (imm0.reg.data.u32 == 0xff)
|
||||
width = 8;
|
||||
else if (imm0.reg.data.u32 == 0xffff)
|
||||
width = 16;
|
||||
else
|
||||
return;
|
||||
|
||||
arg = insn->getSrc(!s);
|
||||
Instruction *shift = arg->getInsn();
|
||||
offset = 0;
|
||||
if (shift && shift->op == OP_SHR &&
|
||||
shift->src(1).getImmediate(imm1) &&
|
||||
((width == 8 && (imm1.reg.data.u32 & 0x7) == 0) ||
|
||||
(width == 16 && (imm1.reg.data.u32 & 0xf) == 0))) {
|
||||
arg = shift->getSrc(0);
|
||||
offset = imm1.reg.data.u32;
|
||||
}
|
||||
}
|
||||
|
||||
if (!arg)
|
||||
return;
|
||||
|
||||
if (width == 8) {
|
||||
cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U8 : TYPE_S8;
|
||||
} else {
|
||||
assert(width == 16);
|
||||
cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U16 : TYPE_S16;
|
||||
}
|
||||
cvt->setSrc(0, arg);
|
||||
cvt->subOp = offset >> 3;
|
||||
}
|
||||
|
||||
// SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6)
|
||||
void
|
||||
AlgebraicOpt::handleSUCLAMP(Instruction *insn)
|
||||
|
@ -1594,7 +1663,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
|
|||
handleLOGOP(i);
|
||||
break;
|
||||
case OP_CVT:
|
||||
handleCVT(i);
|
||||
handleCVT_NEG(i);
|
||||
if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
|
||||
handleCVT_EXTBF(i);
|
||||
break;
|
||||
case OP_SUCLAMP:
|
||||
handleSUCLAMP(i);
|
||||
|
|
Loading…
Reference in New Issue