gm200/ir: add native OP_SQRT support
./GpuTest /test=pixmark_piano 1024x640 30sec:
  301 -> 327 points

shader-db:
total instructions in shared programs : 5472103 -> 5456166 (-0.29%)
total gprs used in shared programs    : 647530 -> 647522 (-0.00%)
total shared used in shared programs  : 389120 -> 389120 (0.00%)
total local used in shared programs   : 21064 -> 21064 (0.00%)
total bytes used in shared programs   : 58459304 -> 58288696 (-0.29%)

          local   shared   gpr   inst   bytes
helped        0        0    27   8281    8281
hurt          0        0    21    431     431

v2: use NVISA_GM200_CHIPSET

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Karol Herbst <kherbst@redhat.com>
This commit is contained in:
parent
4334196ab3
commit
6f98a3065b
|
@ -1409,6 +1409,7 @@ CodeEmitterGM107::emitMUFU()
|
|||
case OP_LG2: mufu = 3; break;
|
||||
case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
|
||||
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
|
||||
case OP_SQRT: mufu = 8; break;
|
||||
default:
|
||||
assert(!"invalid mufu");
|
||||
break;
|
||||
|
@ -1418,7 +1419,7 @@ CodeEmitterGM107::emitMUFU()
|
|||
emitSAT (0x32);
|
||||
emitNEG (0x30, insn->src(0));
|
||||
emitABS (0x2e, insn->src(0));
|
||||
emitField(0x14, 3, mufu);
|
||||
emitField(0x14, 4, mufu);
|
||||
emitGPR (0x08, insn->src(0));
|
||||
emitGPR (0x00, insn->def(0));
|
||||
}
|
||||
|
@ -3342,6 +3343,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
|
|||
case OP_LG2:
|
||||
case OP_RCP:
|
||||
case OP_RSQ:
|
||||
case OP_SQRT:
|
||||
emitMUFU();
|
||||
break;
|
||||
case OP_AND:
|
||||
|
|
|
@ -2752,6 +2752,9 @@ NVC0LoweringPass::handleMOD(Instruction *i)
|
|||
bool
|
||||
NVC0LoweringPass::handleSQRT(Instruction *i)
|
||||
{
|
||||
if (targ->isOpSupported(OP_SQRT, i->dType))
|
||||
return true;
|
||||
|
||||
if (i->dType == TYPE_F64) {
|
||||
Value *pred = bld.getSSA(1, FILE_PREDICATE);
|
||||
Value *zero = bld.loadImm(NULL, 0.0);
|
||||
|
|
|
@ -57,10 +57,13 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
|
|||
switch (op) {
|
||||
case OP_SAD:
|
||||
case OP_POW:
|
||||
case OP_SQRT:
|
||||
case OP_DIV:
|
||||
case OP_MOD:
|
||||
return false;
|
||||
case OP_SQRT:
|
||||
if (ty == TYPE_F64)
|
||||
return false;
|
||||
return chipset >= NVISA_GM200_CHIPSET;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -125,6 +128,7 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
|
|||
case OP_RCP:
|
||||
case OP_RSQ:
|
||||
case OP_SIN:
|
||||
case OP_SQRT:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
|
@ -256,6 +260,7 @@ TargetGM107::getLatency(const Instruction *insn) const
|
|||
case OP_RCP:
|
||||
case OP_RSQ:
|
||||
case OP_SIN:
|
||||
case OP_SQRT:
|
||||
return 13;
|
||||
default:
|
||||
break;
|
||||
|
@ -284,6 +289,7 @@ TargetGM107::getReadLatency(const Instruction *insn) const
|
|||
case OP_RSQ:
|
||||
case OP_SAT:
|
||||
case OP_SIN:
|
||||
case OP_SQRT:
|
||||
case OP_SULDB:
|
||||
case OP_SULDP:
|
||||
case OP_SUREDB:
|
||||
|
|
|
@ -129,6 +129,7 @@ static const struct opProperties _initProps[] =
|
|||
{ OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
|
||||
{ OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
|
||||
{ OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
|
||||
{ OP_SQRT, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
|
||||
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
|
||||
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
|
||||
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
|
||||
|
|
Loading…
Reference in New Issue