gm200/ir: add native OP_SQRT support

./GpuTest /test=pixmark_piano 1024x640 30sec:
301 -> 327 points

shader-db:
total instructions in shared programs : 5472103 -> 5456166 (-0.29%)
total gprs used in shared programs    : 647530 -> 647522 (-0.00%)
total shared used in shared programs  : 389120 -> 389120 (0.00%)
total local used in shared programs   : 21064 -> 21064 (0.00%)
total bytes used in shared programs   : 58459304 -> 58288696 (-0.29%)

                local     shared        gpr       inst      bytes
    helped           0           0          27        8281        8281
      hurt           0           0          21         431         431

v2: use NVISA_GM200_CHIPSET

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Karol Herbst <kherbst@redhat.com>
This commit is contained in:
Karol Herbst 2018-08-04 03:13:11 +02:00
parent 4334196ab3
commit 6f98a3065b
4 changed files with 14 additions and 2 deletions

View File

@ -1409,6 +1409,7 @@ CodeEmitterGM107::emitMUFU()
case OP_LG2: mufu = 3; break;
case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
case OP_SQRT: mufu = 8; break;
default:
assert(!"invalid mufu");
break;
@ -1418,7 +1419,7 @@ CodeEmitterGM107::emitMUFU()
emitSAT (0x32);
emitNEG (0x30, insn->src(0));
emitABS (0x2e, insn->src(0));
-emitField(0x14, 3, mufu);
+emitField(0x14, 4, mufu);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
@ -3342,6 +3343,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_LG2:
case OP_RCP:
case OP_RSQ:
case OP_SQRT:
emitMUFU();
break;
case OP_AND:

View File

@ -2752,6 +2752,9 @@ NVC0LoweringPass::handleMOD(Instruction *i)
bool
NVC0LoweringPass::handleSQRT(Instruction *i)
{
if (targ->isOpSupported(OP_SQRT, i->dType))
return true;
if (i->dType == TYPE_F64) {
Value *pred = bld.getSSA(1, FILE_PREDICATE);
Value *zero = bld.loadImm(NULL, 0.0);

View File

@ -57,10 +57,13 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
switch (op) {
case OP_SAD:
case OP_POW:
-case OP_SQRT:
case OP_DIV:
case OP_MOD:
return false;
+case OP_SQRT:
+if (ty == TYPE_F64)
+return false;
+return chipset >= NVISA_GM200_CHIPSET;
default:
break;
}
@ -125,6 +128,7 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
case OP_RCP:
case OP_RSQ:
case OP_SIN:
case OP_SQRT:
return true;
default:
break;
@ -256,6 +260,7 @@ TargetGM107::getLatency(const Instruction *insn) const
case OP_RCP:
case OP_RSQ:
case OP_SIN:
case OP_SQRT:
return 13;
default:
break;
@ -284,6 +289,7 @@ TargetGM107::getReadLatency(const Instruction *insn) const
case OP_RSQ:
case OP_SAT:
case OP_SIN:
case OP_SQRT:
case OP_SULDB:
case OP_SULDP:
case OP_SUREDB:

View File

@ -129,6 +129,7 @@ static const struct opProperties _initProps[] =
{ OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_SQRT, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },