nvc0/ir: fix lowering of RSQ/RCP/SQRT/MOD to work with F64
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
93ebe91bae
commit
b87b498b88
|
@ -175,6 +175,7 @@ enum operation
|
|||
#define NV50_IR_SUBOP_MOV_FINAL 1
|
||||
#define NV50_IR_SUBOP_EXTBF_REV 1
|
||||
#define NV50_IR_SUBOP_BFIND_SAMT 1
|
||||
#define NV50_IR_SUBOP_RCPRSQ_64H 1
|
||||
#define NV50_IR_SUBOP_PERMT_F4E 1
|
||||
#define NV50_IR_SUBOP_PERMT_B4E 2
|
||||
#define NV50_IR_SUBOP_PERMT_RC8 3
|
||||
|
|
|
@ -1771,10 +1771,10 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
|
|||
emitCVT(insn);
|
||||
break;
|
||||
case OP_RSQ:
|
||||
emitSFnOp(insn, 5);
|
||||
emitSFnOp(insn, 5 + 2 * insn->subOp);
|
||||
break;
|
||||
case OP_RCP:
|
||||
emitSFnOp(insn, 4);
|
||||
emitSFnOp(insn, 4 + 2 * insn->subOp);
|
||||
break;
|
||||
case OP_LG2:
|
||||
emitSFnOp(insn, 3);
|
||||
|
|
|
@ -1265,8 +1265,8 @@ CodeEmitterGM107::emitMUFU()
|
|||
case OP_SIN: mufu = 1; break;
|
||||
case OP_EX2: mufu = 2; break;
|
||||
case OP_LG2: mufu = 3; break;
|
||||
case OP_RCP: mufu = 4; break;
|
||||
case OP_RSQ: mufu = 5; break;
|
||||
case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
|
||||
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
|
||||
default:
|
||||
assert(!"invalid mufu");
|
||||
break;
|
||||
|
|
|
@ -2365,10 +2365,10 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
|
|||
emitCVT(insn);
|
||||
break;
|
||||
case OP_RSQ:
|
||||
emitSFnOp(insn, 5);
|
||||
emitSFnOp(insn, 5 + 2 * insn->subOp);
|
||||
break;
|
||||
case OP_RCP:
|
||||
emitSFnOp(insn, 4);
|
||||
emitSFnOp(insn, 4 + 2 * insn->subOp);
|
||||
break;
|
||||
case OP_LG2:
|
||||
emitSFnOp(insn, 3);
|
||||
|
|
|
@ -70,7 +70,30 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
|
|||
void
|
||||
NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
|
||||
{
|
||||
// TODO
|
||||
assert(i->dType == TYPE_F64);
|
||||
// There are instructions that will compute the high 32 bits of the 64-bit
|
||||
// float. We will just stick 0 in the bottom 32 bits.
|
||||
|
||||
bld.setPosition(i, false);
|
||||
|
||||
// 1. Take the source and it up.
|
||||
Value *src[2], *dst[2], *def = i->getDef(0);
|
||||
bld.mkSplit(src, 4, i->getSrc(0));
|
||||
|
||||
// 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
|
||||
dst[0] = bld.loadImm(NULL, 0);
|
||||
dst[1] = bld.getSSA();
|
||||
|
||||
// 3. The new version of the instruction takes the high 32 bits of the
|
||||
// source and outputs the high 32 bits of the destination.
|
||||
i->setSrc(0, src[1]);
|
||||
i->setDef(0, dst[1]);
|
||||
i->setType(TYPE_F32);
|
||||
i->subOp = NV50_IR_SUBOP_RCPRSQ_64H;
|
||||
|
||||
// 4. Recombine the two dst pieces back into the original destination.
|
||||
bld.setPosition(i, true);
|
||||
bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -1520,7 +1543,7 @@ NVC0LoweringPass::handleDIV(Instruction *i)
|
|||
if (!isFloatType(i->dType))
|
||||
return true;
|
||||
bld.setPosition(i, false);
|
||||
Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));
|
||||
Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)), i->getSrc(1));
|
||||
i->op = OP_MUL;
|
||||
i->setSrc(1, rcp->getDef(0));
|
||||
return true;
|
||||
|
@ -1529,13 +1552,13 @@ NVC0LoweringPass::handleDIV(Instruction *i)
|
|||
bool
|
||||
NVC0LoweringPass::handleMOD(Instruction *i)
|
||||
{
|
||||
if (i->dType != TYPE_F32)
|
||||
if (!isFloatType(i->dType))
|
||||
return true;
|
||||
LValue *value = bld.getScratch();
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1));
|
||||
bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value);
|
||||
bld.mkOp1(OP_TRUNC, TYPE_F32, value, value);
|
||||
bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value);
|
||||
LValue *value = bld.getScratch(typeSizeof(i->dType));
|
||||
bld.mkOp1(OP_RCP, i->dType, value, i->getSrc(1));
|
||||
bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(0), value);
|
||||
bld.mkOp1(OP_TRUNC, i->dType, value, value);
|
||||
bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(1), value);
|
||||
i->op = OP_SUB;
|
||||
i->setSrc(1, value);
|
||||
return true;
|
||||
|
@ -1544,8 +1567,8 @@ NVC0LoweringPass::handleMOD(Instruction *i)
|
|||
bool
|
||||
NVC0LoweringPass::handleSQRT(Instruction *i)
|
||||
{
|
||||
Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
|
||||
bld.getSSA(), i->getSrc(0));
|
||||
Instruction *rsq = bld.mkOp1(OP_RSQ, i->dType,
|
||||
bld.getSSA(typeSizeof(i->dType)), i->getSrc(0));
|
||||
i->op = OP_MUL;
|
||||
i->setSrc(1, rsq->getDef(0));
|
||||
|
||||
|
|
Loading…
Reference in New Issue