nvc0/ir: flush denorms to zero in non-compute shaders
This sets the FTZ (flush denorms to zero) flag on all opcodes that can take it. This resolves issues in Unigine Heaven 4.0 where solid-filled boxes were popping up.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89455
Cc: "10.4 10.5" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
66985d2a6d
commit
6fe0d4f035
|
@ -96,6 +96,26 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
|
|||
bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LegalizeSSA::handleFTZ(Instruction *i)
|
||||
{
|
||||
// Only want to flush float inputs
|
||||
if (i->sType != TYPE_F32)
|
||||
return;
|
||||
|
||||
// If we're already flushing denorms (and NaN's) to zero, no need for this.
|
||||
if (i->dnz)
|
||||
return;
|
||||
|
||||
// Only certain classes of operations can flush
|
||||
OpClass cls = prog->getTarget()->getOpClass(i->op);
|
||||
if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
|
||||
cls != OPCLASS_CONVERT)
|
||||
return;
|
||||
|
||||
i->ftz = true;
|
||||
}
|
||||
|
||||
bool
|
||||
NVC0LegalizeSSA::visit(Function *fn)
|
||||
{
|
||||
|
@ -109,8 +129,11 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
|
|||
Instruction *next;
|
||||
for (Instruction *i = bb->getEntry(); i; i = next) {
|
||||
next = i->next;
|
||||
if (i->dType == TYPE_F32)
|
||||
if (i->dType == TYPE_F32) {
|
||||
if (prog->getType() != Program::TYPE_COMPUTE)
|
||||
handleFTZ(i);
|
||||
continue;
|
||||
}
|
||||
switch (i->op) {
|
||||
case OP_DIV:
|
||||
case OP_MOD:
|
||||
|
|
|
@ -36,6 +36,7 @@ private:
|
|||
// we want to insert calls to the builtin library only after optimization
|
||||
void handleDIV(Instruction *); // integer division, modulus
|
||||
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
|
||||
void handleFTZ(Instruction *);
|
||||
|
||||
private:
|
||||
BuildUtil bld;
|
||||
|
|
Loading…
Reference in New Issue