nvc0/ir: handle a load's reg result not being used for locked variants

For a locked load, the first (register) result might not be used, while
the second result, which is the predicate result of the locking, still
is. In that case the load splitting logic (which is designed for
splitting 128-bit loads) doesn't apply. Instead we take the predicate
and move it into the first position, since having a dead result in the
first def's position upsets all sorts of things, including RA. Update
the emitters to deal with this as well.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Tested-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Ilia Mirkin 2016-05-25 22:41:06 -04:00
parent 04ecad97ff
commit df2881381a
3 changed files with 45 additions and 11 deletions

View File

@ -2080,15 +2080,29 @@ CodeEmitterGK110::emitLOAD(const Instruction *i)
code[1] |= offset >> 9;
// Locked store on shared memory can fail.
int r = 0, p = -1;
if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
assert(i->defExists(1));
defId(i->def(1), 32 + 16);
if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
r = -1;
p = 0;
} else if (i->defExists(1)) { // r, p
p = 1;
} else {
assert(!"Expected predicate dest for load locked");
}
}
emitPredicate(i);
defId(i->def(0), 2);
if (r >= 0)
defId(i->def(r), 2);
else
code[0] |= 255 << 2;
if (p >= 0)
defId(i->def(p), 32 + 16);
if (i->getIndirect(0, 0)) {
srcId(i->src(0).getIndirect(0), 10);
if (i->getIndirect(0, 0)->reg.size == 8)

View File

@ -1874,17 +1874,31 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
}
code[1] = opc;
int r = 0, p = -1;
if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
assert(i->defExists(1));
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
defId(i->def(1), 8);
else
defId(i->def(1), 32 + 18);
if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
r = -1;
p = 0;
} else if (i->defExists(1)) { // r, p
p = 1;
} else {
assert(!"Expected predicate dest for load locked");
}
}
}
defId(i->def(0), 14);
if (r >= 0)
defId(i->def(r), 14);
else
code[0] |= 63 << 14;
if (p >= 0) {
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
defId(i->def(p), 8);
else
defId(i->def(p), 32 + 18);
}
setAddressByFile(i->src(0));
srcId(i->src(0).getIndirect(0), 20);

View File

@ -3265,14 +3265,20 @@ DeadCodeElim::visit(BasicBlock *bb)
++deadCount;
delete_Instruction(prog, i);
} else
if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) {
if (i->defExists(1) &&
i->subOp == 0 &&
(i->op == OP_VFETCH || i->op == OP_LOAD)) {
checkSplitLoad(i);
} else
if (i->defExists(0) && !i->getDef(0)->refCount()) {
if (i->op == OP_ATOM ||
i->op == OP_SUREDP ||
i->op == OP_SUREDB)
i->op == OP_SUREDB) {
i->setDef(0, NULL);
} else if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
i->setDef(0, i->getDef(1));
i->setDef(1, NULL);
}
}
}
return true;