diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 6a5981daadf..27d9b8e1689 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -2080,15 +2080,29 @@ CodeEmitterGK110::emitLOAD(const Instruction *i) code[1] |= offset >> 9; // Locked store on shared memory can fail. + int r = 0, p = -1; if (i->src(0).getFile() == FILE_MEMORY_SHARED && i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { - assert(i->defExists(1)); - defId(i->def(1), 32 + 16); + if (i->def(0).getFile() == FILE_PREDICATE) { // p, # + r = -1; + p = 0; + } else if (i->defExists(1)) { // r, p + p = 1; + } else { + assert(!"Expected predicate dest for load locked"); + } } emitPredicate(i); - defId(i->def(0), 2); + if (r >= 0) + defId(i->def(r), 2); + else + code[0] |= 255 << 2; + + if (p >= 0) + defId(i->def(p), 32 + 16); + if (i->getIndirect(0, 0)) { srcId(i->src(0).getIndirect(0), 10); if (i->getIndirect(0, 0)->reg.size == 8) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 596293ed2df..1bb962f9e26 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1874,17 +1874,31 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) } code[1] = opc; + int r = 0, p = -1; if (i->src(0).getFile() == FILE_MEMORY_SHARED) { if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { - assert(i->defExists(1)); - if (targ->getChipset() >= NVISA_GK104_CHIPSET) - defId(i->def(1), 8); - else - defId(i->def(1), 32 + 18); + if (i->def(0).getFile() == FILE_PREDICATE) { // p, # + r = -1; + p = 0; + } else if (i->defExists(1)) { // r, p + p = 1; + } else { + assert(!"Expected predicate dest for load locked"); + } } } - defId(i->def(0), 14); + if (r >= 0) + defId(i->def(r), 14); + else + code[0] |= 63 << 14; + + if (p >= 0) { + if (targ->getChipset() >= NVISA_GK104_CHIPSET) + defId(i->def(p), 8); + else + defId(i->def(p), 32 + 18); + } setAddressByFile(i->src(0)); srcId(i->src(0).getIndirect(0), 20); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index cd801f342dd..32131884c5f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -3265,14 +3265,20 @@ DeadCodeElim::visit(BasicBlock *bb) ++deadCount; delete_Instruction(prog, i); } else - if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) { + if (i->defExists(1) && + i->subOp == 0 && + (i->op == OP_VFETCH || i->op == OP_LOAD)) { checkSplitLoad(i); } else if (i->defExists(0) && !i->getDef(0)->refCount()) { if (i->op == OP_ATOM || i->op == OP_SUREDP || - i->op == OP_SUREDB) + i->op == OP_SUREDB) { i->setDef(0, NULL); + } else if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { + i->setDef(0, i->getDef(1)); + i->setDef(1, NULL); + } } } return true;