diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 43a6e5f0fb1..dc0667fb6a9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2012,6 +2012,138 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
       su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8;
 }
 
+// Rewrites the coordinate sources of a surface instruction on NVC0 and
+// predicates it so that it is skipped for an unusable surface:
+//  - for SULDP/SUREDP, source 0 is multiplied by the NVE4_SU_INFO_BSIZE
+//    entry;
+//  - for array and cube targets, source 2 is multiplied by the
+//    NVE4_SU_INFO_ARRAY entry;
+//  - the instruction is predicated with CC_NOT_P on a flag that is set when
+//    the NVE4_SU_INFO_ADDR entry is zero or, for anything but SUSTP with a
+//    known format, when the format size in bytes differs from the
+//    NVE4_SU_INFO_BSIZE entry.
+void
+NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
+{
+   const int slot = su->tex.r;
+   const int dim = su->tex.target.getDim();
+   const bool layered = su->tex.target.isArray() || su->tex.target.isCube();
+   const int numCoords = dim + layered;
+   const uint16_t infoBase = slot * NVE4_SU_INFO__STRIDE;
+   Value *const zero = bld.mkImm(0);
+   Value *coord[3];
+   Value *indirect = NULL;
+
+   if (su->tex.rIndirectSrc >= 0) {
+      // FIXME: out of bounds
+      assert(su->tex.r == 0);
+      indirect = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                            su->getIndirectR(), bld.mkImm(6));
+   }
+
+   // snapshot the surface coordinates, missing components read as zero
+   for (int i = 0; i < 3; ++i)
+      coord[i] = (i < numCoords) ? su->getSrc(i) : zero;
+
+   // calculate pixel offset
+   if (su->op == OP_SULDP || su->op == OP_SUREDP) {
+      Value *bsize = loadSuInfo32(indirect, infoBase + NVE4_SU_INFO_BSIZE);
+      su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(),
+                               coord[0], bsize));
+   }
+
+   // add array layer offset
+   if (layered) {
+      Value *scale = loadSuInfo32(indirect, infoBase + NVE4_SU_INFO_ARRAY);
+      assert(dim > 1);
+      su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(),
+                               coord[2], scale));
+   }
+
+   // prevent read fault when the image is not actually bound
+   CmpInstruction *guard =
+      bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+                TYPE_U32, bld.mkImm(0),
+                loadSuInfo32(indirect, infoBase + NVE4_SU_INFO_ADDR));
+   Value *const skip = guard->getDef(0);
+
+   const TexInstruction::ImgFormatDesc *const format = su->tex.format;
+   if (su->op != OP_SUSTP && format) {
+      const int blockBits = format->bits[0] + format->bits[1] +
+                            format->bits[2] + format->bits[3];
+
+      assert(format->components != 0);
+      // make sure that the format doesn't mismatch when it's not FMT_NONE
+      bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, skip,
+                TYPE_U32, bld.loadImm(NULL, blockBits / 8),
+                loadSuInfo32(indirect, infoBase + NVE4_SU_INFO_BSIZE),
+                skip);
+   }
+   su->setPredicate(CC_NOT_P, skip);
+}
+
+// Lowers a surface instruction on chipsets older than GK104 (this is what
+// visit() dispatches to for them).  1D array targets are first turned into
+// 2D arrays, the coordinates go through processSurfaceCoordsNVC0(), SULDP
+// goes through convertSurfaceFormat(), and SUREDB/SUREDP are split into an
+// OP_SULEA producing the address plus a predicated OP_ATOM on global memory.
+void
+NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
+{
+   if (su->tex.target == TEX_TARGET_1D_ARRAY) {
+      /* As 1d arrays also need 3 coordinates, switching to TEX_TARGET_2D_ARRAY
+       * will simplify the lowering pass and the texture constraints. */
+      su->moveSources(1, 1);
+      su->setSrc(1, bld.loadImm(NULL, 0));
+      su->tex.target = TEX_TARGET_2D_ARRAY;
+   }
+
+   processSurfaceCoordsNVC0(su);
+
+   if (su->op == OP_SULDP)
+      convertSurfaceFormat(su);
+
+   // only surface reductions need the SULEA + ATOM expansion below
+   if (su->op != OP_SUREDB && su->op != OP_SUREDP)
+      return;
+
+   const int dim = su->tex.target.getDim();
+   const bool layered = su->tex.target.isArray() || su->tex.target.isCube();
+   const int operand = dim + layered; // first source after the coordinates
+   LValue *addr = bld.getSSA(8);
+   Value *result = su->getDef(0);
+
+   // Turn the surface op into SULEA: def 0 becomes the 64-bit address and
+   // def 1 the predicate currently attached to the instruction
+   su->op = OP_SULEA;
+   su->dType = TYPE_U64;
+   su->setDef(0, addr);
+   su->setDef(1, su->getPredicate());
+
+   bld.setPosition(su, true);
+
+   // Perform the atomic op
+   Instruction *atom = bld.mkOp(OP_ATOM, su->sType, bld.getSSA());
+   atom->subOp = su->subOp;
+   atom->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, su->sType, 0));
+   atom->setSrc(1, su->getSrc(operand));
+   if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS)
+      atom->setSrc(2, su->getSrc(operand + 1));
+   atom->setIndirect(0, 0, addr);
+
+   // make sure to initialize dst value when the atomic operation is not
+   // performed
+   Instruction *zeroed = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+
+   assert(su->cc == CC_NOT_P);
+   atom->setPredicate(su->cc, su->getPredicate());
+   zeroed->setPredicate(CC_P, su->getPredicate());
+
+   bld.mkOp2(OP_UNION, TYPE_U32, result, atom->getDef(0), zeroed->getDef(0));
+
+   handleCasExch(atom, false);
+}
+
 bool
 NVC0LoweringPass::handleWRSV(Instruction *i)
 {
@@ -2491,6 +2623,8 @@ NVC0LoweringPass::visit(Instruction *i)
    case OP_SUREDP:
       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
          handleSurfaceOpNVE4(i->asTex());
+      else
+         handleSurfaceOpNVC0(i->asTex());
       break;
    case OP_SUQ:
       handleSUQ(i->asTex());
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index c007e09439e..2321956322e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -106,6 +106,7 @@ protected:
    bool handleATOM(Instruction *);
    bool handleCasExch(Instruction *, bool needCctl);
    void handleSurfaceOpNVE4(TexInstruction *);
+   void handleSurfaceOpNVC0(TexInstruction *);
    void handleSharedATOM(Instruction *);
    void handleSharedATOMNVE4(Instruction *);
    void handleLDST(Instruction *);
@@ -138,6 +139,7 @@ private:
    void adjustCoordinatesMS(TexInstruction *);
 
    void processSurfaceCoordsNVE4(TexInstruction *);
+   void processSurfaceCoordsNVC0(TexInstruction *);
    void convertSurfaceFormat(TexInstruction *);
 
 protected: