nvc0/ir: add a lowering pass for surfaces on Fermi
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
b663db44ba
commit
362e17a712
|
@ -2012,6 +2012,119 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
|
|||
su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8;
|
||||
}
|
||||
|
||||
// Rewrites the coordinate sources of a surface instruction and attaches a
// guard predicate to it, for the NVC0 (pre-GK104) surface lowering path.
//
// Steps, in emission order:
//  - optionally builds an indirect offset from the instruction's indirect
//    resource index,
//  - for OP_SULDP / OP_SUREDP, replaces source 0 with src[0] * <BSIZE word>,
//  - for array / cube targets, replaces source 2 with src[2] * <ARRAY word>,
//  - builds a predicate that is set when the <ADDR word> equals 0 and, when
//    the format is known and the op is not OP_SUSTP, also when the <BSIZE
//    word> differs from the format's size in bytes,
//  - predicates `su` with CC_NOT_P on that predicate.
//
// NOTE(review): loadSuInfo32() is not visible here; it presumably loads a
// 32-bit word of per-surface driver info at the given offset, optionally
// indexed by `ind` -- confirm against its definition.
void
|
||||
NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
|
||||
{
|
||||
const int idx = su->tex.r;
|
||||
const int dim = su->tex.target.getDim();
|
||||
// number of coordinate sources: one extra for array and cube targets
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
|
||||
// offset of this surface's info entry (static index times entry stride)
const uint16_t base = idx * NVE4_SU_INFO__STRIDE;
|
||||
int c;
|
||||
Value *zero = bld.mkImm(0);
|
||||
Value *src[3];
|
||||
Value *v;
|
||||
Value *ind = NULL;
|
||||
|
||||
if (su->tex.rIndirectSrc >= 0) {
|
||||
// FIXME: out of bounds
|
||||
// indirect access is only handled with a static index of 0 (so base is 0)
assert(su->tex.r == 0);
|
||||
// ind = indirect index << 6; presumably 64 is the per-surface info stride
// (NVE4_SU_INFO__STRIDE) -- TODO confirm
ind = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
|
||||
su->getIndirectR(), bld.mkImm(6));
|
||||
}
|
||||
|
||||
// get surface coordinates
|
||||
// the first `arg` sources are the coordinates; missing ones are padded with
// the immediate 0 so that src[0..2] are always valid
for (c = 0; c < arg; ++c)
|
||||
src[c] = su->getSrc(c);
|
||||
for (; c < 3; ++c)
|
||||
src[c] = zero;
|
||||
|
||||
// calculate pixel offset
|
||||
// only for the OP_SULDP / OP_SUREDP ops: source 0 becomes src[0] * BSIZE word
// (presumably the bytes-per-pixel of the bound surface, since the format
// check below compares the same word against blockwidth / 8)
if (su->op == OP_SULDP || su->op == OP_SUREDP) {
|
||||
v = loadSuInfo32(ind, base + NVE4_SU_INFO_BSIZE);
|
||||
su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
|
||||
}
|
||||
|
||||
// add array layer offset
|
||||
// source 2 becomes src[2] * ARRAY word for array and cube targets
if (su->tex.target.isArray() || su->tex.target.isCube()) {
|
||||
v = loadSuInfo32(ind, base + NVE4_SU_INFO_ARRAY);
|
||||
assert(dim > 1);
|
||||
su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
|
||||
}
|
||||
|
||||
// prevent read fault when the image is not actually bound
|
||||
// pred = (0 == ADDR word), written to a fresh predicate-file SSA value
CmpInstruction *pred =
|
||||
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
|
||||
TYPE_U32, bld.mkImm(0),
|
||||
loadSuInfo32(ind, base + NVE4_SU_INFO_ADDR));
|
||||
// the format check is skipped for OP_SUSTP and when no format is attached
if (su->op != OP_SUSTP && su->tex.format) {
|
||||
const TexInstruction::ImgFormatDesc *format = su->tex.format;
|
||||
// total bits per pixel: sum of the four per-component bit counts
int blockwidth = format->bits[0] + format->bits[1] +
|
||||
format->bits[2] + format->bits[3];
|
||||
|
||||
assert(format->components != 0);
|
||||
// make sure that the format doesn't mismatch when it's not FMT_NONE
|
||||
// writes the same predicate def again: compares blockwidth / 8 != BSIZE word,
// with the previous predicate passed as third source (presumably OR-ed into
// the result by OP_SET_OR -- confirm against the opcode's definition)
bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
|
||||
TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
|
||||
loadSuInfo32(ind, base + NVE4_SU_INFO_BSIZE),
|
||||
pred->getDef(0));
|
||||
}
|
||||
// guard the surface instruction with CC_NOT_P on the predicate built above
su->setPredicate(CC_NOT_P, pred->getDef(0));
|
||||
}
|
||||
|
||||
// Lowers a surface instruction on the NVC0 path (visit() calls this instead
// of handleSurfaceOpNVE4() when the chipset is below NVISA_GK104_CHIPSET).
//
//  - TEX_TARGET_1D_ARRAY is rewritten to TEX_TARGET_2D_ARRAY with an
//    immediate 0 inserted as source 1.
//  - processSurfaceCoordsNVC0() then adjusts coordinates and predicates `su`.
//  - OP_SULDP additionally goes through convertSurfaceFormat().
//  - OP_SUREDB / OP_SUREDP are turned into an OP_SULEA that produces a 64-bit
//    address, followed by a separate OP_ATOM on global memory whose result is
//    merged with a zero fallback into the original destination.
void
|
||||
NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
|
||||
{
|
||||
if (su->tex.target == TEX_TARGET_1D_ARRAY) {
|
||||
/* As 1d arrays also need 3 coordinates, switching to TEX_TARGET_2D_ARRAY
|
||||
* will simplify the lowering pass and the texture constraints. */
|
||||
// presumably shifts the sources starting at index 1 up by one slot, so the
// new source 1 set below does not overwrite the layer -- TODO confirm
su->moveSources(1, 1);
|
||||
su->setSrc(1, bld.loadImm(NULL, 0));
|
||||
su->tex.target = TEX_TARGET_2D_ARRAY;
|
||||
}
|
||||
|
||||
processSurfaceCoordsNVC0(su);
|
||||
|
||||
if (su->op == OP_SULDP)
|
||||
convertSurfaceFormat(su);
|
||||
|
||||
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
|
||||
// dim / arg are computed here, after the 1D-array retargeting above, so they
// describe the adjusted target
const int dim = su->tex.target.getDim();
|
||||
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
|
||||
// NOTE(review): the argument 8 is presumably the value size in bytes,
// matching the TYPE_U64 destination type set below -- confirm
LValue *addr = bld.getSSA(8);
|
||||
// remember the original destination; it receives the merged result at the end
Value *def = su->getDef(0);
|
||||
|
||||
su->op = OP_SULEA;
|
||||
|
||||
// Set the destination to the address
|
||||
su->dType = TYPE_U64;
|
||||
su->setDef(0, addr);
|
||||
// the value currently used as su's predicate is also set as its second def
su->setDef(1, su->getPredicate());
|
||||
|
||||
// presumably moves the builder's insertion point to just after su, so the
// instructions below follow the address computation -- TODO confirm
bld.setPosition(su, true);
|
||||
|
||||
// Perform the atomic op
|
||||
Instruction *red = bld.mkOp(OP_ATOM, su->sType, bld.getSSA());
|
||||
red->subOp = su->subOp;
|
||||
// source 0: a global-memory symbol, made indirect through addr further down
red->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, su->sType, 0));
|
||||
// source 1: su's first source after the coordinates
red->setSrc(1, su->getSrc(arg));
|
||||
// compare-and-swap takes one more operand, the next source of su
if (red->subOp == NV50_IR_SUBOP_ATOM_CAS)
|
||||
red->setSrc(2, su->getSrc(arg + 1));
|
||||
red->setIndirect(0, 0, addr);
|
||||
|
||||
// make sure to initialize dst value when the atomic operation is not
|
||||
// performed
|
||||
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
||||
|
||||
// processSurfaceCoordsNVC0() predicated su with CC_NOT_P; the atomic reuses
// that condition while the zero move uses the opposite one (CC_P)
assert(su->cc == CC_NOT_P);
|
||||
red->setPredicate(su->cc, su->getPredicate());
|
||||
mov->setPredicate(CC_P, su->getPredicate());
|
||||
|
||||
// merge the atomic result and the zero fallback into the original destination
bld.mkOp2(OP_UNION, TYPE_U32, def, red->getDef(0), mov->getDef(0));
|
||||
|
||||
// NOTE(review): handleCasExch() is defined elsewhere; it is called last, on
// the new atomic, with needCctl = false
handleCasExch(red, false);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
NVC0LoweringPass::handleWRSV(Instruction *i)
|
||||
{
|
||||
|
@ -2491,6 +2604,8 @@ NVC0LoweringPass::visit(Instruction *i)
|
|||
case OP_SUREDP:
|
||||
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
|
||||
handleSurfaceOpNVE4(i->asTex());
|
||||
else
|
||||
handleSurfaceOpNVC0(i->asTex());
|
||||
break;
|
||||
case OP_SUQ:
|
||||
handleSUQ(i->asTex());
|
||||
|
|
|
@ -106,6 +106,7 @@ protected:
|
|||
bool handleATOM(Instruction *);
|
||||
bool handleCasExch(Instruction *, bool needCctl);
|
||||
void handleSurfaceOpNVE4(TexInstruction *);
|
||||
// NVC0 counterpart of handleSurfaceOpNVE4(); visit() selects it for surface
// ops when the chipset is below NVISA_GK104_CHIPSET.
void handleSurfaceOpNVC0(TexInstruction *);
|
||||
void handleSharedATOM(Instruction *);
|
||||
void handleSharedATOMNVE4(Instruction *);
|
||||
void handleLDST(Instruction *);
|
||||
|
@ -138,6 +139,7 @@ private:
|
|||
|
||||
void adjustCoordinatesMS(TexInstruction *);
|
||||
void processSurfaceCoordsNVE4(TexInstruction *);
|
||||
// Adjusts the coordinate sources of a surface instruction and predicates it;
// called by handleSurfaceOpNVC0().
void processSurfaceCoordsNVC0(TexInstruction *);
|
||||
void convertSurfaceFormat(TexInstruction *);
|
||||
|
||||
protected:
|
||||
|
|
Loading…
Reference in New Issue