nvc0/ir: add a lowering pass for surfaces on Fermi

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Samuel Pitoiset 2016-05-08 19:13:15 +02:00
parent b663db44ba
commit 362e17a712
2 changed files with 117 additions and 0 deletions

View File

@ -2012,6 +2012,119 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8;
}
/**
 * Rewrite the coordinate sources of a surface instruction and predicate it
 * on a validity flag computed from the per-surface info entries.
 *
 * - For OP_SULDP / OP_SUREDP, source 0 is replaced by source 0 multiplied by
 *   the surface's BSIZE info entry.
 * - For array and cube targets, source 2 is replaced by source 2 multiplied
 *   by the surface's ARRAY info entry.
 * - A predicate is set when the ADDR info entry equals 0 and, for ops other
 *   than OP_SUSTP that carry a format descriptor, also when the format's
 *   size in bytes differs from the BSIZE info entry. The instruction is then
 *   predicated with CC_NOT_P on that flag, so it is skipped when the flag is
 *   set.
 *
 * \param su  surface instruction, modified in place
 */
void
NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
{
   const int slot = su->tex.r;
   const int dim = su->tex.target.getDim();
   const bool layered = su->tex.target.isArray() || su->tex.target.isCube();
   const int numCoords = dim + (layered ? 1 : 0);
   const uint16_t infoBase = slot * NVE4_SU_INFO__STRIDE;
   Value *const zero = bld.mkImm(0);
   Value *indirect = NULL;
   Value *coord[3];

   if (su->tex.rIndirectSrc >= 0) {
      // FIXME: out of bounds
      assert(su->tex.r == 0);
      // NOTE(review): the shift by 6 presumably scales the index to the
      // per-surface info stride -- confirm against NVE4_SU_INFO__STRIDE.
      indirect = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                            su->getIndirectR(), bld.mkImm(6));
   }

   // Gather the surface coordinates; unused trailing slots read as zero.
   for (int i = 0; i < 3; ++i)
      coord[i] = (i < numCoords) ? su->getSrc(i) : zero;

   // Scale the x coordinate by the BSIZE entry to obtain the pixel offset.
   if (su->op == OP_SULDP || su->op == OP_SUREDP) {
      Value *const bsize = loadSuInfo32(indirect, infoBase + NVE4_SU_INFO_BSIZE);
      Value *const offset =
         bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), coord[0], bsize);
      su->setSrc(0, offset);
   }

   // Scale the layer index by the ARRAY entry to obtain the layer offset.
   if (layered) {
      Value *const layerScale =
         loadSuInfo32(indirect, infoBase + NVE4_SU_INFO_ARRAY);
      assert(dim > 1);
      Value *const layerOffset =
         bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), coord[2], layerScale);
      su->setSrc(2, layerOffset);
   }

   // Prevent a read fault when the image is not actually bound: the flag is
   // set when the ADDR entry is 0.
   CmpInstruction *invalid =
      bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
                TYPE_U32, bld.mkImm(0),
                loadSuInfo32(indirect, infoBase + NVE4_SU_INFO_ADDR));

   if (su->op != OP_SUSTP && su->tex.format) {
      const TexInstruction::ImgFormatDesc *desc = su->tex.format;
      int bitsPerPixel = 0;
      for (int ch = 0; ch < 4; ++ch)
         bitsPerPixel += desc->bits[ch];

      assert(desc->components != 0);

      // When the format is not FMT_NONE, also set the flag if the declared
      // format size does not match the BSIZE entry.
      bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, invalid->getDef(0),
                TYPE_U32, bld.loadImm(NULL, bitsPerPixel / 8),
                loadSuInfo32(indirect, infoBase + NVE4_SU_INFO_BSIZE),
                invalid->getDef(0));
   }

   // Execute the surface op only when the flag is clear.
   su->setPredicate(CC_NOT_P, invalid->getDef(0));
}
/**
 * Lower a surface instruction for the pre-GK104 path.
 *
 * 1D array targets are first rewritten as 2D arrays, then the coordinates
 * are processed by processSurfaceCoordsNVC0(). OP_SULDP additionally goes
 * through convertSurfaceFormat(). OP_SUREDB / OP_SUREDP are turned into an
 * OP_SULEA that produces a 64-bit address, followed by a predicated OP_ATOM
 * on global memory using that address; the original destination receives
 * either the atomic's result or 0 when the atomic is not executed.
 *
 * \param su  surface instruction, modified in place
 */
void
NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
{
   if (su->tex.target == TEX_TARGET_1D_ARRAY) {
      /* As 1d arrays also need 3 coordinates, switching to TEX_TARGET_2D_ARRAY
       * will simplify the lowering pass and the texture constraints. */
      su->moveSources(1, 1);
      su->setSrc(1, bld.loadImm(NULL, 0));
      su->tex.target = TEX_TARGET_2D_ARRAY;
   }

   processSurfaceCoordsNVC0(su);

   if (su->op == OP_SULDP)
      convertSurfaceFormat(su);

   // Only the reduction ops need the address + atomic expansion below.
   const bool isReduction = (su->op == OP_SUREDB || su->op == OP_SUREDP);
   if (!isReduction)
      return;

   const bool layered = su->tex.target.isArray() || su->tex.target.isCube();
   // Index of the first source after the coordinates.
   const int dataSrc = su->tex.target.getDim() + (layered ? 1 : 0);

   LValue *address = bld.getSSA(8);
   Value *result = su->getDef(0);

   // Turn the surface op into an address computation: def 0 becomes the
   // 64-bit address, def 1 is set to the instruction's predicate value.
   su->op = OP_SULEA;
   su->dType = TYPE_U64;
   su->setDef(0, address);
   su->setDef(1, su->getPredicate());

   // Everything emitted from here on goes after the address computation.
   bld.setPosition(su, true);

   // Perform the atomic op on global memory, indirectly through the address.
   Instruction *atom = bld.mkOp(OP_ATOM, su->sType, bld.getSSA());
   atom->subOp = su->subOp;
   atom->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, su->sType, 0));
   atom->setSrc(1, su->getSrc(dataSrc));
   if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS)
      atom->setSrc(2, su->getSrc(dataSrc + 1));
   atom->setIndirect(0, 0, address);

   // Make sure the destination is initialized when the atomic operation is
   // not performed.
   Instruction *fallback = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));

   assert(su->cc == CC_NOT_P);
   Value *guard = su->getPredicate();
   atom->setPredicate(su->cc, guard);
   fallback->setPredicate(CC_P, guard);

   // Merge the two mutually exclusive results into the original destination.
   bld.mkOp2(OP_UNION, TYPE_U32, result, atom->getDef(0), fallback->getDef(0));

   handleCasExch(atom, false);
}
bool
NVC0LoweringPass::handleWRSV(Instruction *i)
{
@ -2491,6 +2604,8 @@ NVC0LoweringPass::visit(Instruction *i)
case OP_SUREDP:
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
handleSurfaceOpNVE4(i->asTex());
else
handleSurfaceOpNVC0(i->asTex());
break;
case OP_SUQ:
handleSUQ(i->asTex());

View File

@ -106,6 +106,7 @@ protected:
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
void handleSurfaceOpNVC0(TexInstruction *);
void handleSharedATOM(Instruction *);
void handleSharedATOMNVE4(Instruction *);
void handleLDST(Instruction *);
@ -138,6 +139,7 @@ private:
void adjustCoordinatesMS(TexInstruction *);
void processSurfaceCoordsNVE4(TexInstruction *);
void processSurfaceCoordsNVC0(TexInstruction *);
void convertSurfaceFormat(TexInstruction *);
protected: