gm107/ir: fix loading z offset for layered 3d image bindings

Unfortuantely we don't know if a particular load is a real 2d image (as
would be a cube face or 2d array element), or a layer of a 3d image.
Since we pass in the TIC reference, the instruction's type has to match
what's in the TIC (experimentally). In order to properly support
bindless images, this also can't be done by looking at the current
bindings and generating appropriate code.

As a result all plain 2d loads are converted into a pair of 2d/3d loads,
with appropriate predicates to ensure only one of those actually
executes, and the values are all merged in.

This goes somewhat against the current flow, so for GM107 we do the OOB
handling directly in the surface processing logic. Perhaps the other
gens should do something similar, but that is left to another change.

This fixes dEQP tests like image_load_store.3d.*_single_layer and GL-CTS
tests like shader_image_load_store.non-layered_binding without breaking
anything else.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "20.0" <mesa-stable@lists.freedesktop.org>
This commit is contained in:
Ilia Mirkin 2019-02-03 23:25:07 -05:00
parent e02c181bfd
commit 869e32593a
4 changed files with 203 additions and 55 deletions

View File

@ -122,6 +122,8 @@ private:
void emitSAM();
void emitRAM();
void emitPSETP();
void emitMOV();
void emitS2R();
void emitCS2R();
@ -690,6 +692,31 @@ CodeEmitterGM107::emitRAM()
* predicate/cc
******************************************************************************/
void
CodeEmitterGM107::emitPSETP()
{
emitInsn(0x50900000);
switch (insn->op) {
case OP_AND: emitField(0x18, 3, 0); break;
case OP_OR: emitField(0x18, 3, 1); break;
case OP_XOR: emitField(0x18, 3, 2); break;
default:
assert(!"unexpected operation");
break;
}
// emitINV (0x2a);
emitPRED(0x27); // TODO: support 3-arg
emitINV (0x20, insn->src(1));
emitPRED(0x1d, insn->src(1));
emitINV (0x0f, insn->src(0));
emitPRED(0x0c, insn->src(0));
emitPRED(0x03, insn->def(0));
emitPRED(0x00);
}
/*******************************************************************************
* movement / conversion
******************************************************************************/
@ -3557,7 +3584,12 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_AND:
case OP_OR:
case OP_XOR:
emitLOP();
switch (insn->def(0).getFile()) {
case FILE_GPR: emitLOP(); break;
case FILE_PREDICATE: emitPSETP(); break;
default:
assert(!"invalid bool op");
}
break;
case OP_NOT:
emitNOT();

View File

@ -1802,6 +1802,9 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
{
uint32_t base = slot * NVC0_SU_INFO__STRIDE;
// We don't upload surface info for bindless for GM107+
assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET);
if (ptr) {
ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
if (bindless)
@ -2204,7 +2207,7 @@ getDestType(const ImgType type) {
}
void
NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su, Instruction **loaded)
{
const TexInstruction::ImgFormatDesc *format = su->tex.format;
int width = format->bits[0] + format->bits[1] +
@ -2223,21 +2226,38 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
if (width < 32)
untypedDst[0] = bld.getSSA();
for (int i = 0; i < 4; i++) {
typedDst[i] = su->getDef(i);
if (loaded && loaded[0]) {
for (int i = 0; i < 4; i++) {
if (loaded[i])
typedDst[i] = loaded[i]->getDef(0);
}
} else {
for (int i = 0; i < 4; i++) {
typedDst[i] = su->getDef(i);
}
}
// Set the untyped dsts as the su's destinations
for (int i = 0; i < 4; i++)
su->setDef(i, untypedDst[i]);
if (loaded && loaded[0]) {
for (int i = 0; i < 4; i++)
if (loaded[i])
loaded[i]->setDef(0, untypedDst[i]);
} else {
for (int i = 0; i < 4; i++)
su->setDef(i, untypedDst[i]);
bld.setPosition(su, true);
bld.setPosition(su, true);
}
// Unpack each component into the typed dsts
int bits = 0;
for (int i = 0; i < 4; bits += format->bits[i], i++) {
if (!typedDst[i])
continue;
if (loaded && loaded[0])
bld.setPosition(loaded[i], true);
if (i >= format->components) {
if (format->type == FLOAT ||
format->type == UNORM ||
@ -2308,7 +2328,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
processSurfaceCoordsNVE4(su);
if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
convertSurfaceFormat(su, NULL);
insertOOBSurfaceOpResult(su);
}
@ -2421,7 +2441,7 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
processSurfaceCoordsNVC0(su);
if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
convertSurfaceFormat(su, NULL);
insertOOBSurfaceOpResult(su);
}
@ -2463,14 +2483,16 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
}
}
void
NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
TexInstruction *
NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su, Instruction *ret[4])
{
const int slot = su->tex.r;
const int dim = su->tex.target.getDim();
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
const bool array = su->tex.target.isArray() || su->tex.target.isCube();
const int arg = dim + array;
Value *ind = su->getIndirectR();
Value *handle;
Instruction *pred = NULL, *pred2d = NULL;
int pos = 0;
bld.setPosition(su, false);
@ -2489,67 +2511,153 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
assert(pos == 0);
break;
}
if (dim == 2 && !array) {
// This might be a 2d slice of a 3d texture, try to load the z
// coordinate in.
Value *v;
if (!su->tex.bindless)
v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
else
v = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), ind, bld.mkImm(11));
Value *is_3d = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), v, bld.mkImm(1));
pred2d = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
TYPE_U32, bld.mkImm(0), is_3d);
bld.mkOp2(OP_SHR, TYPE_U32, v, v, bld.loadImm(NULL, 16));
su->moveSources(dim, 1);
su->setSrc(dim, v);
su->tex.target = nv50_ir::TEX_TARGET_3D;
pos++;
}
if (su->tex.bindless)
handle = ind;
handle = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ind, bld.mkImm(2047));
else
handle = loadTexHandle(ind, slot + 32);
su->setSrc(arg + pos, handle);
// The address check doesn't make sense here. The format check could make
// sense but it's a bit of a pain.
if (su->tex.bindless)
return;
if (!su->tex.bindless) {
// prevent read fault when the image is not actually bound
pred =
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
TYPE_U32, bld.mkImm(0),
loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
if (su->op != OP_SUSTP && su->tex.format) {
const TexInstruction::ImgFormatDesc *format = su->tex.format;
int blockwidth = format->bits[0] + format->bits[1] +
format->bits[2] + format->bits[3];
// prevent read fault when the image is not actually bound
CmpInstruction *pred =
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
TYPE_U32, bld.mkImm(0),
loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
if (su->op != OP_SUSTP && su->tex.format) {
const TexInstruction::ImgFormatDesc *format = su->tex.format;
int blockwidth = format->bits[0] + format->bits[1] +
format->bits[2] + format->bits[3];
assert(format->components != 0);
// make sure that the format doesn't mismatch when it's not FMT_NONE
bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
pred->getDef(0));
assert(format->components != 0);
// make sure that the format doesn't mismatch when it's not FMT_NONE
bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
pred->getDef(0));
}
}
su->setPredicate(CC_NOT_P, pred->getDef(0));
// Now we have "pred" which (optionally) contains whether to do the surface
// op at all, and a "pred2d" which indicates that, in case of doing the
// surface op, we have to create a 2d and 3d version, conditioned on pred2d.
TexInstruction *su2d = NULL;
if (pred2d) {
su2d = cloneForward(func, su)->asTex();
for (unsigned i = 0; su->defExists(i); ++i)
su2d->setDef(i, bld.getSSA());
su2d->moveSources(dim + 1, -1);
su2d->tex.target = nv50_ir::TEX_TARGET_2D;
}
if (pred2d && pred) {
Instruction *pred3d = bld.mkOp2(OP_AND, TYPE_U8,
bld.getSSA(1, FILE_PREDICATE),
pred->getDef(0), pred2d->getDef(0));
pred3d->src(0).mod = Modifier(NV50_IR_MOD_NOT);
pred3d->src(1).mod = Modifier(NV50_IR_MOD_NOT);
su->setPredicate(CC_P, pred3d->getDef(0));
pred2d = bld.mkOp2(OP_AND, TYPE_U8, bld.getSSA(1, FILE_PREDICATE),
pred->getDef(0), pred2d->getDef(0));
pred2d->src(0).mod = Modifier(NV50_IR_MOD_NOT);
} else if (pred) {
su->setPredicate(CC_NOT_P, pred->getDef(0));
} else if (pred2d) {
su->setPredicate(CC_NOT_P, pred2d->getDef(0));
}
if (su2d) {
su2d->setPredicate(CC_P, pred2d->getDef(0));
bld.insert(su2d);
// Create a UNION so that RA assigns the same registers
bld.setPosition(su, true);
for (unsigned i = 0; su->defExists(i); ++i) {
assert(i < 4);
ValueDef &def = su->def(i);
ValueDef &def2 = su2d->def(i);
Instruction *mov = NULL;
if (pred) {
mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
mov->setPredicate(CC_P, pred->getDef(0));
}
Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32,
bld.getSSA(),
NULL, def2.get());
def.replace(uni->getDef(0), false);
uni->setSrc(0, def.get());
if (mov)
uni->setSrc(2, mov->getDef(0));
}
} else if (pred) {
// Create a UNION so that RA assigns the same registers
bld.setPosition(su, true);
for (unsigned i = 0; su->defExists(i); ++i) {
assert(i < 4);
ValueDef &def = su->def(i);
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
mov->setPredicate(CC_P, pred->getDef(0));
Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32,
bld.getSSA(),
NULL, mov->getDef(0));
def.replace(uni->getDef(0), false);
uni->setSrc(0, def.get());
}
}
return su2d;
}
void
NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
{
processSurfaceCoordsGM107(su);
// processSurfaceCoords also takes care of fixing up the outputs and
// union'ing them with 0 as necessary. Additionally it may create a second
// surface which needs some of the similar fixups.
Instruction *loaded[4] = {};
TexInstruction *su2 = processSurfaceCoordsGM107(su, loaded);
if (su->op == OP_SULDP) {
convertSurfaceFormat(su);
insertOOBSurfaceOpResult(su);
convertSurfaceFormat(su, loaded);
}
if (su->op == OP_SUREDP) {
Value *def = su->getDef(0);
su->op = OP_SUREDB;
}
// There may not be a predicate in the bindless case.
if (su->getPredicate()) {
su->setDef(0, bld.getSSA());
bld.setPosition(su, true);
// make sure to initialize dst value when the atomic operation is not
// performed
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
assert(su->cc == CC_NOT_P);
mov->setPredicate(CC_P, su->getPredicate());
bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0));
}
// If we fixed up the type of the regular surface load instruction, we also
// have to fix up the copy.
if (su2) {
su2->op = su->op;
su2->dType = su->dType;
su2->sType = su->sType;
}
}

View File

@ -171,10 +171,10 @@ private:
Value *loadMsInfo32(Value *ptr, uint32_t off);
void adjustCoordinatesMS(TexInstruction *);
void processSurfaceCoordsGM107(TexInstruction *);
TexInstruction *processSurfaceCoordsGM107(TexInstruction *, Instruction *[4]);
void processSurfaceCoordsNVE4(TexInstruction *);
void processSurfaceCoordsNVC0(TexInstruction *);
void convertSurfaceFormat(TexInstruction *);
void convertSurfaceFormat(TexInstruction *, Instruction **);
void insertOOBSurfaceOpResult(TexInstruction *);
Value *calculateSampleOffset(Value *sampleID);

View File

@ -1433,7 +1433,15 @@ gm107_create_image_handle(struct pipe_context *pipe,
nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
return 0x100000000ULL | tic->id;
// Compute handle. This will include the TIC as well as some additional
// info regarding the bound 3d surface layer, if applicable.
uint64_t handle = 0x100000000ULL | tic->id;
struct nv04_resource *res = nv04_resource(view->resource);
if (res->base.target == PIPE_TEXTURE_3D) {
handle |= 1 << 11;
handle |= view->u.tex.first_layer << (11 + 16);
}
return handle;
fail:
FREE(tic);