nvc0/ir: implement lowering of surface ops for nve4
This commit is contained in:
parent
8ac68b071d
commit
4506ed28de
|
@ -932,7 +932,7 @@ const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
|
|||
{
|
||||
{ "1D", 1, 1, false, false, false },
|
||||
{ "2D", 2, 2, false, false, false },
|
||||
{ "2D_MS", 2, 2, false, false, false },
|
||||
{ "2D_MS", 2, 3, false, false, false },
|
||||
{ "3D", 3, 3, false, false, false },
|
||||
{ "CUBE", 2, 3, false, true, false },
|
||||
{ "1D_SHADOW", 1, 1, false, false, true },
|
||||
|
@ -940,7 +940,7 @@ const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
|
|||
{ "CUBE_SHADOW", 2, 3, false, true, true },
|
||||
{ "1D_ARRAY", 1, 2, true, false, false },
|
||||
{ "2D_ARRAY", 2, 3, true, false, false },
|
||||
{ "2D_MS_ARRAY", 2, 3, true, false, false },
|
||||
{ "2D_MS_ARRAY", 2, 4, true, false, false },
|
||||
{ "CUBE_ARRAY", 2, 4, true, true, false },
|
||||
{ "1D_ARRAY_SHADOW", 1, 2, true, false, true },
|
||||
{ "2D_ARRAY_SHADOW", 2, 3, true, false, true },
|
||||
|
@ -1137,6 +1137,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
|
|||
nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
|
||||
if (!prog)
|
||||
return -1;
|
||||
prog->driver = info;
|
||||
prog->dbgFlags = info->dbgFlags;
|
||||
prog->optLevel = info->optLevel;
|
||||
|
||||
|
|
|
@ -829,6 +829,9 @@ public:
|
|||
}
|
||||
|
||||
inline bool operator==(TexTarget targ) const { return target == targ; }
|
||||
inline bool operator!=(TexTarget targ) const { return target != targ; }
|
||||
|
||||
enum TexTarget getEnum() const { return target; }
|
||||
|
||||
private:
|
||||
struct Desc
|
||||
|
@ -1149,6 +1152,8 @@ public:
|
|||
|
||||
void *targetPriv; // e.g. to carry information between passes
|
||||
|
||||
const struct nv50_ir_prog_info *driver; // for driver configuration
|
||||
|
||||
void releaseInstruction(Instruction *);
|
||||
void releaseValue(Value *);
|
||||
};
|
||||
|
|
|
@ -169,7 +169,7 @@ struct nv50_ir_prog_info
|
|||
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
|
||||
int8_t genUserClip; /* request user clip planes for ClipVertex */
|
||||
uint16_t ucpBase; /* base address for UCPs */
|
||||
uint8_t ucpBinding; /* constant buffer index of UCP data */
|
||||
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
|
||||
uint8_t pointSize; /* output index for PointSize */
|
||||
uint8_t instanceId; /* system value index of InstanceID */
|
||||
uint8_t vertexId; /* system value index of VertexID */
|
||||
|
@ -179,6 +179,11 @@ struct nv50_ir_prog_info
|
|||
uint8_t sampleMask; /* output index of SampleMask */
|
||||
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
|
||||
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
|
||||
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
|
||||
uint16_t texBindBase; /* base address for tex handles (nve4) */
|
||||
uint16_t suInfoBase; /* base address for surface info (nve4) */
|
||||
uint8_t msInfoCBSlot; /* cX[] used for multisample info */
|
||||
uint16_t msInfoBase; /* base address for multisample info */
|
||||
} io;
|
||||
|
||||
/* driver callback to assign input/output locations */
|
||||
|
|
|
@ -2285,7 +2285,7 @@ Converter::handleUserClipPlanes()
|
|||
|
||||
for (c = 0; c < 4; ++c) {
|
||||
for (i = 0; i < info->io.genUserClip; ++i) {
|
||||
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding,
|
||||
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
|
||||
TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
|
||||
Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
|
||||
if (c == 0)
|
||||
|
|
|
@ -1852,17 +1852,23 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
|
|||
void
|
||||
RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
|
||||
{
|
||||
textureMask(tex);
|
||||
if (isTextureOp(tex->op))
|
||||
textureMask(tex);
|
||||
condenseDefs(tex);
|
||||
|
||||
int n = tex->srcCount(0xff, true);
|
||||
if (n > 4) {
|
||||
condenseSrcs(tex, 0, 3);
|
||||
if (n > 5) // NOTE: first call modified positions already
|
||||
condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
|
||||
if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
|
||||
condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1);
|
||||
} else
|
||||
if (n > 1) {
|
||||
condenseSrcs(tex, 0, n - 1);
|
||||
if (isTextureOp(tex->op)) {
|
||||
int n = tex->srcCount(0xff, true);
|
||||
if (n > 4) {
|
||||
condenseSrcs(tex, 0, 3);
|
||||
if (n > 5) // NOTE: first call modified positions already
|
||||
condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
|
||||
} else
|
||||
if (n > 1) {
|
||||
condenseSrcs(tex, 0, n - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -315,7 +315,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
|
|||
info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
|
||||
info->bin.source = (void *)prog->pipe.tokens;
|
||||
|
||||
info->io.ucpBinding = 15;
|
||||
info->io.ucpCBSlot = 15;
|
||||
info->io.ucpBase = 0;
|
||||
info->io.genUserClip = prog->vp.clpd_nr;
|
||||
|
||||
|
|
|
@ -468,7 +468,9 @@ NVC0LegalizePostRA::visit(Function *fn)
|
|||
insertTextureBarriers(fn);
|
||||
|
||||
rZero = new_LValue(fn, FILE_GPR);
|
||||
|
||||
rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -476,6 +478,8 @@ void
|
|||
NVC0LegalizePostRA::replaceZero(Instruction *i)
|
||||
{
|
||||
for (int s = 0; i->srcExists(s); ++s) {
|
||||
if (s == 2 && i->op == OP_SUCLAMP)
|
||||
continue;
|
||||
ImmediateValue *imm = i->getSrc(s)->asImm();
|
||||
if (imm && imm->reg.data.u64 == 0)
|
||||
i->setSrc(s, rZero);
|
||||
|
@ -599,22 +603,31 @@ private:
|
|||
bool handleTXQ(TexInstruction *);
|
||||
bool handleManualTXD(TexInstruction *);
|
||||
bool handleATOM(Instruction *);
|
||||
void handleSurfaceOpNVE4(TexInstruction *);
|
||||
|
||||
void checkPredicate(Instruction *);
|
||||
|
||||
void readTessCoord(LValue *dst, int c);
|
||||
|
||||
Value *loadResInfo32(Value *ptr, uint32_t off);
|
||||
Value *loadMsInfo32(Value *ptr, uint32_t off);
|
||||
|
||||
void adjustCoordinatesMS(TexInstruction *);
|
||||
void processSurfaceCoordsNVE4(TexInstruction *);
|
||||
|
||||
private:
|
||||
const Target *const targ;
|
||||
|
||||
BuildUtil bld;
|
||||
|
||||
Symbol *gMemBase;
|
||||
LValue *gpEmitAddress;
|
||||
};
|
||||
|
||||
// Creates a lowering pass for @prog: remembers the program's target, starts
// with no global-memory base symbol, and attaches the instruction builder to
// the program.
NVC0LoweringPass::NVC0LoweringPass(Program *prog)
   : targ(prog->getTarget()),
     gMemBase(NULL)
{
   bld.setProgram(prog);
}
|
||||
|
||||
bool
|
||||
|
@ -646,9 +659,9 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
|
|||
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
|
||||
const int arg = i->tex.target.getArgCount();
|
||||
|
||||
if (prog->getTarget()->getChipset() >= 0xe0) {
|
||||
if (prog->getTarget()->getChipset() >= NVISA_GK104_CHIPSET) {
|
||||
if (i->tex.r == i->tex.s) {
|
||||
i->tex.r += 8; // NOTE: offset should probably be a driver option
|
||||
i->tex.r += prog->driver->io.texBindBase / 4;
|
||||
i->tex.s = 0; // only a single cX[] value possible here
|
||||
} else {
|
||||
// TODO: extract handles and use register to select TIC/TSC entries
|
||||
|
@ -866,6 +879,362 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Emits a 32-bit load from the resource-info constant buffer.
//
// @ptr: optional indirect address value passed through to the load
// @off: byte offset relative to the driver-provided surface info base
//
// Returns the value produced by the load.
inline Value *
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
{
   const uint8_t cbSlot = prog->driver->io.resInfoCBSlot;
   const uint32_t address = off + prog->driver->io.suInfoBase;
   Symbol *const sym =
      bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32, address);

   return bld.mkLoadv(TYPE_U32, sym, ptr);
}
|
||||
|
||||
// Emits a 32-bit load from the multisample-info constant buffer.
//
// @ptr: optional indirect address value passed through to the load
// @off: byte offset relative to the driver-provided multisample info base
//
// Returns the value produced by the load.
inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
   const uint8_t cbSlot = prog->driver->io.msInfoCBSlot;
   const uint32_t address = off + prog->driver->io.msInfoBase;
   Symbol *const sym =
      bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32, address);

   return bld.mkLoadv(TYPE_U32, sym, ptr);
}
|
||||
|
||||
/* On nvc0, surface info is obtained via the surface binding points passed
|
||||
* to the SULD/SUST instructions.
|
||||
* On nve4, surface info is stored in c[] and is used by various special
|
||||
* instructions, e.g. for clamping coordinates or generating an address.
|
||||
* They couldn't just have added an equivalent to TIC now, couldn't they ?
|
||||
*/
|
||||
#define NVE4_SU_INFO_ADDR 0x00
|
||||
#define NVE4_SU_INFO_FMT 0x04
|
||||
#define NVE4_SU_INFO_DIM_X 0x08
|
||||
#define NVE4_SU_INFO_PITCH 0x0c
|
||||
#define NVE4_SU_INFO_DIM_Y 0x10
|
||||
#define NVE4_SU_INFO_ARRAY 0x14
|
||||
#define NVE4_SU_INFO_DIM_Z 0x18
|
||||
#define NVE4_SU_INFO_UNK1C 0x1c
|
||||
#define NVE4_SU_INFO_WIDTH 0x20
|
||||
#define NVE4_SU_INFO_HEIGHT 0x24
|
||||
#define NVE4_SU_INFO_DEPTH 0x28
|
||||
#define NVE4_SU_INFO_TARGET 0x2c
|
||||
#define NVE4_SU_INFO_CALL 0x30
|
||||
#define NVE4_SU_INFO_RAW_X 0x34
|
||||
#define NVE4_SU_INFO_MS_X 0x38
|
||||
#define NVE4_SU_INFO_MS_Y 0x3c
|
||||
|
||||
#define NVE4_SU_INFO__STRIDE 0x40
|
||||
|
||||
#define NVE4_SU_INFO_DIM(i) (0x08 + (i) * 8)
|
||||
#define NVE4_SU_INFO_SIZE(i) (0x20 + (i) * 4)
|
||||
#define NVE4_SU_INFO_MS(i) (0x38 + (i) * 4)
|
||||
|
||||
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
|
||||
{
|
||||
switch (su->tex.target.getEnum()) {
|
||||
case TEX_TARGET_BUFFER: return NV50_IR_SUBOP_SUCLAMP_PL(0, 1);
|
||||
case TEX_TARGET_RECT: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
|
||||
case TEX_TARGET_1D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
|
||||
case TEX_TARGET_1D_ARRAY: return (c == 1) ?
|
||||
NV50_IR_SUBOP_SUCLAMP_PL(0, 2) :
|
||||
NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
|
||||
case TEX_TARGET_2D: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2);
|
||||
case TEX_TARGET_2D_MS: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2);
|
||||
case TEX_TARGET_2D_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
|
||||
case TEX_TARGET_2D_MS_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
|
||||
case TEX_TARGET_3D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
|
||||
case TEX_TARGET_CUBE: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
|
||||
case TEX_TARGET_CUBE_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
|
||||
default:
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
|
||||
{
|
||||
const uint16_t base = tex->tex.r * NVE4_SU_INFO__STRIDE;
|
||||
const int arg = tex->tex.target.getArgCount();
|
||||
|
||||
if (tex->tex.target == TEX_TARGET_2D_MS)
|
||||
tex->tex.target = TEX_TARGET_2D;
|
||||
else
|
||||
if (tex->tex.target == TEX_TARGET_2D_MS_ARRAY)
|
||||
tex->tex.target = TEX_TARGET_2D_ARRAY;
|
||||
else
|
||||
return;
|
||||
|
||||
Value *x = tex->getSrc(0);
|
||||
Value *y = tex->getSrc(1);
|
||||
Value *s = tex->getSrc(arg - 1);
|
||||
|
||||
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
|
||||
|
||||
Value *ms_x = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0));
|
||||
Value *ms_y = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1));
|
||||
|
||||
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
|
||||
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
|
||||
|
||||
s = bld.mkOp2v(OP_AND, TYPE_U32, ts, s, bld.loadImm(NULL, 0x7));
|
||||
s = bld.mkOp2v(OP_SHL, TYPE_U32, ts, ts, bld.mkImm(3));
|
||||
|
||||
Value *dx = loadMsInfo32(ts, 0x0);
|
||||
Value *dy = loadMsInfo32(ts, 0x4);
|
||||
|
||||
bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx);
|
||||
bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy);
|
||||
|
||||
tex->setSrc(0, tx);
|
||||
tex->setSrc(1, ty);
|
||||
tex->moveSources(arg, -1);
|
||||
}
|
||||
|
||||
// Sets 64-bit "generic address", predicate and format sources for SULD/SUST.
|
||||
// They're computed from the coordinates using the surface info in c[] space.
|
||||
void
|
||||
NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
|
||||
{
|
||||
Instruction *insn;
|
||||
const bool atom = su->op == OP_SUREDB || su->op == OP_SUREDP;
|
||||
const bool raw =
|
||||
su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB;
|
||||
const int idx = su->tex.r;
|
||||
const int dim = su->tex.target.getDim();
|
||||
const int arg = dim + (su->tex.target.isArray() ? 1 : 0);
|
||||
const uint16_t base = idx * NVE4_SU_INFO__STRIDE;
|
||||
int c;
|
||||
Value *zero = bld.mkImm(0);
|
||||
Value *p1 = NULL;
|
||||
Value *v;
|
||||
Value *src[3];
|
||||
Value *bf, *eau, *off;
|
||||
Value *addr, *pred;
|
||||
|
||||
off = bld.getScratch(4);
|
||||
bf = bld.getScratch(4);
|
||||
addr = bld.getSSA(8);
|
||||
pred = bld.getScratch(1, FILE_PREDICATE);
|
||||
|
||||
bld.setPosition(su, false);
|
||||
|
||||
adjustCoordinatesMS(su);
|
||||
|
||||
// calculate clamped coordinates
|
||||
for (c = 0; c < arg; ++c) {
|
||||
src[c] = bld.getScratch();
|
||||
if (c == 0 && raw)
|
||||
v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X);
|
||||
else
|
||||
v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c));
|
||||
bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero)
|
||||
->subOp = getSuClampSubOp(su, c);
|
||||
}
|
||||
for (; c < 3; ++c)
|
||||
src[c] = zero;
|
||||
|
||||
// set predicate output
|
||||
if (su->tex.target == TEX_TARGET_BUFFER) {
|
||||
src[0]->getInsn()->setFlagsDef(1, pred);
|
||||
} else
|
||||
if (su->tex.target.isArray()) {
|
||||
p1 = bld.getSSA(1, FILE_PREDICATE);
|
||||
src[dim]->getInsn()->setFlagsDef(1, p1);
|
||||
}
|
||||
|
||||
// calculate pixel offset
|
||||
if (dim == 1) {
|
||||
if (su->tex.target != TEX_TARGET_BUFFER)
|
||||
bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff));
|
||||
} else
|
||||
if (dim == 3) {
|
||||
v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
|
||||
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
|
||||
->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
|
||||
|
||||
v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
|
||||
bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
|
||||
->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
|
||||
} else {
|
||||
assert(dim == 2);
|
||||
v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
|
||||
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
|
||||
->subOp = su->tex.target.isArray() ?
|
||||
NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
|
||||
}
|
||||
|
||||
// calculate effective address part 1
|
||||
if (su->tex.target == TEX_TARGET_BUFFER) {
|
||||
if (raw) {
|
||||
bf = src[0];
|
||||
} else {
|
||||
v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
|
||||
bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero)
|
||||
->subOp = NV50_IR_SUBOP_V1(7,6,8|2);
|
||||
}
|
||||
} else {
|
||||
Value *y = src[1];
|
||||
Value *z = src[2];
|
||||
uint16_t subOp = 0;
|
||||
|
||||
switch (dim) {
|
||||
case 1:
|
||||
y = zero;
|
||||
z = zero;
|
||||
break;
|
||||
case 2:
|
||||
z = off;
|
||||
if (!su->tex.target.isArray()) {
|
||||
z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
|
||||
subOp = NV50_IR_SUBOP_SUBFM_3D;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
subOp = NV50_IR_SUBOP_SUBFM_3D;
|
||||
assert(dim == 3);
|
||||
break;
|
||||
}
|
||||
insn = bld.mkOp3(OP_SUBFM, TYPE_U32, bf, src[0], y, z);
|
||||
insn->subOp = subOp;
|
||||
insn->setFlagsDef(1, pred);
|
||||
}
|
||||
|
||||
// part 2
|
||||
v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR);
|
||||
|
||||
if (su->tex.target == TEX_TARGET_BUFFER) {
|
||||
eau = v;
|
||||
} else {
|
||||
eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v);
|
||||
}
|
||||
// add array layer offset
|
||||
if (su->tex.target.isArray()) {
|
||||
v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY);
|
||||
if (dim == 1)
|
||||
bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
|
||||
->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
|
||||
else
|
||||
bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau)
|
||||
->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32
|
||||
// combine predicates
|
||||
assert(p1);
|
||||
bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1);
|
||||
}
|
||||
|
||||
if (atom) {
|
||||
Value *lo = bf;
|
||||
if (su->tex.target == TEX_TARGET_BUFFER) {
|
||||
lo = zero;
|
||||
bld.mkMov(off, bf);
|
||||
}
|
||||
// bf == g[] address & 0xff
|
||||
// eau == g[] address >> 8
|
||||
bld.mkOp3(OP_PERMT, TYPE_U32, bf, lo, bld.loadImm(NULL, 0x6540), eau);
|
||||
bld.mkOp3(OP_PERMT, TYPE_U32, eau, zero, bld.loadImm(NULL, 0x0007), eau);
|
||||
} else
|
||||
if (su->op == OP_SULDP && su->tex.target == TEX_TARGET_BUFFER) {
|
||||
// Convert from u32 to u8 address format, which is what the library code
|
||||
// doing SULDP currently uses.
|
||||
// XXX: can SUEAU do this ?
|
||||
// XXX: does it matter that we don't mask high bytes in bf ?
|
||||
// Grrr.
|
||||
bld.mkOp2(OP_SHR, TYPE_U32, off, bf, bld.mkImm(8));
|
||||
bld.mkOp2(OP_ADD, TYPE_U32, eau, eau, off);
|
||||
}
|
||||
|
||||
bld.mkOp2(OP_MERGE, TYPE_U64, addr, bf, eau);
|
||||
|
||||
if (atom && su->tex.target == TEX_TARGET_BUFFER)
|
||||
bld.mkOp2(OP_ADD, TYPE_U64, addr, addr, off);
|
||||
|
||||
// let's just set it 0 for raw access and hope it works
|
||||
v = raw ?
|
||||
bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
|
||||
|
||||
// get rid of old coordinate sources, make space for fmt info and predicate
|
||||
su->moveSources(arg, 3 - arg);
|
||||
// set 64 bit address and 32-bit format sources
|
||||
su->setSrc(0, addr);
|
||||
su->setSrc(1, v);
|
||||
su->setSrc(2, pred);
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
|
||||
{
|
||||
processSurfaceCoordsNVE4(su);
|
||||
|
||||
// Who do we hate more ? The person who decided that nvc0's SULD doesn't
|
||||
// have to support conversion or the person who decided that, in OpenCL,
|
||||
// you don't have to specify the format here like you do in OpenGL ?
|
||||
|
||||
if (su->op == OP_SULDP) {
|
||||
// We don't patch shaders. Ever.
|
||||
// You get an indirect call to our library blob here.
|
||||
// But at least it's uniform.
|
||||
FlowInstruction *call;
|
||||
LValue *p[3];
|
||||
LValue *r[5];
|
||||
uint16_t base = su->tex.r * NVE4_SU_INFO__STRIDE + NVE4_SU_INFO_CALL;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
(r[i] = bld.getScratch(4, FILE_GPR))->reg.data.id = i;
|
||||
for (int i = 0; i < 3; ++i)
|
||||
(p[i] = bld.getScratch(1, FILE_PREDICATE))->reg.data.id = i;
|
||||
(r[4] = bld.getScratch(8, FILE_GPR))->reg.data.id = 4;
|
||||
|
||||
bld.mkMov(p[1], bld.mkImm((su->cache == CACHE_CA) ? 1 : 0), TYPE_U8);
|
||||
bld.mkMov(p[2], bld.mkImm((su->cache == CACHE_CG) ? 1 : 0), TYPE_U8);
|
||||
bld.mkMov(p[0], su->getSrc(2), TYPE_U8);
|
||||
bld.mkMov(r[4], su->getSrc(0), TYPE_U64);
|
||||
bld.mkMov(r[2], su->getSrc(1), TYPE_U32);
|
||||
|
||||
call = bld.mkFlow(OP_CALL, NULL, su->cc, su->getPredicate());
|
||||
|
||||
call->indirect = 1;
|
||||
call->absolute = 1;
|
||||
call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST,
|
||||
prog->driver->io.resInfoCBSlot, TYPE_U32,
|
||||
prog->driver->io.suInfoBase + base));
|
||||
call->setSrc(1, r[2]);
|
||||
call->setSrc(2, r[4]);
|
||||
for (int i = 0; i < 3; ++i)
|
||||
call->setSrc(3 + i, p[i]);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
call->setDef(i, r[i]);
|
||||
bld.mkMov(su->getDef(i), r[i]);
|
||||
}
|
||||
call->setDef(4, p[1]);
|
||||
delete_Instruction(bld.getProgram(), su);
|
||||
}
|
||||
|
||||
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
|
||||
Value *pred = su->getSrc(2);
|
||||
CondCode cc = CC_NOT_P;
|
||||
if (su->getPredicate()) {
|
||||
pred = bld.getScratch(1, FILE_PREDICATE);
|
||||
cc = su->cc;
|
||||
if (cc == CC_NOT_P) {
|
||||
bld.mkOp2(OP_OR, TYPE_U8, pred, su->getPredicate(), su->getSrc(2));
|
||||
} else {
|
||||
bld.mkOp2(OP_AND, TYPE_U8, pred, su->getPredicate(), su->getSrc(2));
|
||||
pred->getInsn()->src(1).mod = Modifier(NV50_IR_MOD_NOT);
|
||||
}
|
||||
}
|
||||
Instruction *red = bld.mkOp(OP_ATOM, su->dType, su->getDef(0));
|
||||
red->subOp = su->subOp;
|
||||
if (!gMemBase)
|
||||
gMemBase = bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0);
|
||||
red->setSrc(0, gMemBase);
|
||||
red->setSrc(1, su->getSrc(3));
|
||||
if (su->subOp == NV50_IR_SUBOP_ATOM_CAS)
|
||||
red->setSrc(2, su->getSrc(4));
|
||||
red->setIndirect(0, 0, su->getSrc(0));
|
||||
red->setPredicate(cc, pred);
|
||||
delete_Instruction(bld.getProgram(), su);
|
||||
} else {
|
||||
su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
NVC0LoweringPass::handleWRSV(Instruction *i)
|
||||
{
|
||||
|
@ -1127,6 +1496,15 @@ NVC0LoweringPass::visit(Instruction *i)
|
|||
case OP_ATOM:
|
||||
handleATOM(i);
|
||||
break;
|
||||
case OP_SULDB:
|
||||
case OP_SULDP:
|
||||
case OP_SUSTB:
|
||||
case OP_SUSTP:
|
||||
case OP_SUREDB:
|
||||
case OP_SUREDP:
|
||||
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
|
||||
handleSurfaceOpNVE4(i->asTex());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -564,7 +564,25 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
|
|||
|
||||
info->io.genUserClip = prog->vp.num_ucps;
|
||||
info->io.ucpBase = 256;
|
||||
info->io.ucpBinding = 15;
|
||||
info->io.ucpCBSlot = 15;
|
||||
|
||||
if (prog->type == PIPE_SHADER_COMPUTE) {
|
||||
if (chipset >= NVISA_GK104_CHIPSET) {
|
||||
info->io.resInfoCBSlot = 0;
|
||||
info->io.texBindBase = 0; /* TODO */
|
||||
info->io.suInfoBase = 0; /* TODO */
|
||||
}
|
||||
info->io.msInfoCBSlot = 0;
|
||||
info->io.msInfoBase = 0; /* TODO */
|
||||
} else {
|
||||
if (chipset >= NVISA_GK104_CHIPSET) {
|
||||
info->io.resInfoCBSlot = 15;
|
||||
info->io.texBindBase = 0x20;
|
||||
info->io.suInfoBase = 0; /* TODO */
|
||||
}
|
||||
info->io.msInfoCBSlot = 15;
|
||||
info->io.msInfoBase = 0; /* TODO */
|
||||
}
|
||||
|
||||
info->assignSlots = nvc0_program_assign_varying_slots;
|
||||
|
||||
|
|
Loading…
Reference in New Issue