nv50/ir: add support for BUFFER accesses

This largely leaves the existing image logic alone. When image support
is added this will have to be harmonized somehow.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Ilia Mirkin 2016-01-02 22:25:31 -05:00
parent abe427ebd2
commit c3083c7082
6 changed files with 147 additions and 11 deletions

View File

@ -393,6 +393,9 @@ ImmediateValue::isInteger(const int i) const
case TYPE_S32:
case TYPE_U32:
return reg.data.s32 == i; // as if ...
case TYPE_S64:
case TYPE_U64:
return reg.data.s64 == i; // as if ...
case TYPE_F32:
return reg.data.f32 == static_cast<float>(i);
case TYPE_F64:

View File

@ -38,6 +38,7 @@ static nv50_ir::operation translateOpcode(uint opcode);
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
static nv50_ir::CacheMode translateCacheMode(uint qualifier);
class Instruction
{
@ -213,6 +214,12 @@ public:
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
nv50_ir::CacheMode getCacheMode() const {
if (!insn->Instruction.Memory)
return nv50_ir::CACHE_CA;
return translateCacheMode(insn->Memory.Qualifier);
}
inline uint getLabel() { return insn->Label.Label; }
unsigned getSaturate() const { return insn->Instruction.Saturate; }
@ -366,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
//case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@ -436,6 +443,15 @@ static nv50_ir::TexTarget translateTexture(uint tex)
}
}
static nv50_ir::CacheMode translateCacheMode(uint qualifier)
{
if (qualifier & TGSI_MEMORY_VOLATILE)
return nv50_ir::CACHE_CV;
if (qualifier & TGSI_MEMORY_COHERENT)
return nv50_ir::CACHE_CG;
return nv50_ir::CACHE_CA;
}
nv50_ir::DataType Instruction::inferSrcType() const
{
switch (getOpcode()) {
@ -1210,6 +1226,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_BUFFER:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
@ -1255,6 +1272,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
indirectTempArrays.insert(insn.getDst(0).getArrayId());
} else
if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
info->io.globalAccess |= 0x2;
}
}
@ -1264,13 +1284,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (src.isIndirect(0))
indirectTempArrays.insert(src.getArrayId());
} else
/*
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
if (src.getFile() == TGSI_FILE_BUFFER) {
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
*/
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
@ -1752,7 +1769,7 @@ Converter::acquireDst(int d, int c)
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
return NULL;
if (dst.isIndirect(0) ||
@ -2222,6 +2239,25 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
Value *off = fetchSrc(1, c);
Symbol *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
} else {
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
}
mkLoad(TYPE_U32, dst0[c], sym, off)->cache = tgsi.getCacheMode();
}
return;
}
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
@ -2298,6 +2334,28 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
for (c = 0; c < 4; ++c) {
if (!(tgsi.getDst(0).getMask() & (1 << c)))
continue;
Symbol *sym;
Value *off;
if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
} else {
off = fetchSrc(0, 0);
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
}
mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c))
->cache = tgsi.getCacheMode();
}
return;
}
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
@ -2359,6 +2417,32 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
for (int c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
Instruction *insn;
Value *off = fetchSrc(1, c);
Value *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
else
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
insn->setIndirect(0, 0, off);
insn->subOp = subOp;
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
insn->setSrc(2, fetchSrc(3, 0));
}
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
return;
}
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {

View File

@ -1022,11 +1022,11 @@ NVC0LoweringPass::handleTXLQ(TexInstruction *i)
return true;
}
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
SVSemantic sv;
Value *ptr = atom->getIndirect(0, 0), *base;
switch (atom->src(0).getFile()) {
case FILE_MEMORY_LOCAL:
@ -1037,11 +1037,16 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
break;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
base = loadResInfo64(NULL, atom->getSrc(0)->reg.fileIndex * 16);
assert(base->reg.size == 8);
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
assert(base->reg.size == 8);
atom->setIndirect(0, 0, base);
return true;
}
Value *base =
base =
bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
Value *ptr = atom->getIndirect(0, 0);
atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
@ -1093,6 +1098,26 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}
inline Value *
NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
{
uint8_t b = prog->driver->io.resInfoCBSlot;
off += prog->driver->io.suInfoBase;
return bld.
mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr);
}
inline Value *
NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
{
uint8_t b = prog->driver->io.resInfoCBSlot;
off += prog->driver->io.suInfoBase;
return bld.
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr);
}
inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
@ -1786,6 +1811,7 @@ NVC0LoweringPass::visit(Instruction *i)
return handleRDSV(i);
case OP_WRSV:
return handleWRSV(i);
case OP_STORE:
case OP_LOAD:
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
@ -1820,6 +1846,24 @@ NVC0LoweringPass::visit(Instruction *i)
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
} else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
Value *ptr = loadResInfo64(NULL, i->getSrc(0)->reg.fileIndex * 16);
// XXX come up with a way not to do this for EVERY little access but
// rather to batch these up somehow. Unfortunately we've lost the
// information about the field width by the time we get here.
Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
Value *length = loadResLength32(NULL, i->getSrc(0)->reg.fileIndex * 16);
Value *pred = new_LValue(func, FILE_PREDICATE);
if (i->src(0).isIndirect(0)) {
bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
}
i->setIndirect(0, 0, ptr);
bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
i->setPredicate(CC_NOT_P, pred);
if (i->defExists(0)) {
bld.mkMov(i->getDef(0), bld.mkImm(0));
}
}
break;
case OP_ATOM:

View File

@ -116,6 +116,8 @@ private:
void readTessCoord(LValue *dst, int c);
Value *loadResInfo32(Value *ptr, uint32_t off);
Value *loadResInfo64(Value *ptr, uint32_t off);
Value *loadResLength32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);

View File

@ -3044,7 +3044,7 @@ Instruction::isResultEqual(const Instruction *that) const
if (that->srcExists(s))
return false;
if (op == OP_LOAD || op == OP_VFETCH) {
if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
switch (src(0).getFile()) {
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:

View File

@ -554,6 +554,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
info->io.resInfoCBSlot = 15;
info->io.sampleInfoBase = 256 + 128;
info->io.suInfoBase = 512;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}
@ -635,6 +636,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
*/
if (info->io.globalAccess)
prog->hdr[0] |= 1 << 26;
if (info->io.globalAccess & 0x2)
prog->hdr[0] |= 1 << 16;
if (info->io.fp64)
prog->hdr[0] |= 1 << 27;