nv50/ir: only use FILE_LOCAL_MEMORY for temp arrays that use indirection
Previously we were treating any indirect temp array usage to mean that everything should end up in lmem. The MemoryOpt pass would clean a lot of that up later, but in the meanwhile we would lose a lot of opportunity for optimization. This helps a lot of Metro 2033 Redux and a handful of KSP shaders: total instructions in shared programs : 6288373 -> 6261517 (-0.43%) total gprs used in shared programs : 944051 -> 945131 (0.11%) total local used in shared programs : 54116 -> 54116 (0.00%) A typical case is for register usage to double and for instructions to halve. A future commit can also optimize local memory usage size to be reduced with better packing. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
37b67db6ae
commit
e231f59b6d
|
@ -95,6 +95,13 @@ public:
|
||||||
return tgsi_util_get_src_register_swizzle(®, chan);
|
return tgsi_util_get_src_register_swizzle(®, chan);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int getArrayId() const
|
||||||
|
{
|
||||||
|
if (isIndirect(0))
|
||||||
|
return fsr->Indirect.ArrayID;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
nv50_ir::Modifier getMod(int chan) const;
|
nv50_ir::Modifier getMod(int chan) const;
|
||||||
|
|
||||||
SrcRegister getIndirect(int dim) const
|
SrcRegister getIndirect(int dim) const
|
||||||
|
@ -154,6 +161,13 @@ public:
|
||||||
return SrcRegister(fdr->Indirect);
|
return SrcRegister(fdr->Indirect);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int getArrayId() const
|
||||||
|
{
|
||||||
|
if (isIndirect(0))
|
||||||
|
return fdr->Indirect.ArrayID;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const struct tgsi_dst_register reg;
|
const struct tgsi_dst_register reg;
|
||||||
const struct tgsi_full_dst_register *fdr;
|
const struct tgsi_full_dst_register *fdr;
|
||||||
|
@ -809,7 +823,8 @@ public:
|
||||||
// these registers are per-subroutine, cannot be used for parameter passing
|
// these registers are per-subroutine, cannot be used for parameter passing
|
||||||
std::set<Location> locals;
|
std::set<Location> locals;
|
||||||
|
|
||||||
bool mainTempsInLMem;
|
std::set<int> indirectTempArrays;
|
||||||
|
std::vector<int> tempArrayId;
|
||||||
|
|
||||||
int clipVertexOutput;
|
int clipVertexOutput;
|
||||||
|
|
||||||
|
@ -841,8 +856,6 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
|
||||||
|
|
||||||
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
|
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
|
||||||
tgsi_dump(tokens, 0);
|
tgsi_dump(tokens, 0);
|
||||||
|
|
||||||
mainTempsInLMem = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Source::~Source()
|
Source::~Source()
|
||||||
|
@ -872,6 +885,7 @@ bool Source::scanSource()
|
||||||
|
|
||||||
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
|
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
|
||||||
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
|
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
|
||||||
|
tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
|
||||||
|
|
||||||
info->immd.bufSize = 0;
|
info->immd.bufSize = 0;
|
||||||
|
|
||||||
|
@ -917,7 +931,8 @@ bool Source::scanSource()
|
||||||
}
|
}
|
||||||
tgsi_parse_free(&parse);
|
tgsi_parse_free(&parse);
|
||||||
|
|
||||||
if (mainTempsInLMem)
|
// TODO: Compute based on relevant array sizes
|
||||||
|
if (indirectTempArrays.size())
|
||||||
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
|
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
|
||||||
|
|
||||||
if (info->io.genUserClip > 0) {
|
if (info->io.genUserClip > 0) {
|
||||||
|
@ -1028,6 +1043,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
||||||
unsigned sn = TGSI_SEMANTIC_GENERIC;
|
unsigned sn = TGSI_SEMANTIC_GENERIC;
|
||||||
unsigned si = 0;
|
unsigned si = 0;
|
||||||
const unsigned first = decl->Range.First, last = decl->Range.Last;
|
const unsigned first = decl->Range.First, last = decl->Range.Last;
|
||||||
|
const int arrayId = decl->Array.ArrayID;
|
||||||
|
|
||||||
if (decl->Declaration.Semantic) {
|
if (decl->Declaration.Semantic) {
|
||||||
sn = decl->Semantic.Name;
|
sn = decl->Semantic.Name;
|
||||||
|
@ -1172,8 +1188,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
||||||
for (i = first; i <= last; ++i)
|
for (i = first; i <= last; ++i)
|
||||||
textureViews[i].target = decl->SamplerView.Resource;
|
textureViews[i].target = decl->SamplerView.Resource;
|
||||||
break;
|
break;
|
||||||
case TGSI_FILE_NULL:
|
|
||||||
case TGSI_FILE_TEMPORARY:
|
case TGSI_FILE_TEMPORARY:
|
||||||
|
for (i = first; i <= last; ++i)
|
||||||
|
tempArrayId[i] = arrayId;
|
||||||
|
break;
|
||||||
|
case TGSI_FILE_NULL:
|
||||||
case TGSI_FILE_ADDRESS:
|
case TGSI_FILE_ADDRESS:
|
||||||
case TGSI_FILE_CONSTANT:
|
case TGSI_FILE_CONSTANT:
|
||||||
case TGSI_FILE_IMMEDIATE:
|
case TGSI_FILE_IMMEDIATE:
|
||||||
|
@ -1223,7 +1242,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||||
} else
|
} else
|
||||||
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
|
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
|
||||||
if (insn.getDst(0).isIndirect(0))
|
if (insn.getDst(0).isIndirect(0))
|
||||||
mainTempsInLMem = true;
|
indirectTempArrays.insert(insn.getDst(0).getArrayId());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1231,7 +1250,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||||
Instruction::SrcRegister src = insn.getSrc(s);
|
Instruction::SrcRegister src = insn.getSrc(s);
|
||||||
if (src.getFile() == TGSI_FILE_TEMPORARY) {
|
if (src.getFile() == TGSI_FILE_TEMPORARY) {
|
||||||
if (src.isIndirect(0))
|
if (src.isIndirect(0))
|
||||||
mainTempsInLMem = true;
|
indirectTempArrays.insert(src.getArrayId());
|
||||||
} else
|
} else
|
||||||
/*
|
/*
|
||||||
if (src.getFile() == TGSI_FILE_RESOURCE) {
|
if (src.getFile() == TGSI_FILE_RESOURCE) {
|
||||||
|
@ -1416,6 +1435,7 @@ private:
|
||||||
DataType srcTy;
|
DataType srcTy;
|
||||||
|
|
||||||
DataArray tData; // TGSI_FILE_TEMPORARY
|
DataArray tData; // TGSI_FILE_TEMPORARY
|
||||||
|
DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
|
||||||
DataArray aData; // TGSI_FILE_ADDRESS
|
DataArray aData; // TGSI_FILE_ADDRESS
|
||||||
DataArray pData; // TGSI_FILE_PREDICATE
|
DataArray pData; // TGSI_FILE_PREDICATE
|
||||||
DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
|
DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
|
||||||
|
@ -1619,7 +1639,7 @@ Converter::getArrayForFile(unsigned file, int idx)
|
||||||
{
|
{
|
||||||
switch (file) {
|
switch (file) {
|
||||||
case TGSI_FILE_TEMPORARY:
|
case TGSI_FILE_TEMPORARY:
|
||||||
return &tData;
|
return idx == 0 ? &tData : &lData;
|
||||||
case TGSI_FILE_PREDICATE:
|
case TGSI_FILE_PREDICATE:
|
||||||
return &pData;
|
return &pData;
|
||||||
case TGSI_FILE_ADDRESS:
|
case TGSI_FILE_ADDRESS:
|
||||||
|
@ -1644,7 +1664,7 @@ Converter::shiftAddress(Value *index)
|
||||||
Value *
|
Value *
|
||||||
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
|
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
|
||||||
{
|
{
|
||||||
const int idx2d = src.is2D() ? src.getIndex(1) : 0;
|
int idx2d = src.is2D() ? src.getIndex(1) : 0;
|
||||||
const int idx = src.getIndex(0);
|
const int idx = src.getIndex(0);
|
||||||
const int swz = src.getSwizzle(c);
|
const int swz = src.getSwizzle(c);
|
||||||
Instruction *ld;
|
Instruction *ld;
|
||||||
|
@ -1686,6 +1706,14 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
|
||||||
ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
|
ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
|
||||||
ld->perPatch = info->sv[idx].patch;
|
ld->perPatch = info->sv[idx].patch;
|
||||||
return ld->getDef(0);
|
return ld->getDef(0);
|
||||||
|
case TGSI_FILE_TEMPORARY: {
|
||||||
|
int arrayid = src.getArrayId();
|
||||||
|
if (!arrayid)
|
||||||
|
arrayid = code->tempArrayId[idx];
|
||||||
|
idx2d = (code->indirectTempArrays.find(arrayid) !=
|
||||||
|
code->indirectTempArrays.end());
|
||||||
|
}
|
||||||
|
/* fallthrough */
|
||||||
default:
|
default:
|
||||||
return getArrayForFile(src.getFile(), idx2d)->load(
|
return getArrayForFile(src.getFile(), idx2d)->load(
|
||||||
sub.cur->values, idx, swz, shiftAddress(ptr));
|
sub.cur->values, idx, swz, shiftAddress(ptr));
|
||||||
|
@ -1698,7 +1726,7 @@ Converter::acquireDst(int d, int c)
|
||||||
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
|
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
|
||||||
const unsigned f = dst.getFile();
|
const unsigned f = dst.getFile();
|
||||||
const int idx = dst.getIndex(0);
|
const int idx = dst.getIndex(0);
|
||||||
const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
|
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
|
||||||
|
|
||||||
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
|
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -1708,6 +1736,10 @@ Converter::acquireDst(int d, int c)
|
||||||
(f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
|
(f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
|
||||||
return getScratch();
|
return getScratch();
|
||||||
|
|
||||||
|
if (f == TGSI_FILE_TEMPORARY)
|
||||||
|
idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
|
||||||
|
code->indirectTempArrays.end();
|
||||||
|
|
||||||
return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
|
return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1740,7 +1772,7 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
|
||||||
{
|
{
|
||||||
const unsigned f = dst.getFile();
|
const unsigned f = dst.getFile();
|
||||||
const int idx = dst.getIndex(0);
|
const int idx = dst.getIndex(0);
|
||||||
const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
|
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
|
||||||
|
|
||||||
if (f == TGSI_FILE_SYSTEM_VALUE) {
|
if (f == TGSI_FILE_SYSTEM_VALUE) {
|
||||||
assert(!ptr);
|
assert(!ptr);
|
||||||
|
@ -1763,6 +1795,10 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
|
||||||
f == TGSI_FILE_PREDICATE ||
|
f == TGSI_FILE_PREDICATE ||
|
||||||
f == TGSI_FILE_ADDRESS ||
|
f == TGSI_FILE_ADDRESS ||
|
||||||
f == TGSI_FILE_OUTPUT) {
|
f == TGSI_FILE_OUTPUT) {
|
||||||
|
if (f == TGSI_FILE_TEMPORARY)
|
||||||
|
idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
|
||||||
|
code->indirectTempArrays.end();
|
||||||
|
|
||||||
getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
|
getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
|
||||||
} else {
|
} else {
|
||||||
assert(!"invalid dst file");
|
assert(!"invalid dst file");
|
||||||
|
@ -3326,18 +3362,17 @@ Converter::exportOutputs()
|
||||||
Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
|
Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
|
||||||
code(code),
|
code(code),
|
||||||
tgsi(NULL),
|
tgsi(NULL),
|
||||||
tData(this), aData(this), pData(this), oData(this)
|
tData(this), lData(this), aData(this), pData(this), oData(this)
|
||||||
{
|
{
|
||||||
info = code->info;
|
info = code->info;
|
||||||
|
|
||||||
const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
|
|
||||||
|
|
||||||
const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
|
const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
|
||||||
const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
|
const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
|
||||||
const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
|
const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
|
||||||
const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
|
const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
|
||||||
|
|
||||||
tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
|
tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
|
||||||
|
lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
|
||||||
pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
|
pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
|
||||||
aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
|
aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
|
||||||
oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
|
oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
|
||||||
|
|
Loading…
Reference in New Issue