nv50/ir: rewrite the register allocator as GCRA, with spilling
This is more flexible than the linear scan, and we don't need the separate allocation pass for constrained values anymore.
This commit is contained in:
parent
99319328d4
commit
e43a3a66a9
|
@ -222,66 +222,17 @@ Value::Value()
|
|||
reg.size = 4;
|
||||
}
|
||||
|
||||
// Try to merge the join set represented by jval->join into the join set
// represented by this->join.
//
// jval:  value whose representative should be joined with ours
// force: when true, the compatibility checks below only log a message and
//        the merge is performed anyway
//
// Returns false (without modifying anything) if the merge is rejected,
// true after the two sets have been merged.
bool
|
||||
Value::coalesce(Value *jval, bool force)
|
||||
{
|
||||
Value *repr = this->join; // new representative
|
||||
Value *jrep = jval->join;
|
||||
|
||||
// Values must live in the same register file and have the same size.
if (reg.file != jval->reg.file || reg.size != jval->reg.size) {
|
||||
if (!force)
|
||||
return false;
|
||||
ERROR("forced coalescing of values of different sizes/files");
|
||||
}
|
||||
|
||||
// The register ids of the two representatives differ (unforced case only).
if (!force && (repr->reg.data.id != jrep->reg.data.id)) {
|
||||
// Both already have an assigned (non-negative) id: cannot be merged.
if (repr->reg.data.id >= 0 &&
|
||||
jrep->reg.data.id >= 0)
|
||||
return false;
|
||||
// Only jrep has an assigned id: swap roles so that the value carrying
// the assigned id becomes the representative.
if (jrep->reg.data.id >= 0) {
|
||||
repr = jval->join;
|
||||
|
||||
jrep = this->join;
|
||||
jval = this;
|
||||
}
|
||||
|
||||
// need to check all fixed register values of the program for overlap
|
||||
Function *func = defs.front()->getInsn()->bb->getFunction();
|
||||
|
||||
// TODO: put values in by register-id bins per function
|
||||
ArrayList::Iterator iter = func->allLValues.iterator();
|
||||
for (; !iter.end(); iter.next()) {
|
||||
Value *fixed = reinterpret_cast<Value *>(iter.get());
|
||||
assert(fixed);
|
||||
// Reject if any value with repr's register id is live at the same
// time as jrep.
if (fixed->reg.data.id == repr->reg.data.id)
|
||||
if (fixed->livei.overlaps(jrep->livei))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Overlapping live intervals of the two representatives block the merge
// unless it is forced.
if (repr->livei.overlaps(jrep->livei)) {
|
||||
if (!force)
|
||||
return false;
|
||||
// do we really want this ? if at all, only for constraint ops
|
||||
INFO("NOTE: forced coalescing with live range overlap\n");
|
||||
}
|
||||
|
||||
// Point every value defined through jrep's defs at the new representative.
for (DefIterator it = jrep->defs.begin(); it != jrep->defs.end(); ++it)
|
||||
(*it)->get()->join = repr;
|
||||
|
||||
// Append jrep's defs to repr's def list and fold its live interval into
// repr's.
repr->defs.insert(repr->defs.end(),
|
||||
jrep->defs.begin(), jrep->defs.end());
|
||||
repr->livei.unify(jrep->livei);
|
||||
|
||||
assert(repr->join == repr && jval->join == repr);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a fresh value in register file @file and register it with @fn.
LValue::LValue(Function *fn, DataFile file)
{
   // Storage: requested file, no register assigned yet (id -1);
   // predicates get size 1, every other file size 4.
   reg.file = file;
   if (file == FILE_PREDICATE)
      reg.size = 1;
   else
      reg.size = 4;
   reg.data.id = -1;

   // All flags start out cleared and there is no affinity.
   ssa = 0;
   compound = 0;
   compMask = 0;
   fixedReg = 0;
   noSpill = 0;
   affinity = -1;

   fn->add(this, this->id);
}
|
||||
|
@ -294,7 +245,11 @@ LValue::LValue(Function *fn, LValue *lval)
|
|||
reg.size = lval->reg.size;
|
||||
reg.data.id = -1;
|
||||
|
||||
affinity = -1;
|
||||
compMask = 0;
|
||||
compound = 0;
|
||||
ssa = 0;
|
||||
fixedReg = 0;
|
||||
noSpill = 0;
|
||||
|
||||
fn->add(this, this->id);
|
||||
}
|
||||
|
@ -523,8 +478,8 @@ Value::interfers(const Value *that) const
|
|||
idA = this->join->reg.data.offset;
|
||||
idB = that->join->reg.data.offset;
|
||||
} else {
|
||||
idA = this->join->reg.data.id * this->reg.size;
|
||||
idB = that->join->reg.data.id * that->reg.size;
|
||||
idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
|
||||
idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
|
||||
}
|
||||
|
||||
if (idA < idB)
|
||||
|
@ -539,8 +494,6 @@ Value::interfers(const Value *that) const
|
|||
bool
|
||||
Value::equals(const Value *that, bool strict) const
|
||||
{
|
||||
that = that->join;
|
||||
|
||||
if (strict)
|
||||
return this == that;
|
||||
|
||||
|
@ -754,20 +707,38 @@ Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
|
|||
}
|
||||
|
||||
unsigned int
|
||||
Instruction::defCount(unsigned int mask) const
|
||||
Instruction::defCount(unsigned int mask, bool singleFile) const
|
||||
{
|
||||
unsigned int i, n;
|
||||
|
||||
if (singleFile) {
|
||||
unsigned int d = ffs(mask);
|
||||
if (!d)
|
||||
return 0;
|
||||
for (i = d--; defExists(i); ++i)
|
||||
if (getDef(i)->reg.file != getDef(d)->reg.file)
|
||||
mask &= ~(1 << i);
|
||||
}
|
||||
|
||||
for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
|
||||
n += mask & 1;
|
||||
return n;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
Instruction::srcCount(unsigned int mask) const
|
||||
Instruction::srcCount(unsigned int mask, bool singleFile) const
|
||||
{
|
||||
unsigned int i, n;
|
||||
|
||||
if (singleFile) {
|
||||
unsigned int s = ffs(mask);
|
||||
if (!s)
|
||||
return 0;
|
||||
for (i = s--; srcExists(i); ++i)
|
||||
if (getSrc(i)->reg.file != getSrc(s)->reg.file)
|
||||
mask &= ~(1 << i);
|
||||
}
|
||||
|
||||
for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
|
||||
n += mask & 1;
|
||||
return n;
|
||||
|
@ -1137,6 +1108,7 @@ out:
|
|||
info->bin.maxGPR = prog->maxGPR;
|
||||
info->bin.code = prog->code;
|
||||
info->bin.codeSize = prog->binSize;
|
||||
info->bin.tlsSpace = prog->tlsSize;
|
||||
|
||||
delete prog;
|
||||
nv50_ir::Target::destroy(targ);
|
||||
|
|
|
@ -219,6 +219,7 @@ enum DataFile
|
|||
FILE_PREDICATE, // boolean predicate
|
||||
FILE_FLAGS, // zero/sign/carry/overflow bits
|
||||
FILE_ADDRESS,
|
||||
LAST_REGISTER_FILE = FILE_ADDRESS,
|
||||
FILE_IMMEDIATE,
|
||||
FILE_MEMORY_CONST,
|
||||
FILE_SHADER_INPUT,
|
||||
|
@ -320,7 +321,7 @@ struct Storage
|
|||
float f32;
|
||||
double f64;
|
||||
int32_t offset; // offset from 0 (base of address space)
|
||||
int32_t id; // register id (< 0 if virtual/unassigned)
|
||||
int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4)
|
||||
struct {
|
||||
SVSemantic sv;
|
||||
int index;
|
||||
|
@ -473,8 +474,6 @@ public:
|
|||
inline const Symbol *asSym() const;
|
||||
inline const ImmediateValue *asImm() const;
|
||||
|
||||
bool coalesce(Value *, bool force = false);
|
||||
|
||||
inline bool inFile(DataFile f) { return reg.file == f; }
|
||||
|
||||
static inline Value *get(Iterator&);
|
||||
|
@ -506,9 +505,11 @@ public:
|
|||
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
|
||||
|
||||
public:
|
||||
unsigned ssa : 1;
|
||||
|
||||
int affinity;
|
||||
unsigned compMask : 8; // compound/component mask
|
||||
unsigned compound : 1; // used by RA, value involved in split/merge
|
||||
unsigned ssa : 1;
|
||||
unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0)
|
||||
unsigned noSpill : 1; // do not spill (e.g. if spill temporary already)
|
||||
};
|
||||
|
||||
class Symbol : public Value
|
||||
|
@ -611,7 +612,7 @@ public:
|
|||
return s < srcs.size() && srcs[s].exists();
|
||||
}
|
||||
|
||||
inline bool constrainedDefs() const { return defExists(1); }
|
||||
inline bool constrainedDefs() const;
|
||||
|
||||
bool setPredicate(CondCode ccode, Value *);
|
||||
inline Value *getPredicate() const;
|
||||
|
@ -622,9 +623,9 @@ public:
|
|||
inline void setFlagsDef(int d, Value *);
|
||||
|
||||
unsigned int defCount() const { return defs.size(); };
|
||||
unsigned int defCount(unsigned int mask) const;
|
||||
unsigned int defCount(unsigned int mask, bool singleFile = false) const;
|
||||
unsigned int srcCount() const { return srcs.size(); };
|
||||
unsigned int srcCount(unsigned int mask) const;
|
||||
unsigned int srcCount(unsigned int mask, bool singleFile = false) const;
|
||||
|
||||
// save & remove / set indirect[0,1] and predicate source
|
||||
void takeExtraSources(int s, Value *[3]);
|
||||
|
@ -965,6 +966,11 @@ public:
|
|||
uint32_t binPos;
|
||||
uint32_t binSize;
|
||||
|
||||
Value *stackPtr;
|
||||
|
||||
uint32_t tlsBase; // base address for l[] space (if no stack pointer is used)
|
||||
uint32_t tlsSize;
|
||||
|
||||
ArrayList allBBlocks;
|
||||
ArrayList allInsns;
|
||||
ArrayList allLValues;
|
||||
|
@ -1036,6 +1042,7 @@ public:
|
|||
|
||||
uint32_t *code;
|
||||
uint32_t binSize;
|
||||
uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
|
||||
|
||||
int maxGPR;
|
||||
|
||||
|
|
|
@ -41,6 +41,10 @@ Function::Function(Program *p, const char *fnName, uint32_t label)
|
|||
binPos = 0;
|
||||
binSize = 0;
|
||||
|
||||
stackPtr = NULL;
|
||||
tlsBase = 0;
|
||||
tlsSize = 0;
|
||||
|
||||
prog->add(this, id);
|
||||
}
|
||||
|
||||
|
|
|
@ -2420,6 +2420,7 @@ Program::makeFromTGSI(struct nv50_ir_prog_info *info)
|
|||
tgsi::Source src(info);
|
||||
if (!src.scanSource())
|
||||
return false;
|
||||
tlsSize = info->bin.tlsSpace;
|
||||
|
||||
Converter builder(this, &src);
|
||||
return builder.run();
|
||||
|
|
|
@ -192,6 +192,11 @@ Instruction *Value::getUniqueInsn() const
|
|||
return defs.front()->getInsn();
|
||||
}
|
||||
|
||||
inline bool Instruction::constrainedDefs() const
|
||||
{
|
||||
return defExists(1) || op == OP_UNION;
|
||||
}
|
||||
|
||||
Value *Instruction::getIndirect(int s, int dim) const
|
||||
{
|
||||
return srcs[s].isIndirect(dim) ? getSrc(srcs[s].indirect[dim]) : NULL;
|
||||
|
|
|
@ -48,7 +48,7 @@ Instruction::isNop() const
|
|||
}
|
||||
|
||||
if (op == OP_MOV || op == OP_UNION) {
|
||||
if (!def(0).rep()->equals(getSrc(0)))
|
||||
if (!getDef(0)->equals(getSrc(0)))
|
||||
return false;
|
||||
if (op == OP_UNION)
|
||||
if (!def(0).rep()->equals(getSrc(1)))
|
||||
|
|
|
@ -283,6 +283,9 @@ int LValue::print(char *buf, size_t size, DataType ty) const
|
|||
else
|
||||
if (reg.size == 16)
|
||||
postFix = "q";
|
||||
else
|
||||
if (reg.size == 12)
|
||||
postFix = "t";
|
||||
break;
|
||||
case FILE_PREDICATE:
|
||||
r = 'p'; col = TXT_REGISTER;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -88,6 +88,11 @@ Stack::moveTo(Stack& that)
|
|||
this->size = 0;
|
||||
}
|
||||
|
||||
// Copy constructor: start with an empty range list, then insert the
// ranges of @that into it.
Interval::Interval(const Interval& that)
   : head(NULL),
     tail(NULL)
{
   insert(that);
}
|
||||
|
||||
Interval::~Interval()
|
||||
{
|
||||
clear();
|
||||
|
@ -148,7 +153,7 @@ Interval::extend(int a, int b)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Interval::contains(int pos)
|
||||
bool Interval::contains(int pos) const
|
||||
{
|
||||
for (Range *r = head; r && r->bgn <= pos; r = r->next)
|
||||
if (r->end > pos)
|
||||
|
@ -156,16 +161,37 @@ bool Interval::contains(int pos)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool Interval::overlaps(const Interval &iv) const
|
||||
bool Interval::overlaps(const Interval &that) const
|
||||
{
|
||||
#if 1
|
||||
Range *a = this->head;
|
||||
Range *b = that.head;
|
||||
|
||||
while (a && b) {
|
||||
if (b->bgn < a->end &&
|
||||
b->end > a->bgn)
|
||||
return true;
|
||||
if (a->end <= b->bgn)
|
||||
a = a->next;
|
||||
else
|
||||
b = b->next;
|
||||
}
|
||||
#else
|
||||
for (Range *rA = this->head; rA; rA = rA->next)
|
||||
for (Range *rB = iv.head; rB; rB = rB->next)
|
||||
if (rB->bgn < rA->end &&
|
||||
rB->end > rA->bgn)
|
||||
return true;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
void Interval::insert(const Interval &that)
|
||||
{
|
||||
for (Range *r = that.head; r; r = r->next)
|
||||
this->extend(r->bgn, r->end);
|
||||
}
|
||||
|
||||
void Interval::unify(Interval &that)
|
||||
{
|
||||
assert(this != &that);
|
||||
|
@ -177,6 +203,14 @@ void Interval::unify(Interval &that)
|
|||
that.head = NULL;
|
||||
}
|
||||
|
||||
int Interval::length() const
|
||||
{
|
||||
int len = 0;
|
||||
for (Range *r = head; r; r = r->next)
|
||||
len += r->bgn - r->end;
|
||||
return len;
|
||||
}
|
||||
|
||||
void Interval::print() const
|
||||
{
|
||||
if (!head)
|
||||
|
@ -205,6 +239,27 @@ BitSet& BitSet::operator|=(const BitSet &set)
|
|||
return *this;
|
||||
}
|
||||
|
||||
bool BitSet::resize(unsigned int nBits)
|
||||
{
|
||||
if (!data || !nBits)
|
||||
return allocate(nBits, true);
|
||||
const unsigned int p = (size + 31) / 32;
|
||||
const unsigned int n = (nBits + 31) / 32;
|
||||
if (n == p)
|
||||
return true;
|
||||
|
||||
data = (uint32_t *)REALLOC(data, 4 * p, 4 * n);
|
||||
if (!data) {
|
||||
size = 0;
|
||||
return false;
|
||||
}
|
||||
if (n > p)
|
||||
memset(&data[4 * p + 4], 0, (n - p) * 4);
|
||||
|
||||
size = nBits;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BitSet::allocate(unsigned int nBits, bool zero)
|
||||
{
|
||||
if (data && size < nBits) {
|
||||
|
@ -254,6 +309,65 @@ void BitSet::setOr(BitSet *pA, BitSet *pB)
|
|||
}
|
||||
}
|
||||
|
||||
int BitSet::findFreeRange(unsigned int count) const
|
||||
{
|
||||
const uint32_t m = (1 << count) - 1;
|
||||
int pos = size;
|
||||
unsigned int i;
|
||||
const unsigned int end = (size + 31) / 32;
|
||||
|
||||
if (count == 1) {
|
||||
for (i = 0; i < end; ++i) {
|
||||
pos = ffs(~data[i]) - 1;
|
||||
if (pos >= 0)
|
||||
break;
|
||||
}
|
||||
} else
|
||||
if (count == 2) {
|
||||
for (i = 0; i < end; ++i) {
|
||||
if (data[i] != 0xffffffff) {
|
||||
uint32_t b = data[i] | (data[i] >> 1) | 0xaaaaaaaa;
|
||||
pos = ffs(~b) - 1;
|
||||
if (pos >= 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else
|
||||
if (count == 4 || count == 3) {
|
||||
for (i = 0; i < end; ++i) {
|
||||
if (data[i] != 0xffffffff) {
|
||||
uint32_t b =
|
||||
(data[i] >> 0) | (data[i] >> 1) |
|
||||
(data[i] >> 2) | (data[i] >> 3) | 0xeeeeeeee;
|
||||
pos = ffs(~b) - 1;
|
||||
if (pos >= 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (count <= 8)
|
||||
count = 8;
|
||||
else
|
||||
if (count <= 16)
|
||||
count = 16;
|
||||
else
|
||||
count = 32;
|
||||
|
||||
for (i = 0; i < end; ++i) {
|
||||
if (data[i] != 0xffffffff) {
|
||||
for (pos = 0; pos < 32; pos += count)
|
||||
if (!(data[i] & (m << pos)))
|
||||
break;
|
||||
if (pos < 32)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
pos += i * 32;
|
||||
|
||||
return ((pos + count) <= size) ? pos : -1;
|
||||
}
|
||||
|
||||
void BitSet::print() const
|
||||
{
|
||||
unsigned int n = 0;
|
||||
|
|
|
@ -137,6 +137,8 @@ public:
|
|||
(__listA)->prev = prevB; \
|
||||
} while(0)
|
||||
|
||||
#define DLLIST_EMPTY(__list) ((__list)->next == (__list))
|
||||
|
||||
#define DLLIST_FOR_EACH(list, it) \
|
||||
for (DLList::Iterator (it) = (list)->iterator(); !(it).end(); (it).next())
|
||||
|
||||
|
@ -416,17 +418,22 @@ class Interval
|
|||
{
|
||||
public:
|
||||
Interval() : head(0), tail(0) { }
|
||||
Interval(const Interval&);
|
||||
~Interval();
|
||||
|
||||
bool extend(int, int);
|
||||
void insert(const Interval&);
|
||||
void unify(Interval&); // clears source interval
|
||||
void clear();
|
||||
|
||||
inline int begin() { return head ? head->bgn : -1; }
|
||||
inline int end() { checkTail(); return tail ? tail->end : -1; }
|
||||
inline int begin() const { return head ? head->bgn : -1; }
|
||||
inline int end() const { checkTail(); return tail ? tail->end : -1; }
|
||||
inline bool isEmpty() const { return !head; }
|
||||
bool overlaps(const Interval&) const;
|
||||
bool contains(int pos);
|
||||
bool contains(int pos) const;
|
||||
|
||||
inline int extent() const { return end() - begin(); }
|
||||
int length() const;
|
||||
|
||||
void print() const;
|
||||
|
||||
|
@ -477,6 +484,7 @@ public:
|
|||
}
|
||||
|
||||
bool allocate(unsigned int nBits, bool zero);
|
||||
bool resize(unsigned int nBits); // keep old data, zero additional bits
|
||||
|
||||
inline unsigned int getSize() const { return size; }
|
||||
|
||||
|
@ -489,18 +497,44 @@ public:
|
|||
assert(i < size);
|
||||
data[i / 32] |= 1 << (i % 32);
|
||||
}
|
||||
// NOTE: range may not cross 32 bit boundary (implies n <= 32)
|
||||
inline void setRange(unsigned int i, unsigned int n)
|
||||
{
|
||||
assert((i + n) <= size && (((i % 32) + n) <= 32));
|
||||
data[i / 32] |= ((1 << n) - 1) << (i % 32);
|
||||
}
|
||||
inline void setMask(unsigned int i, uint32_t m)
|
||||
{
|
||||
assert(i < size);
|
||||
data[i / 32] |= m;
|
||||
}
|
||||
|
||||
inline void clr(unsigned int i)
|
||||
{
|
||||
assert(i < size);
|
||||
data[i / 32] &= ~(1 << (i % 32));
|
||||
}
|
||||
// NOTE: range may not cross 32 bit boundary (implies n <= 32)
|
||||
inline void clrRange(unsigned int i, unsigned int n)
|
||||
{
|
||||
assert((i + n) <= size && (((i % 32) + n) <= 32));
|
||||
data[i / 32] &= ~(((1 << n) - 1) << (i % 32));
|
||||
}
|
||||
|
||||
inline bool test(unsigned int i) const
|
||||
{
|
||||
assert(i < size);
|
||||
return data[i / 32] & (1 << (i % 32));
|
||||
}
|
||||
// NOTE: range may not cross 32 bit boundary (implies n <= 32)
|
||||
inline bool testRange(unsigned int i, unsigned int n)
|
||||
{
|
||||
assert((i + n) <= size && (((i % 32) + n) <= 32));
|
||||
return data[i / 32] & (((1 << n) - 1) << (i % 32));
|
||||
}
|
||||
|
||||
// Find a range of size (<= 32) clear bits aligned to roundup_pow2(size).
|
||||
int findFreeRange(unsigned int size) const;
|
||||
|
||||
BitSet& operator|=(const BitSet&);
|
||||
|
||||
|
@ -514,6 +548,13 @@ public:
|
|||
|
||||
void andNot(const BitSet&);
|
||||
|
||||
// bits = (bits | setMask) & ~clrMask
|
||||
inline void periodicMask32(uint32_t setMask, uint32_t clrMask)
|
||||
{
|
||||
for (unsigned int i = 0; i < (size + 31) / 32; ++i)
|
||||
data[i] = (data[i] | setMask) & ~clrMask;
|
||||
}
|
||||
|
||||
unsigned int popCount() const;
|
||||
|
||||
void print() const;
|
||||
|
|
|
@ -1009,9 +1009,7 @@ CodeEmitterNVC0::emitTEX(const TexInstruction *i)
|
|||
if (i->tex.target.isShadow())
|
||||
code[1] |= 1 << 24;
|
||||
|
||||
int src1 = i->tex.target.getArgCount();
|
||||
if (i->op == OP_TXD && i->tex.useOffsets)
|
||||
++src1;
|
||||
const int src1 = MAX2(i->predSrc + 1, 1); // if predSrc == 1, no 2nd src
|
||||
|
||||
if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
|
||||
// lzero
|
||||
|
@ -1184,7 +1182,7 @@ CodeEmitterNVC0::emitVFETCH(const Instruction *i)
|
|||
|
||||
emitPredicate(i);
|
||||
|
||||
code[0] |= (i->defCount(0xf) - 1) << 5;
|
||||
code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
|
||||
|
||||
defId(i->def(0), 14);
|
||||
srcId(i->src(0).getIndirect(0), 20);
|
||||
|
|
Loading…
Reference in New Issue