nv50/ir: rewrite the register allocator as GCRA, with spilling

This is more flexible than the linear scan, and we don't need the
separate allocation pass for constrained values anymore.
This commit is contained in:
Christoph Bumiller 2012-04-09 20:58:39 +02:00
parent 99319328d4
commit e43a3a66a9
11 changed files with 1491 additions and 434 deletions

View File

@ -222,66 +222,17 @@ Value::Value()
reg.size = 4;
}
/**
 * Try to merge the join set represented by jval->join into the one
 * represented by this->join.
 *
 * On success, the join pointer of every value reached through the absorbed
 * representative's defs is redirected to the surviving representative, whose
 * defs list and live interval then cover both sets.
 *
 * @param jval   value to coalesce with this one
 * @param force  if true, skip the register id checks and carry on despite a
 *               file/size mismatch or a live range overlap (these are only
 *               reported through ERROR/INFO)
 * @return true if the values were coalesced, false if one of the checks
 *         refused the merge
 */
bool
Value::coalesce(Value *jval, bool force)
{
Value *repr = this->join; // new representative
Value *jrep = jval->join;
// Values from different register files or of different sizes are only
// merged when the caller forces it.
if (reg.file != jval->reg.file || reg.size != jval->reg.size) {
if (!force)
return false;
ERROR("forced coalescing of values of different sizes/files");
}
// Register id compatibility is only checked for unforced merges.
if (!force && (repr->reg.data.id != jrep->reg.data.id)) {
// Both representatives already have (different) register ids.
if (repr->reg.data.id >= 0 &&
jrep->reg.data.id >= 0)
return false;
// Only jrep has a register id: swap roles so that the representative
// carrying the id is the one that survives. jval is redirected as well so
// that the final assert still refers to the absorbed side.
if (jrep->reg.data.id >= 0) {
repr = jval->join;
jrep = this->join;
jval = this;
}
// need to check all fixed register values of the program for overlap
// (the scan below walks the allLValues list of the function that contains
// this value's first definition)
Function *func = defs.front()->getInsn()->bb->getFunction();
// TODO: put values in by register-id bins per function
ArrayList::Iterator iter = func->allLValues.iterator();
for (; !iter.end(); iter.next()) {
Value *fixed = reinterpret_cast<Value *>(iter.get());
assert(fixed);
// Any value sharing repr's register id must not be live at the same time
// as the set that is about to be absorbed.
if (fixed->reg.data.id == repr->reg.data.id)
if (fixed->livei.overlaps(jrep->livei))
return false;
}
}
// The two sets themselves must not be live simultaneously, unless forced.
if (repr->livei.overlaps(jrep->livei)) {
if (!force)
return false;
// do we really want this ? if at all, only for constraint ops
INFO("NOTE: forced coalescing with live range overlap\n");
}
// Redirect everything defined through jrep to the surviving representative.
for (DefIterator it = jrep->defs.begin(); it != jrep->defs.end(); ++it)
(*it)->get()->join = repr;
// Append jrep's defs to repr's and fold its live interval into repr's;
// Interval::unify() is declared as clearing the source interval.
repr->defs.insert(repr->defs.end(),
jrep->defs.begin(), jrep->defs.end());
repr->livei.unify(jrep->livei);
assert(repr->join == repr && jval->join == repr);
return true;
}
/**
 * Create an LValue in register file @file and add it to function @fn.
 *
 * The value starts without a register id; its size is 1 for predicates and
 * 4 for every other file.
 */
LValue::LValue(Function *fn, DataFile file)
{
   // storage description
   reg.data.id = -1; // < 0: no register assigned
   reg.file = file;
   if (file == FILE_PREDICATE)
      reg.size = 1;
   else
      reg.size = 4;

   affinity = -1;

   // all flag / mask fields start out cleared
   noSpill = 0;
   fixedReg = 0;
   ssa = 0;
   compound = 0;
   compMask = 0;

   fn->add(this, this->id);
}
@ -294,7 +245,11 @@ LValue::LValue(Function *fn, LValue *lval)
reg.size = lval->reg.size;
reg.data.id = -1;
affinity = -1;
compMask = 0;
compound = 0;
ssa = 0;
fixedReg = 0;
noSpill = 0;
fn->add(this, this->id);
}
@ -523,8 +478,8 @@ Value::interfers(const Value *that) const
idA = this->join->reg.data.offset;
idB = that->join->reg.data.offset;
} else {
idA = this->join->reg.data.id * this->reg.size;
idB = that->join->reg.data.id * that->reg.size;
idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
}
if (idA < idB)
@ -539,8 +494,6 @@ Value::interfers(const Value *that) const
bool
Value::equals(const Value *that, bool strict) const
{
that = that->join;
if (strict)
return this == that;
@ -754,20 +707,38 @@ Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
}
// Count the definitions of this instruction that are selected by @mask
// (bit i of the mask selects def i). Only defs for which defExists() holds
// are visited, and the walk stops at the first index that does not exist.
//
// If @singleFile is set, defs that follow the first selected one and live in
// a different register file than it are dropped from the mask before
// counting.
//
// NOTE(review): two signature lines follow; this looks like the old and the
// new line of a diff whose +/- markers were lost. Only the second one matches
// the body (it is the one that names singleFile) - confirm.
unsigned int
Instruction::defCount(unsigned int mask) const
Instruction::defCount(unsigned int mask, bool singleFile) const
{
unsigned int i, n;
if (singleFile) {
// ffs() yields the 1-based position of the lowest set bit, 0 if mask is 0.
unsigned int d = ffs(mask);
if (!d)
return 0;
// i starts at the def right after the first selected one, while d is
// decremented to become that first def's 0-based index.
for (i = d--; defExists(i); ++i)
if (getDef(i)->reg.file != getDef(d)->reg.file)
mask &= ~(1 << i);
}
// Sum up the remaining mask bits, one per existing def.
for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
n += mask & 1;
return n;
}
unsigned int
Instruction::srcCount(unsigned int mask) const
Instruction::srcCount(unsigned int mask, bool singleFile) const
{
unsigned int i, n;
if (singleFile) {
unsigned int s = ffs(mask);
if (!s)
return 0;
for (i = s--; srcExists(i); ++i)
if (getSrc(i)->reg.file != getSrc(s)->reg.file)
mask &= ~(1 << i);
}
for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
n += mask & 1;
return n;
@ -1137,6 +1108,7 @@ out:
info->bin.maxGPR = prog->maxGPR;
info->bin.code = prog->code;
info->bin.codeSize = prog->binSize;
info->bin.tlsSpace = prog->tlsSize;
delete prog;
nv50_ir::Target::destroy(targ);

View File

@ -219,6 +219,7 @@ enum DataFile
FILE_PREDICATE, // boolean predicate
FILE_FLAGS, // zero/sign/carry/overflow bits
FILE_ADDRESS,
LAST_REGISTER_FILE = FILE_ADDRESS,
FILE_IMMEDIATE,
FILE_MEMORY_CONST,
FILE_SHADER_INPUT,
@ -320,7 +321,7 @@ struct Storage
float f32;
double f64;
int32_t offset; // offset from 0 (base of address space)
int32_t id; // register id (< 0 if virtual/unassigned)
int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4)
struct {
SVSemantic sv;
int index;
@ -473,8 +474,6 @@ public:
inline const Symbol *asSym() const;
inline const ImmediateValue *asImm() const;
bool coalesce(Value *, bool force = false);
inline bool inFile(DataFile f) { return reg.file == f; }
static inline Value *get(Iterator&);
@ -506,9 +505,11 @@ public:
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
public:
unsigned ssa : 1;
int affinity;
unsigned compMask : 8; // compound/component mask
unsigned compound : 1; // used by RA, value involved in split/merge
unsigned ssa : 1;
unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0)
unsigned noSpill : 1; // do not spill (e.g. if spill temporary already)
};
class Symbol : public Value
@ -611,7 +612,7 @@ public:
return s < srcs.size() && srcs[s].exists();
}
inline bool constrainedDefs() const { return defExists(1); }
inline bool constrainedDefs() const;
bool setPredicate(CondCode ccode, Value *);
inline Value *getPredicate() const;
@ -622,9 +623,9 @@ public:
inline void setFlagsDef(int d, Value *);
unsigned int defCount() const { return defs.size(); };
unsigned int defCount(unsigned int mask) const;
unsigned int defCount(unsigned int mask, bool singleFile = false) const;
unsigned int srcCount() const { return srcs.size(); };
unsigned int srcCount(unsigned int mask) const;
unsigned int srcCount(unsigned int mask, bool singleFile = false) const;
// save & remove / set indirect[0,1] and predicate source
void takeExtraSources(int s, Value *[3]);
@ -965,6 +966,11 @@ public:
uint32_t binPos;
uint32_t binSize;
Value *stackPtr;
uint32_t tlsBase; // base address for l[] space (if no stack pointer is used)
uint32_t tlsSize;
ArrayList allBBlocks;
ArrayList allInsns;
ArrayList allLValues;
@ -1036,6 +1042,7 @@ public:
uint32_t *code;
uint32_t binSize;
uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
int maxGPR;

View File

@ -41,6 +41,10 @@ Function::Function(Program *p, const char *fnName, uint32_t label)
binPos = 0;
binSize = 0;
stackPtr = NULL;
tlsBase = 0;
tlsSize = 0;
prog->add(this, id);
}

View File

@ -2420,6 +2420,7 @@ Program::makeFromTGSI(struct nv50_ir_prog_info *info)
tgsi::Source src(info);
if (!src.scanSource())
return false;
tlsSize = info->bin.tlsSpace;
Converter builder(this, &src);
return builder.run();

View File

@ -192,6 +192,11 @@ Instruction *Value::getUniqueInsn() const
return defs.front()->getInsn();
}
// An instruction has constrained defs if it is an OP_UNION or if it has a
// second definition.
inline bool Instruction::constrainedDefs() const
{
   if (op == OP_UNION)
      return true;
   return defExists(1);
}
Value *Instruction::getIndirect(int s, int dim) const
{
return srcs[s].isIndirect(dim) ? getSrc(srcs[s].indirect[dim]) : NULL;

View File

@ -48,7 +48,7 @@ Instruction::isNop() const
}
if (op == OP_MOV || op == OP_UNION) {
if (!def(0).rep()->equals(getSrc(0)))
if (!getDef(0)->equals(getSrc(0)))
return false;
if (op == OP_UNION)
if (!def(0).rep()->equals(getSrc(1)))

View File

@ -283,6 +283,9 @@ int LValue::print(char *buf, size_t size, DataType ty) const
else
if (reg.size == 16)
postFix = "q";
else
if (reg.size == 12)
postFix = "t";
break;
case FILE_PREDICATE:
r = 'p'; col = TXT_REGISTER;

File diff suppressed because it is too large Load Diff

View File

@ -88,6 +88,11 @@ Stack::moveTo(Stack& that)
this->size = 0;
}
// Copy constructor: start out empty, then re-add every range of @that, so
// the copy owns its own range list.
Interval::Interval(const Interval& that) : head(NULL), tail(NULL)
{
   for (Range *src = that.head; src; src = src->next)
      this->extend(src->bgn, src->end);
}
Interval::~Interval()
{
clear();
@ -148,7 +153,7 @@ Interval::extend(int a, int b)
return true;
}
bool Interval::contains(int pos)
bool Interval::contains(int pos) const
{
for (Range *r = head; r && r->bgn <= pos; r = r->next)
if (r->end > pos)
@ -156,16 +161,37 @@ bool Interval::contains(int pos)
return false;
}
// Return true if any range of this interval intersects any range of the
// other interval. Two ranges intersect when each one begins before the other
// one ends, so ranges that merely touch (a->end == b->bgn) do not overlap.
//
// NOTE(review): two signature lines follow; this looks like the old and the
// new line of a diff whose +/- markers were lost. The enabled branch uses the
// parameter name "that", the disabled one still uses "iv" - confirm.
bool Interval::overlaps(const Interval &iv) const
bool Interval::overlaps(const Interval &that) const
{
#if 1
// Walk both range lists in lockstep, always advancing the list whose current
// range cannot intersect anything further in the other list.
// NOTE(review): this presumes each list is sorted by position and free of
// internal overlaps - confirm against extend().
Range *a = this->head;
Range *b = that.head;
while (a && b) {
if (b->bgn < a->end &&
b->end > a->bgn)
return true;
// a lies entirely before b: move on in a; otherwise b lies entirely
// before a (they do not intersect), so move on in b.
if (a->end <= b->bgn)
a = a->next;
else
b = b->next;
}
#else
// Disabled variant: compare every range of one list with every range of the
// other.
for (Range *rA = this->head; rA; rA = rA->next)
for (Range *rB = iv.head; rB; rB = rB->next)
if (rB->bgn < rA->end &&
rB->end > rA->bgn)
return true;
#endif
return false;
}
void Interval::insert(const Interval &that)
{
for (Range *r = that.head; r; r = r->next)
this->extend(r->bgn, r->end);
}
void Interval::unify(Interval &that)
{
assert(this != &that);
@ -177,6 +203,14 @@ void Interval::unify(Interval &that)
that.head = NULL;
}
int Interval::length() const
{
int len = 0;
for (Range *r = head; r; r = r->next)
len += r->bgn - r->end;
return len;
}
void Interval::print() const
{
if (!head)
@ -205,6 +239,27 @@ BitSet& BitSet::operator|=(const BitSet &set)
return *this;
}
/**
 * Change the number of bits of the set to @nBits, keeping the old contents
 * and zeroing all additional bits.
 *
 * If the set has no storage yet, or @nBits is 0, this defers to
 * allocate(nBits, true).
 *
 * @return true on success; false if the storage could not be reallocated,
 *         in which case the set is left empty (size 0)
 */
bool BitSet::resize(unsigned int nBits)
{
   if (!data || !nBits)
      return allocate(nBits, true);
   const unsigned int p = (size + 31) / 32;  // words currently in use
   const unsigned int n = (nBits + 31) / 32; // words required

   if (n != p) {
      // Failure handling is kept as it was (pointer dropped, size reset).
      data = (uint32_t *)REALLOC(data, 4 * p, 4 * n);
      if (!data) {
         size = 0;
         return false;
      }
   }
   if (nBits > size) {
      // Whole-word operations (setMask, periodicMask32) may have left bits
      // set beyond the old size in the last old word; clear them so the bits
      // that become visible now really are zero.
      if (size % 32)
         data[p - 1] &= (1u << (size % 32)) - 1;
      // Zero the newly added words. data is indexed in words, so they start
      // at data[p]; the previous &data[4 * p + 4] pointed outside the buffer.
      if (n > p)
         memset(&data[p], 0, (n - p) * 4);
   }
   // Record the new size even when the word count did not change.
   size = nBits;
   return true;
}
bool BitSet::allocate(unsigned int nBits, bool zero)
{
if (data && size < nBits) {
@ -254,6 +309,65 @@ void BitSet::setOr(BitSet *pA, BitSet *pB)
}
}
/**
 * Find a free (all bits clear) range that can hold @count bits.
 *
 * Candidate positions are aligned to @count rounded up to 1, 2, 4, 8, 16 or
 * 32, so a range never crosses a 32 bit word.
 *
 * @param count  number of bits wanted, at most 32
 * @return index of the first bit of the range, or -1 if no suitable range
 *         lies completely below the size of the set
 */
int BitSet::findFreeRange(unsigned int count) const
{
   // Mask of count low bits; 1 << 32 would be undefined, so build the full
   // word explicitly.
   const uint32_t m = (count >= 32) ? 0xffffffff : ((1u << count) - 1);
   int pos = size;
   unsigned int i;
   const unsigned int end = (size + 31) / 32;

   if (count == 1) {
      // Lowest clear bit of the first word that has one.
      for (i = 0; i < end; ++i) {
         pos = ffs(~data[i]) - 1;
         if (pos >= 0)
            break;
      }
   } else
   if (count == 2) {
      for (i = 0; i < end; ++i) {
         if (data[i] != 0xffffffff) {
            // Bit k of b is clear only if bits k and k + 1 of the word are
            // clear; 0xaaaaaaaa rules out all odd (unaligned) positions.
            uint32_t b = data[i] | (data[i] >> 1) | 0xaaaaaaaa;
            pos = ffs(~b) - 1;
            if (pos >= 0)
               break;
         }
      }
   } else
   if (count == 4 || count == 3) {
      for (i = 0; i < end; ++i) {
         if (data[i] != 0xffffffff) {
            // Same idea with 4 consecutive bits; 0xeeeeeeee leaves only the
            // positions that are multiples of 4.
            uint32_t b =
               (data[i] >> 0) | (data[i] >> 1) |
               (data[i] >> 2) | (data[i] >> 3) | 0xeeeeeeee;
            pos = ffs(~b) - 1;
            if (pos >= 0)
               break;
         }
      }
   } else {
      // Larger requests: step through the word at the rounded-up alignment
      // and test the requested number of bits at each step.
      if (count <= 8)
         count = 8;
      else
      if (count <= 16)
         count = 16;
      else
         count = 32;

      for (i = 0; i < end; ++i) {
         if (data[i] != 0xffffffff) {
            for (pos = 0; pos < 32; pos += count)
               if (!(data[i] & (m << pos)))
                  break;
            if (pos < 32)
               break;
         }
      }
   }

   // If the last word examined had no usable position, ffs() left -1 in pos.
   // Adding i * 32 to it would give size - 1 for sizes that are a multiple
   // of 32 and report an occupied bit as free, so bail out first.
   if (pos < 0)
      return -1;

   pos += i * 32;

   return ((pos + count) <= size) ? pos : -1;
}
void BitSet::print() const
{
unsigned int n = 0;

View File

@ -137,6 +137,8 @@ public:
(__listA)->prev = prevB; \
} while(0)
// True if the next pointer of __list refers back to __list itself.
#define DLLIST_EMPTY(__list) ((__list)->next == (__list))
// Loop header that declares a DLList::Iterator named `it`, obtained from
// list->iterator(), and advances it with next() until end() reports true.
// `list` is used as a pointer; the loop body follows the macro invocation.
#define DLLIST_FOR_EACH(list, it) \
for (DLList::Iterator (it) = (list)->iterator(); !(it).end(); (it).next())
@ -416,17 +418,22 @@ class Interval
{
public:
Interval() : head(0), tail(0) { }
Interval(const Interval&);
~Interval();
bool extend(int, int);
void insert(const Interval&);
void unify(Interval&); // clears source interval
void clear();
inline int begin() { return head ? head->bgn : -1; }
inline int end() { checkTail(); return tail ? tail->end : -1; }
inline int begin() const { return head ? head->bgn : -1; }
inline int end() const { checkTail(); return tail ? tail->end : -1; }
inline bool isEmpty() const { return !head; }
bool overlaps(const Interval&) const;
bool contains(int pos);
bool contains(int pos) const;
inline int extent() const { return end() - begin(); }
int length() const;
void print() const;
@ -477,6 +484,7 @@ public:
}
bool allocate(unsigned int nBits, bool zero);
bool resize(unsigned int nBits); // keep old data, zero additional bits
inline unsigned int getSize() const { return size; }
@ -489,18 +497,44 @@ public:
assert(i < size);
data[i / 32] |= 1 << (i % 32);
}
// Set the n bits starting at bit i.
// NOTE: range may not cross 32 bit boundary (implies n <= 32)
inline void setRange(unsigned int i, unsigned int n)
{
   assert((i + n) <= size && (((i % 32) + n) <= 32));
   // n == 32 is allowed (with i at a word boundary), but 1 << 32 is
   // undefined, so the full-word mask is spelled out.
   const uint32_t m = (n >= 32) ? 0xffffffff : ((1u << n) - 1);
   data[i / 32] |= m << (i % 32);
}
// OR the mask m into the 32 bit word that contains bit i. The mask is
// applied to the word as is, it is not shifted by i % 32.
inline void setMask(unsigned int i, uint32_t m)
{
   assert(i < size);
   uint32_t &word = data[i / 32];
   word = word | m;
}
// Clear bit i.
inline void clr(unsigned int i)
{
   assert(i < size);
   const unsigned int word = i / 32;
   const unsigned int bit = i % 32;
   data[word] &= ~(1 << bit);
}
// Clear the n bits starting at bit i.
// NOTE: range may not cross 32 bit boundary (implies n <= 32)
inline void clrRange(unsigned int i, unsigned int n)
{
   assert((i + n) <= size && (((i % 32) + n) <= 32));
   // n == 32 is allowed (with i at a word boundary), but 1 << 32 is
   // undefined, so the full-word mask is spelled out.
   const uint32_t m = (n >= 32) ? 0xffffffff : ((1u << n) - 1);
   data[i / 32] &= ~(m << (i % 32));
}
// Return true if bit i is set.
inline bool test(unsigned int i) const
{
   assert(i < size);
   const uint32_t word = data[i / 32];
   if (word & (1 << (i % 32)))
      return true;
   return false;
}
// NOTE: range may not cross 32 bit boundary (implies n <= 32)
inline bool testRange(unsigned int i, unsigned int n)
{
assert((i + n) <= size && (((i % 32) + n) <= 32));
return data[i / 32] & (((1 << n) - 1) << (i % 32));
}
// Find a range of size (<= 32) clear bits aligned to roundup_pow2(size).
int findFreeRange(unsigned int size) const;
BitSet& operator|=(const BitSet&);
@ -514,6 +548,13 @@ public:
void andNot(const BitSet&);
// Apply the same pair of masks to every 32 bit word of the set:
// bits = (bits | setMask) & ~clrMask
inline void periodicMask32(uint32_t setMask, uint32_t clrMask)
{
   const unsigned int words = (size + 31) / 32;
   const uint32_t keep = ~clrMask;

   for (unsigned int w = 0; w < words; ++w) {
      data[w] |= setMask;
      data[w] &= keep;
   }
}
unsigned int popCount() const;
void print() const;

View File

@ -1009,9 +1009,7 @@ CodeEmitterNVC0::emitTEX(const TexInstruction *i)
if (i->tex.target.isShadow())
code[1] |= 1 << 24;
int src1 = i->tex.target.getArgCount();
if (i->op == OP_TXD && i->tex.useOffsets)
++src1;
const int src1 = MAX2(i->predSrc + 1, 1); // if predSrc == 1, no 2nd src
if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
// lzero
@ -1184,7 +1182,7 @@ CodeEmitterNVC0::emitVFETCH(const Instruction *i)
emitPredicate(i);
code[0] |= (i->defCount(0xf) - 1) << 5;
code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
defId(i->def(0), 14);
srcId(i->src(0).getIndirect(0), 20);