nv50/ir: add various new OPs that will be needed for compute

This commit is contained in:
Christoph Bumiller 2013-02-22 18:45:16 +01:00
parent c82714c593
commit 22b762f9b4
9 changed files with 179 additions and 48 deletions

View File

@ -567,11 +567,11 @@ void Instruction::init()
terminator = 0;
ftz = 0;
dnz = 0;
atomic = 0;
perPatch = 0;
fixed = 0;
encSize = 0;
ipa = 0;
mask = 0;
lanes = 0xf;
@ -733,7 +733,7 @@ Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
i->saturate = saturate;
i->join = join;
i->exit = exit;
i->atomic = atomic;
i->mask = mask;
i->ftz = ftz;
i->dnz = dnz;
i->ipa = ipa;

View File

@ -66,7 +66,7 @@ enum operation
OP_SHR,
OP_MAX,
OP_MIN,
OP_SAT, // CLAMP(f32, 0.0, 1.0)
OP_SAT, // CLAMP(f32, 0.0, 1.0)
OP_CEIL,
OP_FLOOR,
OP_TRUNC,
@ -102,7 +102,7 @@ enum operation
OP_JOIN, // converge
OP_DISCARD,
OP_EXIT,
OP_MEMBAR,
OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
OP_EXPORT,
@ -117,21 +117,42 @@ enum operation
OP_TXQ, // texture size query
OP_TXD, // texture derivatives
OP_TXG, // texture gather
OP_TEXCSAA,
OP_SULD, // surface load
OP_SUST, // surface store
OP_TEXCSAA, // texture op for coverage sampling
OP_TEXPREP, // turn cube map array into 2d array coordinates
OP_SULDB, // surface load (raw)
OP_SULDP, // surface load (formatted)
OP_SUSTB, // surface store (raw)
OP_SUSTP, // surface store (formatted)
OP_SUREDB,
OP_SUREDP, // surface reduction (atomic op)
OP_SULEA, // surface load effective address
OP_SUBFM, // surface bitfield manipulation
OP_SUCLAMP, // clamp surface coordinates
OP_SUEAU, // surface effective address
OP_MADSP, // special integer multiply-add
OP_TEXBAR, // texture dependency barrier
OP_DFDX,
OP_DFDY,
OP_RDSV, // read system value
OP_WRSV, // write system value
OP_TEXPREP, // turn cube map array into 2d array coordinates, TODO: move
OP_QUADOP,
OP_QUADON,
OP_QUADPOP,
OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF,
OP_TEXBAR,
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
OP_VADD, // byte/word vector operations
OP_VAVG,
OP_VMIN,
OP_VMAX,
OP_VSAD,
OP_VSET,
OP_VSHR,
OP_VSHL,
OP_VSEL,
OP_LAST
};
@ -146,6 +167,59 @@ enum operation
#define NV50_IR_SUBOP_EMU_PRERET 1
// TEXBAR sub-operation: the sub-op value is the argument itself.
// The expansion is parenthesised so that an expression argument (e.g. `a | b`)
// cannot re-associate with operators surrounding the macro invocation.
#define NV50_IR_SUBOP_TEXBAR(n) (n)
#define NV50_IR_SUBOP_MOV_FINAL 1
#define NV50_IR_SUBOP_EXTBF_REV 1
#define NV50_IR_SUBOP_PERMT_F4E 1
#define NV50_IR_SUBOP_PERMT_B4E 2
#define NV50_IR_SUBOP_PERMT_RC8 3
#define NV50_IR_SUBOP_PERMT_ECL 4
#define NV50_IR_SUBOP_PERMT_ECR 5
#define NV50_IR_SUBOP_PERMT_RC16 6
#define NV50_IR_SUBOP_BAR_SYNC 0
#define NV50_IR_SUBOP_BAR_ARRIVE 1
#define NV50_IR_SUBOP_BAR_RED_AND 2
#define NV50_IR_SUBOP_BAR_RED_OR 3
#define NV50_IR_SUBOP_BAR_RED_POPC 4
// MEMBAR sub-operation encoding: two fields packed into one value.
//   bits 0-1 : L / S / M selector (1, 2, 3), extracted by NV50_IR_SUBOP_MEMBAR_DIR()
//              NOTE(review): presumably load / store / both, matching the
//              lfence / sfence / mfence wording on OP_MEMBAR -- confirm.
//   bits 2+  : scope (CTA = 0, GL = 1, SYS = 2, each shifted left by 2),
//              extracted by NV50_IR_SUBOP_MEMBAR_SCOPE()
#define NV50_IR_SUBOP_MEMBAR_L 1
#define NV50_IR_SUBOP_MEMBAR_S 2
#define NV50_IR_SUBOP_MEMBAR_M 3
#define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2)
#define NV50_IR_SUBOP_MEMBAR_GL (1 << 2)
#define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2)
#define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3)
#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)
// Composes a sub-op from two name suffixes via token pasting, e.g.
// NV50_IR_SUBOP_MEMBAR(M, GL) expands to
// (NV50_IR_SUBOP_MEMBAR_M | NV50_IR_SUBOP_MEMBAR_GL).
// The arguments must be bare suffixes of the constants above, not expressions.
#define NV50_IR_SUBOP_MEMBAR(d,s) \
(NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
// Atomic sub-operations, numbered densely from 0.
// The IR printer uses the sub-op value as a direct index into its
// atomSubOpStr[] name table (for OP_ATOM and OP_SUREDP), so the numbering
// here and the order of that table must be kept in sync.
#define NV50_IR_SUBOP_ATOM_ADD 0
#define NV50_IR_SUBOP_ATOM_MIN 1
#define NV50_IR_SUBOP_ATOM_MAX 2
#define NV50_IR_SUBOP_ATOM_INC 3
#define NV50_IR_SUBOP_ATOM_DEC 4
#define NV50_IR_SUBOP_ATOM_AND 5
#define NV50_IR_SUBOP_ATOM_OR 6
#define NV50_IR_SUBOP_ATOM_XOR 7
#define NV50_IR_SUBOP_ATOM_CAS 8
#define NV50_IR_SUBOP_ATOM_EXCH 9
#define NV50_IR_SUBOP_SUST_IGN 0
#define NV50_IR_SUBOP_SUST_TRAP 1
#define NV50_IR_SUBOP_SUST_SDCL 3
#define NV50_IR_SUBOP_SULD_ZERO 0
#define NV50_IR_SUBOP_SULD_TRAP 1
#define NV50_IR_SUBOP_SULD_SDCL 3
#define NV50_IR_SUBOP_SUBFM_3D 1
// SUCLAMP sub-operation encoding.
//   low bits : mode base (SD = 0, PL = 5, BL = 10) plus the offset `r`
//   bit 4    : NV50_IR_SUBOP_SUCLAMP_2D, set when `d` equals 2
//              NOTE(review): `d` is presumably the coordinate dimensionality -- confirm.
// Both parameters are parenthesised in the expansion so that expression
// arguments (e.g. `a | b`) are compared/added as a whole instead of being
// split by operator precedence.
#define NV50_IR_SUBOP_SUCLAMP_2D 0x10
#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
// This will have to do for now.
// The bitfields are supposed to correspond to nve4 ISA.
#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
// Sub-operation encoding for the vector ops, built from three fields plus a
// mode tag:
//   a at bit 0, b at bit 5, d at bit 10, tag in bits 14-15.
// The tag is 0 for V1 (0x0000), 1 for V2 (0x4000) and 2 for V4 (0x8000);
// NV50_IR_SUBOP_Vn() recovers that tag from an encoded value.
// Encoded values reach bit 15, so they do not fit in an 8-bit field.
#define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000)
#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)
#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)
#define NV50_IR_SUBOP_Vn(n) ((n) >> 14)
enum DataType
{
@ -680,22 +754,20 @@ public:
RoundMode rnd;
CacheMode cache;
uint8_t subOp; // quadop, 1 for mul-high, etc.
uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
uint16_t subOp; // quadop, 1 for mul-high, etc.
unsigned encSize : 4; // encoding size in bytes
unsigned saturate : 1; // to [0.0f, 1.0f]
unsigned join : 1; // converge control flow (use OP_JOIN until end)
unsigned fixed : 1; // prevent dead code elimination
unsigned terminator : 1; // end of basic block
unsigned atomic : 1;
unsigned ftz : 1; // flush denormal to zero
unsigned dnz : 1; // denormals, NaN are zero
unsigned ipa : 4; // interpolation mode
unsigned lanes : 4;
unsigned perPatch : 1;
unsigned exit : 1; // terminate program after insn
unsigned mask : 4; // for vector ops
int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
@ -703,6 +775,8 @@ public:
int8_t flagsDef;
int8_t flagsSrc;
uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
BasicBlock *bb;
protected:

View File

@ -43,7 +43,7 @@ static inline bool isMemoryFile(DataFile f)
// contrary to asTex(), this will never include SULD/SUST
static inline bool isTextureOp(operation op)
{
return (op >= OP_TEX && op <= OP_TEXCSAA) || (op == OP_TEXPREP);
return (op >= OP_TEX && op <= OP_TEXPREP);
}
static inline unsigned int typeSizeof(DataType ty)
@ -304,14 +304,14 @@ const FlowInstruction *Instruction::asFlow() const
TexInstruction *Instruction::asTex()
{
if ((op >= OP_TEX && op <= OP_TEXCSAA) || (op == OP_TEXPREP))
if (op >= OP_TEX && op <= OP_SULEA)
return static_cast<TexInstruction *>(this);
return NULL;
}
const TexInstruction *Instruction::asTex() const
{
if (op >= OP_TEX && op <= OP_TEXCSAA)
if (op >= OP_TEX && op <= OP_SULEA)
return static_cast<const TexInstruction *>(this);
return NULL;
}

View File

@ -2060,8 +2060,7 @@ Instruction::isActionEqual(const Instruction *that) const
if (this->asFlow()) {
return false;
} else {
if (this->atomic != that->atomic ||
this->ipa != that->ipa ||
if (this->ipa != that->ipa ||
this->lanes != that->lanes ||
this->perPatch != that->perPatch)
return false;
@ -2074,7 +2073,8 @@ Instruction::isActionEqual(const Instruction *that) const
this->rnd != that->rnd ||
this->ftz != that->ftz ||
this->dnz != that->dnz ||
this->cache != that->cache)
this->cache != that->cache ||
this->mask != that->mask)
return false;
return true;

View File

@ -132,7 +132,7 @@ const char *operationStr[OP_LAST + 1] =
"join",
"discard",
"exit",
"barrier",
"membar",
"vfetch",
"pfetch",
"export",
@ -148,23 +148,49 @@ const char *operationStr[OP_LAST + 1] =
"texgrad",
"texgather",
"texcsaa",
"suld",
"sust",
"texprep",
"suldb",
"suldp",
"sustb",
"sustp",
"suredb",
"suredp",
"sulea",
"subfm",
"suclamp",
"sueau",
"madsp",
"texbar",
"dfdx",
"dfdy",
"rdsv",
"wrsv",
"texprep",
"quadop",
"quadon",
"quadpop",
"popcnt",
"insbf",
"extbf",
"texbar",
"permt",
"atom",
"bar",
"vadd",
"vavg",
"vmin",
"vmax",
"vsad",
"vset",
"vshr",
"vshl",
"vsel",
"(invalid)"
};
// Printable names of the atomic sub-operations, indexed by the sub-op value
// (the printer looks up atomSubOpStr[subOp] for OP_ATOM and OP_SUREDP).
static const char *atomSubOpStr[] =
{
   "add",  // 0
   "min",  // 1
   "max",  // 2
   "inc",  // 3
   "dec",  // 4
   "and",  // 5
   "or",   // 6
   "xor",  // 7
   "cas",  // 8
   "exch"  // 9
};
static const char *DataTypeStr[] =
{
"-",
@ -488,8 +514,17 @@ void Instruction::print() const
PRINT("%s ", operationStr[op]);
if (op == OP_LINTERP || op == OP_PINTERP)
PRINT("%s ", interpStr[ipa]);
if (subOp)
PRINT("(SUBOP:%u) ", subOp);
switch (op) {
case OP_SUREDP:
case OP_ATOM:
if (subOp < Elements(atomSubOpStr))
PRINT("%s ", atomSubOpStr[subOp]);
break;
default:
if (subOp)
PRINT("(SUBOP:%u) ", subOp);
break;
}
if (perPatch)
PRINT("patch ");
if (asTex())

View File

@ -44,11 +44,16 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] =
1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
1, 1, // EMIT, RESTART
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA
1, 2, // SULD, SUST
1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TEXCSAA, TEXPREP
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 2, 2, 0, 0, // RDSV, WRSV, TEXPREP, QUADOP, QUADON, QUADPOP
2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR
1, 2, 2, 0, 0, // RDSV, WRSV, QUADOP, QUADON, QUADPOP
2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, // VSHR, VSHL, VSEL
0
};
@ -89,25 +94,37 @@ const OpClass Target::operationClass[OP_LAST + 1] =
// DISCARD, EXIT
OPCLASS_FLOW, OPCLASS_FLOW,
// MEMBAR
OPCLASS_OTHER,
OPCLASS_CONTROL,
// VFETCH, PFETCH, EXPORT
OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
// LINTERP, PINTERP
OPCLASS_SFU, OPCLASS_SFU,
// EMIT, RESTART
OPCLASS_OTHER, OPCLASS_OTHER,
// TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA
OPCLASS_CONTROL, OPCLASS_CONTROL,
// TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA; TEXPREP
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
// SULD, SUST
OPCLASS_SURFACE, OPCLASS_SURFACE,
// DFDX, DFDY, RDSV, WRSV; TEXPREP, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
// POPCNT, INSBF, EXTBF
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_TEXTURE,
// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
// SUBFM, SUCLAMP, SUEAU, MADSP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// TEXBAR
OPCLASS_OTHER,
// DFDX, DFDY, RDSV, WRSV; QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
// POPCNT, INSBF, EXTBF, PERMT
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
// ATOM, BAR
OPCLASS_ATOMIC, OPCLASS_CONTROL,
// VADD, VAVG, VMIN, VMAX
OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
// VSAD, VSET, VSHR, VSHL
OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
// VSEL
OPCLASS_VECTOR,
OPCLASS_PSEUDO // LAST
};

View File

@ -111,7 +111,10 @@ enum OpClass
OPCLASS_SURFACE = 11,
OPCLASS_FLOW = 12,
OPCLASS_PSEUDO = 14,
OPCLASS_OTHER = 15
OPCLASS_VECTOR = 15,
OPCLASS_BITFIELD = 16,
OPCLASS_CONTROL = 17,
OPCLASS_OTHER = 18
};
class Target

View File

@ -113,19 +113,20 @@ void TargetNV50::initOpInfo()
static const uint32_t commutative[(OP_LAST + 31) / 32] =
{
// ADD,MAD,MUL,AND,OR,XOR,MAX,MIN
0x0670ca00, 0x0000003f, 0x00000000
0x0670ca00, 0x0000003f, 0x00000000, 0x00000000
};
static const uint32_t shortForm[(OP_LAST + 31) / 32] =
{
// MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
0x00010e40, 0x00000040, 0x00000498
0x00010e40, 0x00000040, 0x00000498, 0x00000000
};
static const operation noDestList[] =
{
OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
OP_QUADON, OP_QUADPOP
OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP,
OP_SUREDB, OP_BAR
};
static const operation noPredList[] =
{

View File

@ -260,13 +260,13 @@ void TargetNVC0::initOpInfo()
static const uint32_t commutative[(OP_LAST + 31) / 32] =
{
// ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
0x0670ca00, 0x0000003f, 0x00000000
0x0670ca00, 0x0000003f, 0x00000000, 0x00000000
};
static const uint32_t shortForm[(OP_LAST + 31) / 32] =
{
// ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
0x0670ca00, 0x00000000, 0x00000000
0x0670ca00, 0x00000000, 0x00000000, 0x00000000
};
static const operation noDest[] =
@ -274,7 +274,8 @@ void TargetNVC0::initOpInfo()
OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
OP_QUADON, OP_QUADPOP, OP_TEXBAR
OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP,
OP_SUREDB, OP_BAR
};
for (i = 0; i < DATA_FILE_COUNT; ++i)