nvc0: add maxwell (sm50) compiler backend

The big missing part here is proper sched data calculations, but
hopefully the chosen placeholder will be sufficient for now.

Passes piglit as well as GK107 does.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Ben Skeggs 2014-05-09 15:56:05 +10:00
parent 7b9475fa65
commit d548d47edf
16 changed files with 3588 additions and 5 deletions

View File

@ -72,10 +72,13 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_util.cpp
NVC0_CODEGEN_SOURCES := \
codegen/nv50_ir_emit_gk110.cpp \
codegen/nv50_ir_emit_nvc0.cpp \
codegen/nv50_ir_emit_gk110.cpp \
codegen/nv50_ir_emit_gm107.cpp \
codegen/nv50_ir_lowering_nvc0.cpp \
codegen/nv50_ir_target_nvc0.cpp
codegen/nv50_ir_lowering_gm107.cpp \
codegen/nv50_ir_target_nvc0.cpp \
codegen/nv50_ir_target_gm107.cpp
NVC0_C_SOURCES := \
nvc0/nvc0_compute.c \

View File

@ -1,6 +1,6 @@
ENVYAS ?= envyas
all: gf100.asm.h gk104.asm.h gk110.asm.h
all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h
gf100.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@
@ -8,3 +8,5 @@ gk104.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@
gk110.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mgk110 $< -o $@
gm107.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mgm107 $< -o $@

View File

@ -0,0 +1,115 @@
.section #gm107_builtin_code
// DIV U32
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// (only $p0 is actually written by the code below)
// SIZE: 36 * 8 bytes (9 sched words + 27 instructions = 0x120 bytes)
//
// Layout: every "sched" word is followed by exactly three instructions, and
// all slots use the same value 0x7e0.
// NOTE(review): operand order in the comments below (destination first, then
// sources) is inferred from how the registers are used - confirm against
// the envyas gm107 syntax.
//
gm107_div_u32:
sched 0x7e0 0x7e0 0x7e0
// Initial estimate of z: $r2 = 1 << (flo(b) ^ 31)
flo u32 $r2 $r1
lop xor 1 $r2 $r2 0x1f
mov $r3 0x1 0xf
sched 0x7e0 0x7e0 0x7e0
shl $r2 $r3 $r2
// $r1 = -b; the refinement rounds multiply by the negated divisor
i2i u32 u32 $r1 neg $r1
// Five refinement rounds, each:
// $r3 = $r1 * $r2 (low word)
// $r2 = hi($r2 * $r3) + $r2
imul u32 u32 $r3 $r1 $r2
sched 0x7e0 0x7e0 0x7e0
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
sched 0x7e0 0x7e0 0x7e0
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
sched 0x7e0 0x7e0 0x7e0
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
sched 0x7e0 0x7e0 0x7e0
// Save the dividend in $r3, then q = hi(a * z) goes to $r0
mov $r3 $r0 0xf
imul u32 u32 hi $r0 $r0 $r2
// $r2 = b again (negation of the already negated $r1)
i2i u32 u32 $r2 neg $r1
sched 0x7e0 0x7e0 0x7e0
// $r1 = (-b) * q + a, the remainder candidate
imad u32 u32 $r1 $r1 $r0 $r3
// First correction: if remainder >= b, subtract b and increment q
isetp ge u32 and $p0 1 $r1 $r2 1
$p0 iadd $r1 $r1 neg $r2
sched 0x7e0 0x7e0 0x7e0
$p0 iadd $r0 $r0 0x1
// Second correction, only evaluated when the first one was taken
$p0 isetp ge u32 and $p0 1 $r1 $r2 1
$p0 iadd $r1 $r1 neg $r2
sched 0x7e0 0x7e0 0x7e0
$p0 iadd $r0 $r0 0x1
ret
// presumably padding so the last sched group holds three instructions
nop 0
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
// $p2 records "dividend < 0" and $p3 records "divisor < 0" xor $p2; they are
// consumed at the very end to restore the signs of modulus and quotient.
// The middle part is the same instruction sequence as gm107_div_u32, only
// grouped differently under the sched words.
//
gm107_div_s32:
sched 0x7e0 0x7e0 0x7e0
// $p2 = ($r0 < 0); $p3 = ($r1 < 0) xor $p2
isetp lt and $p2 0x1 $r0 0 1
isetp lt xor $p3 1 $r1 0 $p2
// Work on absolute values from here on
i2i s32 s32 $r0 abs $r0
sched 0x7e0 0x7e0 0x7e0
i2i s32 s32 $r1 abs $r1
// Initial estimate: $r2 = 1 << (flo(|b|) ^ 31)
flo u32 $r2 $r1
lop xor 1 $r2 $r2 0x1f
sched 0x7e0 0x7e0 0x7e0
mov $r3 0x1 0xf
shl $r2 $r3 $r2
// $r1 = -|b| for the refinement rounds
i2i u32 u32 $r1 neg $r1
sched 0x7e0 0x7e0 0x7e0
// Five refinement rounds: $r3 = $r1 * $r2, then $r2 = hi($r2 * $r3) + $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
sched 0x7e0 0x7e0 0x7e0
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
sched 0x7e0 0x7e0 0x7e0
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
sched 0x7e0 0x7e0 0x7e0
imad u32 u32 hi $r2 $r2 $r3 $r2
// Save |a| in $r3, then q = hi(|a| * z) goes to $r0
mov $r3 $r0 0xf
imul u32 u32 hi $r0 $r0 $r2
sched 0x7e0 0x7e0 0x7e0
// $r2 = |b| again; $r1 = (-|b|) * q + |a| is the remainder candidate
i2i u32 u32 $r2 neg $r1
imad u32 u32 $r1 $r1 $r0 $r3
// Up to two corrections: while remainder >= |b|, subtract |b| and bump q
isetp ge u32 and $p0 1 $r1 $r2 1
sched 0x7e0 0x7e0 0x7e0
$p0 iadd $r1 $r1 neg $r2
$p0 iadd $r0 $r0 0x1
$p0 isetp ge u32 and $p0 1 $r1 $r2 1
sched 0x7e0 0x7e0 0x7e0
$p0 iadd $r1 $r1 neg $r2
$p0 iadd $r0 $r0 0x1
// Quotient is negated when the input signs differ ($p3)
$p3 i2i s32 s32 $r0 neg $r0
sched 0x7e0 0x7e0 0x7e0
// Modulus is negated when the dividend was negative ($p2)
$p2 i2i s32 s32 $r1 neg $r1
ret
nop 0
// STUB
//
// Both f64 builtins share this single entry point, which returns right away
// without reading or writing any register: no reciprocal or reciprocal
// square root is computed here.
gm107_rcp_f64:
gm107_rsq_f64:
sched 0x7e0 0x7e0 0x7e0
ret
nop 0
nop 0
// Offset table: one 64-bit entry per builtin, holding the address of its
// label. Entries are looked up by index, so the order of the lines below is
// significant.
// NOTE(review): presumably the order has to match the NVC0_BUILTIN_* indices
// used by callers - verify before reordering or inserting entries.
.section #gm107_builtin_offsets
.b64 #gm107_div_u32
.b64 #gm107_div_s32
.b64 #gm107_rcp_f64
.b64 #gm107_rsq_f64

View File

@ -0,0 +1,97 @@
/*
 * Assembled form of gm107.asm, one 64-bit word per array element.
 * The lib Makefile rebuilds this file with "envyas -a -W -mgm107"; change the
 * .asm source and regenerate instead of editing the words by hand.
 * The recurring word 0x001f8000fc0007e0 is the encoded "sched" slot that
 * precedes every group of three instructions.
 */
uint64_t gm107_builtin_code[] = {
/* 0x0000: gm107_div_u32 */
0x001f8000fc0007e0,
0x5c30000000170002,
0x3847040001f70202,
0x3898078000170003,
0x001f8000fc0007e0,
0x5c48000000270302,
0x5ce0200000170a01,
0x5c38000000270103,
0x001f8000fc0007e0,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x5c98078000070003,
0x5c38008000270000,
0x5ce0200000170a02,
0x001f8000fc0007e0,
0x5a00018000070101,
0x5b6c038000270107,
0x5c11000000200101,
0x001f8000fc0007e0,
0x3810000000100000,
0x5b6c038000200107,
0x5c11000000200101,
0x001f8000fc0007e0,
0x3810000000100000,
0xe32000000007000f,
0x50b0000000070f00,
/* 0x0120: gm107_div_s32 */
0x001f8000fc0007e0,
0x5b6303800ff70017,
0x5b6341000ff7011f,
0x5ce2000000073a00,
0x001f8000fc0007e0,
0x5ce2000000173a01,
0x5c30000000170002,
0x3847040001f70202,
0x001f8000fc0007e0,
0x3898078000170003,
0x5c48000000270302,
0x5ce0200000170a01,
0x001f8000fc0007e0,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x5a40010000370202,
0x5c98078000070003,
0x5c38008000270000,
0x001f8000fc0007e0,
0x5ce0200000170a02,
0x5a00018000070101,
0x5b6c038000270107,
0x001f8000fc0007e0,
0x5c11000000200101,
0x3810000000100000,
0x5b6c038000200107,
0x001f8000fc0007e0,
0x5c11000000200101,
0x3810000000100000,
0x5ce0200000033a00,
0x001f8000fc0007e0,
0x5ce0200000123a01,
0xe32000000007000f,
0x50b0000000070f00,
/* 0x0280: gm107_rcp_f64 */
/* 0x0280: gm107_rsq_f64 */
0x001f8000fc0007e0,
0xe32000000007000f,
0x50b0000000070f00,
0x50b0000000070f00,
};
/*
 * Byte offsets of each builtin's entry point inside gm107_builtin_code, in
 * the order div_u32, div_s32, rcp_f64, rsq_f64. The last two are equal
 * because both f64 builtins share one stub.
 */
uint64_t gm107_builtin_offsets[] = {
0x0000000000000000,
0x0000000000000120,
0x0000000000000280,
0x0000000000000280,
};

View File

@ -157,6 +157,7 @@ enum operation
OP_VSHL,
OP_VSEL,
OP_CCTL, // cache control
OP_SHFL, // warp shuffle
OP_LAST
};
@ -223,6 +224,10 @@ enum operation
#define NV50_IR_SUBOP_PIXLD_OFFSET 3
#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4
#define NV50_IR_SUBOP_PIXLD_SAMPLEID 5
#define NV50_IR_SUBOP_SHFL_IDX 0
#define NV50_IR_SUBOP_SHFL_UP 1
#define NV50_IR_SUBOP_SHFL_DOWN 2
#define NV50_IR_SUBOP_SHFL_BFLY 3
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
@ -379,6 +384,7 @@ enum SVSemantic
SV_LBASE,
SV_SBASE,
SV_VERTEX_STRIDE,
SV_INVOCATION_INFO,
SV_UNDEFINED,
SV_LAST
};

View File

@ -92,6 +92,7 @@ struct nv50_ir_prog_symbol
#define NVISA_GF100_CHIPSET_D0 0xd0
#define NVISA_GK104_CHIPSET 0xe0
#define NVISA_GK110_CHIPSET 0xf0
#define NVISA_GM107_CHIPSET 0x110
struct nv50_ir_prog_info
{

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,273 @@
/*
* Copyright 2011 Christoph Bumiller
* 2014 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_build_util.h"
#include "codegen/nv50_ir_target_nvc0.h"
#include "codegen/nv50_ir_lowering_gm107.h"
#include <limits>
namespace nv50_ir {
// Two-bit per-lane selectors that QUADOP() packs into the subOp of an
// OP_QUADOP instruction.
#define QOP_ADD 0
#define QOP_SUBR 1
#define QOP_SUB 2
#define QOP_MOV2 3
// UL UR LL LR
// Builds the 8-bit quadop mask from four selector names (given without the
// QOP_ prefix): q lands in bits 7:6, r in bits 5:4, s in bits 3:2 and t in
// bits 1:0.
#define QUADOP(q, r, s, t) \
((QOP_##q << 6) | (QOP_##r << 4) | \
(QOP_##s << 2) | (QOP_##t << 0))
bool
GM107LoweringPass::handleManualTXD(TexInstruction *i)
{
   // Quadop masks indexed by source lane; column 0 is used with dPdx and
   // column 1 with dPdy.
   static const uint8_t qOps[4][2] =
   {
      { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
      { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
      { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
      { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
   };
   Value *perLane[4][4]; // [def index][lane]
   Value *coord[3];
   Value *shuffled;
   Instruction *texClone, *quadop;
   Value *zero = bld.loadImm(bld.getSSA(), 0);
   int lane, comp;
   const int dim = i->tex.target.getDim();

   i->op = OP_TEX; // no need to clone dPdx/dPdy later

   for (comp = 0; comp < dim; ++comp)
      coord[comp] = bld.getScratch();
   shuffled = bld.getScratch();

   for (lane = 0; lane < 4; ++lane) {
      bld.mkOp(OP_QUADON, TYPE_NONE, NULL);

      // broadcast the coordinates of this lane to the whole quad
      for (comp = 0; comp < dim; ++comp) {
         bld.mkOp2(OP_SHFL, TYPE_F32, coord[comp], i->getSrc(comp),
                   bld.mkImm(lane));
         quadop = bld.mkOp2(OP_QUADOP, TYPE_F32, coord[comp], coord[comp],
                            zero);
         quadop->subOp = 0x00;
         quadop->lanes = 1; /* abused for .ndv */
      }

      // fold this lane's dPdx into the lanes selected by the dx mask
      for (comp = 0; comp < dim; ++comp) {
         bld.mkOp2(OP_SHFL, TYPE_F32, shuffled, i->dPdx[comp].get(),
                   bld.mkImm(lane));
         quadop = bld.mkOp2(OP_QUADOP, TYPE_F32, coord[comp], shuffled,
                            coord[comp]);
         quadop->subOp = qOps[lane][0];
         quadop->lanes = 1; /* abused for .ndv */
      }

      // fold this lane's dPdy into the lanes selected by the dy mask
      for (comp = 0; comp < dim; ++comp) {
         bld.mkOp2(OP_SHFL, TYPE_F32, shuffled, i->dPdy[comp].get(),
                   bld.mkImm(lane));
         quadop = bld.mkOp2(OP_QUADOP, TYPE_F32, coord[comp], shuffled,
                            coord[comp]);
         quadop->subOp = qOps[lane][1];
         quadop->lanes = 1; /* abused for .ndv */
      }

      // sample with the adjusted coordinates
      texClone = cloneForward(func, i);
      bld.insert(texClone);
      for (comp = 0; comp < dim; ++comp)
         texClone->setSrc(comp, coord[comp]);

      bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);

      // keep the result of this pass for lane `lane` only
      comp = 0;
      while (i->defExists(comp)) {
         Instruction *save;

         perLane[comp][lane] = bld.getSSA();
         save = bld.mkMov(perLane[comp][lane], texClone->getDef(comp));
         save->fixed = 1;
         save->lanes = 1 << lane;
         ++comp;
      }
   }

   // merge the four per-lane results back into the original definitions
   comp = 0;
   while (i->defExists(comp)) {
      Instruction *merge = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(comp));

      for (lane = 0; lane < 4; ++lane)
         merge->setSrc(lane, perLane[comp][lane]);
      ++comp;
   }

   i->bb->remove(i);
   return true;
}
// Rewrites OP_DFDX / OP_DFDY in place as an OP_QUADOP whose first source is
// the original operand passed through a butterfly OP_SHFL.
bool
GM107LoweringPass::handleDFDX(Instruction *insn)
{
   int quadMask = 0;
   int shflArg = 0;

   if (insn->op == OP_DFDX) {
      quadMask = QUADOP(SUB, SUBR, SUB, SUBR);
      shflArg = 1;
   } else if (insn->op == OP_DFDY) {
      quadMask = QUADOP(SUB, SUB, SUBR, SUBR);
      shflArg = 2;
   } else {
      assert(!"invalid dfdx opcode");
   }

   Instruction *shuffle = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
                                    insn->getSrc(0), bld.mkImm(shflArg));
   shuffle->subOp = NV50_IR_SUBOP_SHFL_BFLY;

   insn->op = OP_QUADOP;
   insn->subOp = quadMask;
   insn->lanes = 0; /* abused for !.ndv */

   // the original operand moves to slot 1, the shuffled value takes slot 0
   insn->setSrc(1, insn->getSrc(0));
   insn->setSrc(0, shuffle->getDef(0));
   return true;
}
// Replaces the immediate source of a PFETCH with a computed value:
// (info & 0xff) * ((info >> 16) & 0xff) + <original immediate>, where info
// is read from SV_INVOCATION_INFO. The second source is dropped.
bool
GM107LoweringPass::handlePFETCH(Instruction *i)
{
   Value *acc = bld.getScratch();
   Value *upper = bld.getScratch();
   Value *imm = bld.getScratch();

   bld.mkOp1(OP_RDSV, TYPE_U32, acc, bld.mkSysVal(SV_INVOCATION_INFO, 0));

   bld.mkOp2(OP_SHR, TYPE_U32, upper, acc, bld.mkImm(16));
   bld.mkOp2(OP_AND, TYPE_U32, acc, acc, bld.mkImm(0xff));
   bld.mkOp2(OP_AND, TYPE_U32, upper, upper, bld.mkImm(0xff));

   bld.mkOp1(OP_MOV, TYPE_U32, imm, bld.mkImm(i->getSrc(0)->reg.data.u32));
   bld.mkOp3(OP_MAD, TYPE_U32, acc, acc, upper, imm);

   i->setSrc(0, acc);
   i->setSrc(1, NULL);
   return true;
}
// Folds the two sources of a POPCNT into one: an AND of src0 and src1 is
// emitted in front of the instruction and becomes its only source.
//
// Always returns true, like the other handlers in this pass.
bool
GM107LoweringPass::handlePOPCNT(Instruction *i)
{
   Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
                           i->getSrc(0), i->getSrc(1));
   i->setSrc(0, tmp);
   i->setSrc(1, NULL);
   // use the C++ literal rather than the legacy TRUE macro
   return true;
}
//
// - add quadop dance for texturing
// - put FP outputs in GPRs
// - convert instruction sequences
//
// Dispatches on the opcode; handlers whose result is returned directly decide
// the return value, every other path returns true.
bool
GM107LoweringPass::visit(Instruction *i)
{
   bld.setPosition(i, false);

   if (i->cc != CC_ALWAYS)
      checkPredicate(i);

   switch (i->op) {
   // texturing
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXF:
   case OP_TXG:
      return handleTEX(i->asTex());
   case OP_TXD:
      return handleTXD(i->asTex());
   case OP_TXLQ:
      return handleTXLQ(i->asTex());
   case OP_TXQ:
      return handleTXQ(i->asTex());

   // arithmetic
   case OP_EX2: {
      Value *dst = i->getDef(0);

      bld.mkOp1(OP_PREEX2, TYPE_F32, dst, i->getSrc(0));
      i->setSrc(0, dst);
      return true;
   }
   case OP_POW:
      return handlePOW(i);
   case OP_DIV:
      return handleDIV(i);
   case OP_MOD:
      return handleMOD(i);
   case OP_SQRT:
      return handleSQRT(i);

   // shader I/O
   case OP_EXPORT:
      return handleEXPORT(i);
   case OP_PFETCH:
      return handlePFETCH(i);
   case OP_EMIT:
   case OP_RESTART:
      return handleOUT(i);
   case OP_RDSV:
      return handleRDSV(i);
   case OP_WRSV:
      return handleWRSV(i);

   case OP_LOAD:
      if (i->src(0).getFile() != FILE_SHADER_INPUT)
         return true;

      if (prog->getType() == Program::TYPE_COMPUTE) {
         i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
         i->getSrc(0)->reg.fileIndex = 0;
         return true;
      }

      if (prog->getType() == Program::TYPE_GEOMETRY &&
          i->src(0).isIndirect(0)) {
         // XXX: this assumes vec4 units
         Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                                 i->getIndirect(0, 0), bld.mkImm(4));
         i->setIndirect(0, 0, ptr);
         return true;
      }

      i->op = OP_VFETCH;
      assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
      return true;

   case OP_ATOM: {
      // sample the memory file before handleATOM() gets to touch the source
      const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;

      handleATOM(i);
      handleCasExch(i, cctl);
      return true;
   }

   case OP_SULDB:
   case OP_SULDP:
   case OP_SUSTB:
   case OP_SUSTP:
   case OP_SUREDB:
   case OP_SUREDP:
      handleSurfaceOpNVE4(i->asTex());
      return true;

   case OP_DFDX:
   case OP_DFDY:
      handleDFDX(i);
      return true;

   case OP_POPCNT:
      handlePOPCNT(i);
      return true;

   default:
      break;
   }

   return true;
}
} // namespace nv50_ir

View File

@ -0,0 +1,18 @@
#ifndef NV50_IR_LOWERING_GM107_H
#define NV50_IR_LOWERING_GM107_H

#include "codegen/nv50_ir_lowering_nvc0.h"

namespace nv50_ir {

// Lowering pass for GM107 targets. It reuses the NVC0 pass and replaces
// visit() and handleManualTXD(), adding handlers for DFDX/DFDY, PFETCH and
// POPCNT.
class GM107LoweringPass : public NVC0LoweringPass
{
public:
   GM107LoweringPass(Program *p) : NVC0LoweringPass(p) {}
private:
   // per-instruction entry point of the pass
   virtual bool visit(Instruction *);
   // overrides the NVC0 implementation, which is virtual for this purpose
   virtual bool handleManualTXD(TexInstruction *);

   bool handleDFDX(Instruction *);
   bool handlePFETCH(Instruction *);
   bool handlePOPCNT(Instruction *);
};

} // namespace nv50_ir

#endif // NV50_IR_LOWERING_GM107_H
View File

@ -99,7 +99,7 @@ protected:
bool handleTEX(TexInstruction *);
bool handleTXD(TexInstruction *);
bool handleTXQ(TexInstruction *);
bool handleManualTXD(TexInstruction *);
virtual bool handleManualTXD(TexInstruction *);
bool handleTXLQ(TexInstruction *);
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);

View File

@ -187,6 +187,7 @@ const char *operationStr[OP_LAST + 1] =
"vshl",
"vsel",
"cctl",
"shfl",
"(invalid)"
};
@ -271,6 +272,7 @@ static const char *SemanticStr[SV_LAST + 1] =
"LBASE",
"SBASE",
"VERTEX_STRIDE",
"INVOCATION_INFO",
"?",
"(INVALID)"
};

View File

@ -256,6 +256,7 @@ private:
void texConstraintNV50(TexInstruction *);
void texConstraintNVC0(TexInstruction *);
void texConstraintNVE0(TexInstruction *);
void texConstraintGM107(TexInstruction *);
std::list<Instruction *> constrList;
@ -855,6 +856,7 @@ GCRA::coalesce(ArrayList& insns)
case 0xe0:
case 0xf0:
case 0x100:
case 0x110:
ret = doCoalesce(insns, JOIN_MASK_UNION);
break;
default:
@ -1880,6 +1882,34 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
constrList.push_back(merge);
}
// Inserts register constraints for a texture or surface instruction on
// GM107: definitions are always condensed, sources are condensed in up to
// two groups depending on the opcode.
void
RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
{
   if (isTextureOp(tex->op))
      textureMask(tex);
   condenseDefs(tex);

   if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
      const int last = (3 + typeSizeof(tex->dType) / 4) - 1;

      condenseSrcs(tex, 3, last);
      return;
   }

   if (!isTextureOp(tex->op))
      return;

   int firstGroup, secondGroup;

   if (tex->op == OP_TXQ) {
      firstGroup = tex->srcCount(0xff);
      secondGroup = 0;
   } else {
      firstGroup = tex->tex.target.getArgCount() - tex->tex.target.isMS();
      secondGroup = tex->srcCount(0xff) - firstGroup;
   }

   if (firstGroup > 1)
      condenseSrcs(tex, 0, firstGroup - 1);
   if (secondGroup > 1) // NOTE: first call modified positions already
      condenseSrcs(tex, 1, secondGroup);
}
void
RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
{
@ -1987,6 +2017,9 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
case 0x100:
texConstraintNVE0(tex);
break;
case 0x110:
texConstraintGM107(tex);
break;
default:
break;
}

View File

@ -54,6 +54,7 @@ const uint8_t Target::operationSrcNr[] =
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
3, // SHFL
0
};
@ -126,10 +127,13 @@ const OpClass Target::operationClass[] =
OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
// VSEL, CCTL
OPCLASS_VECTOR, OPCLASS_CONTROL,
// SHFL
OPCLASS_OTHER,
OPCLASS_PSEUDO // LAST
};
extern Target *getTargetGM107(unsigned int chipset);
extern Target *getTargetNVC0(unsigned int chipset);
extern Target *getTargetNV50(unsigned int chipset);
@ -138,6 +142,8 @@ Target *Target::create(unsigned int chipset)
STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1);
STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);
switch (chipset & ~0xf) {
case 0x110:
return getTargetGM107(chipset);
case 0xc0:
case 0xd0:
case 0xe0:

View File

@ -78,7 +78,7 @@ public:
inline void *getRelocInfo() const { return relocInfo; }
void prepareEmission(Program *);
virtual void prepareEmission(Program *);
virtual void prepareEmission(Function *);
virtual void prepareEmission(BasicBlock *);

View File

@ -0,0 +1,100 @@
/*
* Copyright 2011 Christoph Bumiller
* 2014 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "codegen/nv50_ir_target_gm107.h"
#include "codegen/nv50_ir_lowering_gm107.h"
namespace nv50_ir {
// Factory for the GM107 target; the caller receives a newly allocated object.
Target *getTargetGM107(unsigned int chipset)
{
   Target *const target = new TargetGM107(chipset);

   return target;
}
// BUILTINS / LIBRARY FUNCTIONS:
// laziness -> will just hardcode everything for the time being
#include "lib/gm107.asm.h"
void
TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
{
*code = (const uint32_t *)&gm107_builtin_code[0];
*size = sizeof(gm107_builtin_code);
}
// Returns the byte offset of the given builtin inside the builtin code blob.
//
// builtin: index into gm107_builtin_offsets; must lie in
//          [0, NVC0_BUILTIN_COUNT).
uint32_t
TargetGM107::getBuiltinOffset(int builtin) const
{
   // the index is signed, so guard the lower bound as well
   assert(builtin >= 0 && builtin < NVC0_BUILTIN_COUNT);
   return gm107_builtin_offsets[builtin];
}
// Reports whether the target accepts `op` with data type `ty`:
// MAD and FMA only for TYPE_F32; SAD, POW, SQRT, DIV and MOD never;
// everything else always.
bool
TargetGM107::isOpSupported(operation op, DataType ty) const
{
   if (op == OP_MAD || op == OP_FMA)
      return ty == TYPE_F32;

   const bool unsupported =
      op == OP_SAD ||
      op == OP_POW ||
      op == OP_SQRT ||
      op == OP_DIV ||
      op == OP_MOD;

   return !unsupported;
}
// Runs the legalization pass that belongs to `stage`. Only the pre-SSA stage
// uses a GM107-specific pass; SSA and post-RA reuse the NVC0 passes. Any
// other stage yields false.
bool
TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
{
   switch (stage) {
   case CG_STAGE_PRE_SSA: {
      GM107LoweringPass lowering(prog);
      return lowering.run(prog, false, true);
   }
   case CG_STAGE_POST_RA: {
      NVC0LegalizePostRA postRA(prog);
      return postRA.run(prog, false, true);
   }
   case CG_STAGE_SSA: {
      NVC0LegalizeSSA ssa;
      return ssa.run(prog, false, true);
   }
   default:
      return false;
   }
}
// Returns the emitter built by createCodeEmitterGM107() for this program type.
CodeEmitter *
TargetGM107::getCodeEmitter(Program::Type type)
{
   CodeEmitter *const emitter = createCodeEmitterGM107(type);

   return emitter;
}
} // namespace nv50_ir

View File

@ -0,0 +1,21 @@
#ifndef NV50_IR_TARGET_GM107_H
#define NV50_IR_TARGET_GM107_H

#include "codegen/nv50_ir_target_nvc0.h"

namespace nv50_ir {

// Target description for GM107 chipsets. Everything not overridden below is
// inherited from the NVC0 target.
class TargetGM107 : public TargetNVC0
{
public:
   TargetGM107(unsigned int chipset) : TargetNVC0(chipset) {}

   // code emission
   virtual CodeEmitter *getCodeEmitter(Program::Type);
   CodeEmitter *createCodeEmitterGM107(Program::Type);

   // legalization passes per codegen stage
   virtual bool runLegalizePass(Program *, CGStage) const;

   // pre-assembled builtin library (code blob and per-builtin offsets)
   virtual void getBuiltinCode(const uint32_t **, uint32_t *) const;
   virtual uint32_t getBuiltinOffset(int) const;

   virtual bool isOpSupported(operation, DataType) const;
};

} // namespace nv50_ir

#endif // NV50_IR_TARGET_GM107_H