368 lines
8.6 KiB
C++
368 lines
8.6 KiB
C++
/*
|
|
* Copyright 2011 Christoph Bumiller
|
|
* 2014 Red Hat Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "nv50_ir_target_gm107.h"
|
|
#include "nv50_ir_lowering_gm107.h"
|
|
|
|
namespace nv50_ir {
|
|
|
|
Target *getTargetGM107(unsigned int chipset)
|
|
{
|
|
return new TargetGM107(chipset);
|
|
}
|
|
|
|
// BULTINS / LIBRARY FUNCTIONS:
|
|
|
|
// lazyness -> will just hardcode everything for the time being
|
|
|
|
#include "lib/gm107.asm.h"
|
|
|
|
void
|
|
TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
|
|
{
|
|
*code = (const uint32_t *)&gm107_builtin_code[0];
|
|
*size = sizeof(gm107_builtin_code);
|
|
}
|
|
|
|
uint32_t
|
|
TargetGM107::getBuiltinOffset(int builtin) const
|
|
{
|
|
assert(builtin < NVC0_BUILTIN_COUNT);
|
|
return gm107_builtin_offsets[builtin];
|
|
}
|
|
|
|
bool
|
|
TargetGM107::isOpSupported(operation op, DataType ty) const
|
|
{
|
|
switch (op) {
|
|
case OP_SAD:
|
|
case OP_POW:
|
|
case OP_DIV:
|
|
case OP_MOD:
|
|
return false;
|
|
case OP_SQRT:
|
|
if (ty == TYPE_F64)
|
|
return false;
|
|
return chipset >= NVISA_GM200_CHIPSET;
|
|
case OP_XMAD:
|
|
if (isFloatType(ty))
|
|
return false;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Return true when an instruction supports the reuse flag. When supported, the
|
|
// hardware will use the operand reuse cache introduced since Maxwell, which
|
|
// should try to reduce bank conflicts by caching values for the subsequent
|
|
// instructions. Note that the next instructions have to use the same GPR id in
|
|
// the same operand slot.
|
|
bool
|
|
TargetGM107::isReuseSupported(const Instruction *insn) const
|
|
{
|
|
const OpClass cl = getOpClass(insn->op);
|
|
|
|
// TODO: double-check!
|
|
switch (cl) {
|
|
case OPCLASS_ARITH:
|
|
case OPCLASS_COMPARE:
|
|
case OPCLASS_LOGIC:
|
|
case OPCLASS_MOVE:
|
|
case OPCLASS_SHIFT:
|
|
return true;
|
|
case OPCLASS_BITFIELD:
|
|
if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
|
|
return true;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Return true when an instruction requires to set up a barrier because it
|
|
// doesn't operate at a fixed latency. Variable latency instructions are memory
|
|
// operations, double precision operations, special function unit operations
|
|
// and other low throughput instructions.
|
|
bool
|
|
TargetGM107::isBarrierRequired(const Instruction *insn) const
|
|
{
|
|
const OpClass cl = getOpClass(insn->op);
|
|
|
|
if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
|
|
return true;
|
|
|
|
switch (cl) {
|
|
case OPCLASS_ATOMIC:
|
|
case OPCLASS_LOAD:
|
|
case OPCLASS_STORE:
|
|
case OPCLASS_SURFACE:
|
|
case OPCLASS_TEXTURE:
|
|
return true;
|
|
case OPCLASS_SFU:
|
|
switch (insn->op) {
|
|
case OP_COS:
|
|
case OP_EX2:
|
|
case OP_LG2:
|
|
case OP_LINTERP:
|
|
case OP_PINTERP:
|
|
case OP_RCP:
|
|
case OP_RSQ:
|
|
case OP_SIN:
|
|
case OP_SQRT:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
case OPCLASS_BITFIELD:
|
|
switch (insn->op) {
|
|
case OP_BFIND:
|
|
case OP_POPCNT:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
case OPCLASS_CONTROL:
|
|
switch (insn->op) {
|
|
case OP_EMIT:
|
|
case OP_RESTART:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
case OPCLASS_OTHER:
|
|
switch (insn->op) {
|
|
case OP_AFETCH:
|
|
case OP_PFETCH:
|
|
case OP_PIXLD:
|
|
case OP_SHFL:
|
|
return true;
|
|
case OP_RDSV:
|
|
return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
case OPCLASS_ARITH:
|
|
if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
|
|
!isFloatType(insn->dType))
|
|
return true;
|
|
break;
|
|
case OPCLASS_CONVERT:
|
|
if (insn->def(0).getFile() != FILE_PREDICATE &&
|
|
insn->src(0).getFile() != FILE_PREDICATE)
|
|
return true;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
|
|
{
|
|
// TODO
|
|
return false;
|
|
}
|
|
|
|
// Return the number of stall counts needed to complete a single instruction.
|
|
// On Maxwell GPUs, the pipeline depth is 6, but some instructions require
|
|
// different number of stall counts like memory operations.
|
|
int
|
|
TargetGM107::getLatency(const Instruction *insn) const
|
|
{
|
|
// TODO: better values! This should be good enough for now though.
|
|
switch (insn->op) {
|
|
case OP_EMIT:
|
|
case OP_EXPORT:
|
|
case OP_PIXLD:
|
|
case OP_RESTART:
|
|
case OP_STORE:
|
|
case OP_SUSTB:
|
|
case OP_SUSTP:
|
|
return 1;
|
|
case OP_SHFL:
|
|
return 2;
|
|
case OP_ADD:
|
|
case OP_AND:
|
|
case OP_EXTBF:
|
|
case OP_FMA:
|
|
case OP_INSBF:
|
|
case OP_MAD:
|
|
case OP_MAX:
|
|
case OP_MIN:
|
|
case OP_MOV:
|
|
case OP_MUL:
|
|
case OP_NOT:
|
|
case OP_OR:
|
|
case OP_PREEX2:
|
|
case OP_PRESIN:
|
|
case OP_QUADOP:
|
|
case OP_SELP:
|
|
case OP_SET:
|
|
case OP_SET_AND:
|
|
case OP_SET_OR:
|
|
case OP_SET_XOR:
|
|
case OP_SHL:
|
|
case OP_SHLADD:
|
|
case OP_SHR:
|
|
case OP_SLCT:
|
|
case OP_SUB:
|
|
case OP_VOTE:
|
|
case OP_XOR:
|
|
case OP_XMAD:
|
|
if (insn->dType != TYPE_F64)
|
|
return 6;
|
|
break;
|
|
case OP_RDSV:
|
|
return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
|
|
case OP_ABS:
|
|
case OP_CEIL:
|
|
case OP_CVT:
|
|
case OP_FLOOR:
|
|
case OP_NEG:
|
|
case OP_SAT:
|
|
case OP_TRUNC:
|
|
if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
|
|
insn->src(0).getFile() == FILE_PREDICATE))
|
|
return 6;
|
|
break;
|
|
case OP_BFIND:
|
|
case OP_COS:
|
|
case OP_EX2:
|
|
case OP_LG2:
|
|
case OP_POPCNT:
|
|
case OP_QUADON:
|
|
case OP_QUADPOP:
|
|
case OP_RCP:
|
|
case OP_RSQ:
|
|
case OP_SIN:
|
|
case OP_SQRT:
|
|
return 13;
|
|
default:
|
|
break;
|
|
}
|
|
// Use the maximum number of stall counts for other instructions.
|
|
return 15;
|
|
}
|
|
|
|
// Return the operand read latency which is the number of stall counts before
|
|
// an instruction can read its sources. For memory operations like ATOM, LOAD
|
|
// and STORE, the memory access has to be indirect.
|
|
int
|
|
TargetGM107::getReadLatency(const Instruction *insn) const
|
|
{
|
|
switch (insn->op) {
|
|
case OP_ABS:
|
|
case OP_BFIND:
|
|
case OP_CEIL:
|
|
case OP_COS:
|
|
case OP_EX2:
|
|
case OP_FLOOR:
|
|
case OP_LG2:
|
|
case OP_NEG:
|
|
case OP_POPCNT:
|
|
case OP_RCP:
|
|
case OP_RSQ:
|
|
case OP_SAT:
|
|
case OP_SIN:
|
|
case OP_SQRT:
|
|
case OP_SULDB:
|
|
case OP_SULDP:
|
|
case OP_SUREDB:
|
|
case OP_SUREDP:
|
|
case OP_SUSTB:
|
|
case OP_SUSTP:
|
|
case OP_TRUNC:
|
|
return 4;
|
|
case OP_CVT:
|
|
if (insn->def(0).getFile() != FILE_PREDICATE &&
|
|
insn->src(0).getFile() != FILE_PREDICATE)
|
|
return 4;
|
|
break;
|
|
case OP_ATOM:
|
|
case OP_LOAD:
|
|
case OP_STORE:
|
|
if (insn->src(0).isIndirect(0)) {
|
|
switch (insn->src(0).getFile()) {
|
|
case FILE_MEMORY_SHARED:
|
|
case FILE_MEMORY_CONST:
|
|
return 2;
|
|
case FILE_MEMORY_GLOBAL:
|
|
case FILE_MEMORY_LOCAL:
|
|
return 4;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
case OP_EXPORT:
|
|
case OP_PFETCH:
|
|
case OP_SHFL:
|
|
case OP_VFETCH:
|
|
return 2;
|
|
default:
|
|
break;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
bool
|
|
TargetGM107::isCS2RSV(SVSemantic sv) const
|
|
{
|
|
return sv == SV_CLOCK;
|
|
}
|
|
|
|
bool
|
|
TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
|
|
{
|
|
if (stage == CG_STAGE_PRE_SSA) {
|
|
GM107LoweringPass pass(prog);
|
|
return pass.run(prog, false, true);
|
|
} else
|
|
if (stage == CG_STAGE_POST_RA) {
|
|
NVC0LegalizePostRA pass(prog);
|
|
return pass.run(prog, false, true);
|
|
} else
|
|
if (stage == CG_STAGE_SSA) {
|
|
GM107LegalizeSSA pass;
|
|
return pass.run(prog, false, true);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
CodeEmitter *
|
|
TargetGM107::getCodeEmitter(Program::Type type)
|
|
{
|
|
return createCodeEmitterGM107(type);
|
|
}
|
|
|
|
} // namespace nv50_ir
|