mesa/src/nouveau/codegen/nv50_ir_target_gm107.cpp

368 lines
8.6 KiB
C++

/*
* Copyright 2011 Christoph Bumiller
* 2014 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "nv50_ir_target_gm107.h"
#include "nv50_ir_lowering_gm107.h"
namespace nv50_ir {
// Factory entry point for the GM107 target: heap-allocates a TargetGM107 for
// the given chipset and hands it back through the generic Target pointer.
Target *
getTargetGM107(unsigned int chipset)
{
   Target *const target = new TargetGM107(chipset);
   return target;
}
// BUILTINS / LIBRARY FUNCTIONS:
// laziness -> will just hardcode everything for the time being
#include "lib/gm107.asm.h"
// Expose the hard-coded builtin library: *code is set to the address of the
// first element of gm107_builtin_code (viewed as 32-bit words) and *size to
// the size of that array in bytes.
void
TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
{
   *size = sizeof(gm107_builtin_code);
   *code = reinterpret_cast<const uint32_t *>(&gm107_builtin_code[0]);
}
// Return the entry of gm107_builtin_offsets for the requested builtin.
//
// builtin is a signed index, so both bounds are asserted: it must lie in
// [0, NVC0_BUILTIN_COUNT). Checking only the upper bound would let a negative
// value through and read before the start of the table.
uint32_t
TargetGM107::getBuiltinOffset(int builtin) const
{
   assert(builtin >= 0 && builtin < NVC0_BUILTIN_COUNT);
   return gm107_builtin_offsets[builtin];
}
// Report whether operation op is supported for data type ty on this target.
//  - SAD, POW, DIV and MOD are never supported.
//  - SQRT is supported only for non-F64 types on chipsets at or above
//    NVISA_GM200_CHIPSET.
//  - XMAD is supported only for non-float types.
// Every other operation is reported as supported.
bool
TargetGM107::isOpSupported(operation op, DataType ty) const
{
   if (op == OP_SAD || op == OP_POW || op == OP_DIV || op == OP_MOD)
      return false;

   if (op == OP_SQRT)
      return ty != TYPE_F64 && chipset >= NVISA_GM200_CHIPSET;

   if (op == OP_XMAD)
      return !isFloatType(ty);

   return true;
}
// Return true when an instruction supports the reuse flag. When supported, the
// hardware will use the operand reuse cache introduced since Maxwell, which
// should try to reduce bank conflicts by caching values for the subsequent
// instructions. Note that the next instructions have to use the same GPR id in
// the same operand slot.
bool
TargetGM107::isReuseSupported(const Instruction *insn) const
{
const OpClass cl = getOpClass(insn->op);
// TODO: double-check!
switch (cl) {
case OPCLASS_ARITH:
case OPCLASS_COMPARE:
case OPCLASS_LOGIC:
case OPCLASS_MOVE:
case OPCLASS_SHIFT:
return true;
case OPCLASS_BITFIELD:
if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
return true;
break;
default:
break;
}
return false;
}
// Return true when an instruction requires to set up a barrier because it
// doesn't operate at a fixed latency. Variable latency instructions are memory
// operations, double precision operations, special function unit operations
// and other low throughput instructions.
bool
TargetGM107::isBarrierRequired(const Instruction *insn) const
{
const OpClass cl = getOpClass(insn->op);
if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
return true;
switch (cl) {
case OPCLASS_ATOMIC:
case OPCLASS_LOAD:
case OPCLASS_STORE:
case OPCLASS_SURFACE:
case OPCLASS_TEXTURE:
return true;
case OPCLASS_SFU:
switch (insn->op) {
case OP_COS:
case OP_EX2:
case OP_LG2:
case OP_LINTERP:
case OP_PINTERP:
case OP_RCP:
case OP_RSQ:
case OP_SIN:
case OP_SQRT:
return true;
default:
break;
}
break;
case OPCLASS_BITFIELD:
switch (insn->op) {
case OP_BFIND:
case OP_POPCNT:
return true;
default:
break;
}
break;
case OPCLASS_CONTROL:
switch (insn->op) {
case OP_EMIT:
case OP_RESTART:
return true;
default:
break;
}
break;
case OPCLASS_OTHER:
switch (insn->op) {
case OP_AFETCH:
case OP_PFETCH:
case OP_PIXLD:
case OP_SHFL:
return true;
case OP_RDSV:
return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
default:
break;
}
break;
case OPCLASS_ARITH:
if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
!isFloatType(insn->dType))
return true;
break;
case OPCLASS_CONVERT:
if (insn->def(0).getFile() != FILE_PREDICATE &&
insn->src(0).getFile() != FILE_PREDICATE)
return true;
break;
default:
break;
}
return false;
}
// Dual issue is not implemented for this target yet: every pair of
// instructions is reported as not dual-issuable.
bool
TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
{
   // TODO
   const bool dualIssue = false;
   return dualIssue;
}
// Return the number of stall counts needed to complete a single instruction.
// On Maxwell GPUs, the pipeline depth is 6, but some instructions (memory
// operations, for example) need a different number of stall counts.
// Instructions without a more specific value get the maximum of 15.
int
TargetGM107::getLatency(const Instruction *insn) const
{
   // Maximum number of stall counts, used whenever nothing better is known.
   const int maxStallCount = 15;

   // TODO: better values! This should be good enough for now though.
   switch (insn->op) {
   case OP_EMIT:
   case OP_EXPORT:
   case OP_PIXLD:
   case OP_RESTART:
   case OP_STORE:
   case OP_SUSTB:
   case OP_SUSTP:
      return 1;

   case OP_SHFL:
      return 2;

   case OP_ADD:
   case OP_AND:
   case OP_EXTBF:
   case OP_FMA:
   case OP_INSBF:
   case OP_MAD:
   case OP_MAX:
   case OP_MIN:
   case OP_MOV:
   case OP_MUL:
   case OP_NOT:
   case OP_OR:
   case OP_PREEX2:
   case OP_PRESIN:
   case OP_QUADOP:
   case OP_SELP:
   case OP_SET:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
   case OP_SHL:
   case OP_SHLADD:
   case OP_SHR:
   case OP_SLCT:
   case OP_SUB:
   case OP_VOTE:
   case OP_XOR:
   case OP_XMAD:
      // Pipeline depth, except for F64 results which get the maximum.
      return insn->dType != TYPE_F64 ? 6 : maxStallCount;

   case OP_RDSV:
      if (isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
         return 6;
      return maxStallCount;

   case OP_CVT:
      // Conversions from or to a predicate take the pipeline depth.
      if (insn->def(0).getFile() == FILE_PREDICATE ||
          insn->src(0).getFile() == FILE_PREDICATE)
         return 6;
      return maxStallCount;

   // Listed next to OP_CVT for reference; they always get the maximum.
   case OP_ABS:
   case OP_CEIL:
   case OP_FLOOR:
   case OP_NEG:
   case OP_SAT:
   case OP_TRUNC:
      return maxStallCount;

   case OP_BFIND:
   case OP_COS:
   case OP_EX2:
   case OP_LG2:
   case OP_POPCNT:
   case OP_QUADON:
   case OP_QUADPOP:
   case OP_RCP:
   case OP_RSQ:
   case OP_SIN:
   case OP_SQRT:
      return 13;

   default:
      // Use the maximum number of stall counts for other instructions.
      return maxStallCount;
   }
}
// Return the operand read latency, i.e. the number of stall counts before an
// instruction can read its sources. ATOM, LOAD and STORE only report a
// non-zero value when source 0 is accessed indirectly, and the value then
// depends on the memory file. Instructions not listed here report 0.
int
TargetGM107::getReadLatency(const Instruction *insn) const
{
   switch (insn->op) {
   case OP_ABS:
   case OP_BFIND:
   case OP_CEIL:
   case OP_COS:
   case OP_EX2:
   case OP_FLOOR:
   case OP_LG2:
   case OP_NEG:
   case OP_POPCNT:
   case OP_RCP:
   case OP_RSQ:
   case OP_SAT:
   case OP_SIN:
   case OP_SQRT:
   case OP_SULDB:
   case OP_SULDP:
   case OP_SUREDB:
   case OP_SUREDP:
   case OP_SUSTB:
   case OP_SUSTP:
   case OP_TRUNC:
      return 4;

   case OP_CVT: {
      // Conversions involving a predicate on either side report 0.
      const bool noPredicate =
         insn->def(0).getFile() != FILE_PREDICATE &&
         insn->src(0).getFile() != FILE_PREDICATE;
      return noPredicate ? 4 : 0;
   }

   case OP_ATOM:
   case OP_LOAD:
   case OP_STORE:
      if (!insn->src(0).isIndirect(0))
         return 0;
      switch (insn->src(0).getFile()) {
      case FILE_MEMORY_SHARED:
      case FILE_MEMORY_CONST:
         return 2;
      case FILE_MEMORY_GLOBAL:
      case FILE_MEMORY_LOCAL:
         return 4;
      default:
         return 0;
      }

   case OP_EXPORT:
   case OP_PFETCH:
   case OP_SHFL:
   case OP_VFETCH:
      return 2;

   default:
      return 0;
   }
}
// Return true only for SV_CLOCK. Callers in this file use the result to pick
// a fixed latency of 6 and to skip the barrier for RDSV.
// NOTE(review): the name suggests "system value readable through CS2R" —
// confirm against the code emitter.
bool
TargetGM107::isCS2RSV(SVSemantic sv) const
{
   switch (sv) {
   case SV_CLOCK:
      return true;
   default:
      return false;
   }
}
// Run the legalization pass that belongs to the given codegen stage on prog
// and return that pass's result:
//  - CG_STAGE_PRE_SSA -> GM107LoweringPass
//  - CG_STAGE_SSA     -> GM107LegalizeSSA
//  - CG_STAGE_POST_RA -> NVC0LegalizePostRA
// Any other stage runs nothing and returns false.
bool
TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
{
   switch (stage) {
   case CG_STAGE_PRE_SSA: {
      GM107LoweringPass lowering(prog);
      return lowering.run(prog, false, true);
   }
   case CG_STAGE_SSA: {
      GM107LegalizeSSA legalizeSSA;
      return legalizeSSA.run(prog, false, true);
   }
   case CG_STAGE_POST_RA: {
      NVC0LegalizePostRA legalizePostRA(prog);
      return legalizePostRA.run(prog, false, true);
   }
   default:
      return false;
   }
}
// Create the code emitter for the given program type by delegating to
// createCodeEmitterGM107().
CodeEmitter *
TargetGM107::getCodeEmitter(Program::Type type)
{
   CodeEmitter *const emitter = createCodeEmitterGM107(type);
   return emitter;
}
} // namespace nv50_ir