radeon/llvm: Add R600ExpandSpecialInstrs pass
This pass expands reduction instructions into a MachineInstrBundle that contains 4 instructions, one for each instruction slot.
This commit is contained in:
parent
0588298575
commit
82a5d0c641
|
@ -22,6 +22,7 @@ class AMDGPUTargetMachine;
|
|||
// R600 Passes
|
||||
FunctionPass* createR600KernelParametersPass(const TargetData* TD);
|
||||
FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
|
||||
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
||||
|
||||
// SI Passes
|
||||
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#define MO_FLAG_ABS (1 << 2)
|
||||
#define MO_FLAG_MASK (1 << 3)
|
||||
#define MO_FLAG_PUSH (1 << 4)
|
||||
#define MO_FLAG_LAST (1 << 5)
|
||||
|
||||
#define OPCODE_IS_ZERO_INT 0x00000045
|
||||
#define OPCODE_IS_NOT_ZERO_INT 0x00000042
|
||||
|
|
|
@ -158,6 +158,12 @@ bool AMDGPUPassConfig::addPreEmitPass() {
|
|||
PM->add(createAMDGPUCFGPreparationPass(*TM));
|
||||
PM->add(createAMDGPUCFGStructurizerPass(*TM));
|
||||
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
PM->add(createR600ExpandSpecialInstrsPass(*TM));
|
||||
addPass(FinalizeMachineBundlesID);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ CPP_SOURCES := \
|
|||
AMDGPUInstrInfo.cpp \
|
||||
AMDGPURegisterInfo.cpp \
|
||||
R600CodeEmitter.cpp \
|
||||
R600ExpandSpecialInstrs.cpp \
|
||||
R600ISelLowering.cpp \
|
||||
R600InstrInfo.cpp \
|
||||
R600KernelParameters.cpp \
|
||||
|
|
|
@ -50,7 +50,6 @@ private:
|
|||
const R600InstrInfo * TII;
|
||||
|
||||
bool IsCube;
|
||||
bool IsReduction;
|
||||
bool IsVector;
|
||||
unsigned currentElement;
|
||||
bool IsLast;
|
||||
|
@ -60,7 +59,7 @@ private:
|
|||
public:
|
||||
|
||||
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
|
||||
_OS(OS), TM(NULL), IsCube(false), IsReduction(false), IsVector(false),
|
||||
_OS(OS), TM(NULL), IsCube(false), IsVector(false),
|
||||
IsLast(true) { }
|
||||
|
||||
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
|
||||
|
@ -159,10 +158,9 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
|
|||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
||||
BB != BB_E; ++BB) {
|
||||
MachineBasicBlock &MBB = *BB;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
|
||||
E = MBB.instr_end(); I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
IsReduction = TII->isReductionOp(MI.getOpcode());
|
||||
IsVector = TII->isVector(MI);
|
||||
IsCube = TII->isCubeOp(MI.getOpcode());
|
||||
if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
|
||||
|
@ -172,7 +170,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
|
|||
EmitTexInstr(MI);
|
||||
} else if (TII->isFCOp(MI.getOpcode())){
|
||||
EmitFCInstr(MI);
|
||||
} else if (IsReduction || IsVector || IsCube) {
|
||||
} else if (IsVector || IsCube) {
|
||||
IsLast = false;
|
||||
// XXX: On Cayman, some (all?) of the vector instructions only need
|
||||
// to fill the first three slots.
|
||||
|
@ -180,7 +178,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
|
|||
IsLast = (currentElement == 3);
|
||||
EmitALUInstr(MI);
|
||||
}
|
||||
IsReduction = false;
|
||||
IsVector = false;
|
||||
IsCube = false;
|
||||
} else if (MI.getOpcode() == AMDGPU::RETURN ||
|
||||
|
@ -310,8 +307,6 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
|
|||
// Emit the source channel (1 byte)
|
||||
if (chan_override != -1) {
|
||||
EmitByte(chan_override);
|
||||
} else if (IsReduction) {
|
||||
EmitByte(currentElement);
|
||||
} else if (MO.isReg()) {
|
||||
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||
} else {
|
||||
|
@ -353,7 +348,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
|
|||
EmitByte(getHWReg(MO.getReg()));
|
||||
|
||||
// Emit the element of the destination register (1 byte)
|
||||
if (IsReduction || IsCube || IsVector) {
|
||||
if (IsCube || IsVector) {
|
||||
EmitByte(currentElement);
|
||||
} else {
|
||||
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||
|
@ -367,7 +362,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
|
|||
}
|
||||
|
||||
// Emit writemask (1 byte).
|
||||
if (((IsReduction || IsVector) &&
|
||||
if ((IsVector &&
|
||||
currentElement != TRI->getHWRegChan(MO.getReg()))
|
||||
|| MO.getTargetFlags() & MO_FLAG_MASK) {
|
||||
EmitByte(0);
|
||||
|
@ -389,11 +384,14 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
|
|||
EmitTwoBytes(getBinaryCodeForInstr(MI));
|
||||
|
||||
// Emit IsLast (for this instruction group) (1 byte)
|
||||
if (IsLast) {
|
||||
EmitByte(1);
|
||||
} else {
|
||||
if (!IsLast ||
|
||||
(MI.isInsideBundle() &&
|
||||
!(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) {
|
||||
EmitByte(0);
|
||||
} else {
|
||||
EmitByte(1);
|
||||
}
|
||||
|
||||
// Emit isOp3 (1 byte)
|
||||
if (numSrc == 3) {
|
||||
EmitByte(1);
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Vector, Reduction, and Cube instructions need to fill the entire instruction
|
||||
// group to work correctly. This pass expands these individual instructions
|
||||
// into several instructions that will completely fill the instruction group.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
const R600InstrInfo *TII;
|
||||
|
||||
public:
|
||||
R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
|
||||
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
const char *getPassName() const {
|
||||
return "R600 Expand special instructions pass";
|
||||
}
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char R600ExpandSpecialInstrsPass::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
|
||||
return new R600ExpandSpecialInstrsPass(TM);
|
||||
}
|
||||
|
||||
/// Expand every reduction instruction in \p MF into four instructions, one
/// per channel (0-3).
///
/// For each channel a copy of the original opcode is emitted that
///   - reads the channel's sub-register of both source operands,
///   - writes the R600_TReg32 register at index (DstBase * 4 + Chan),
///   - carries MO_FLAG_MASK on its destination unless the channel equals the
///     hardware channel of the original destination register,
///   - carries MO_FLAG_LAST on its destination for channel 3,
///   - is marked as inside a bundle for every channel except the first.
/// The original instruction is erased afterwards.
///
/// \returns true if at least one instruction was expanded (i.e. the function
///          was modified), false otherwise.
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {

  const R600RegisterInfo &TRI = TII->getRegisterInfo();
  bool Changed = false;

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    MachineBasicBlock::iterator I = MBB.begin();
    while (I != MBB.end()) {
      MachineInstr &MI = *I;
      // Advance before MI is touched: MI is erased below, and the expanded
      // instructions are inserted in front of its successor.
      I = llvm::next(I);

      if (!TII->isReductionOp(MI.getOpcode())) {
        continue;
      }

      // These values depend only on MI, so look them up once instead of once
      // per channel.
      const unsigned Opcode = MI.getOpcode();
      const unsigned DstReg = MI.getOperand(0).getReg();
      const unsigned Src0 = MI.getOperand(1).getReg();
      const unsigned Src1 = MI.getOperand(2).getReg();
      const unsigned DstBase = TRI.getHWRegIndex(DstReg);
      const unsigned DstChan = TRI.getHWRegChan(DstReg);

      // Expand the instruction
      for (unsigned Chan = 0; Chan < 4; Chan++) {
        unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
        unsigned NewSrc0 = TRI.getSubReg(Src0, SubRegIndex);
        unsigned NewSrc1 = TRI.getSubReg(Src1, SubRegIndex);
        unsigned NewDstReg =
            AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);

        // Mask every channel other than the original destination's channel,
        // and flag the fourth channel as the last one.
        unsigned Flags = (Chan != DstChan ? MO_FLAG_MASK : 0);
        Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
        MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true);
        NewDstOp.addTargetFlag(Flags);

        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode))
            .addOperand(NewDstOp)
            .addReg(NewSrc0)
            .addReg(NewSrc1)
            ->setIsInsideBundle(Chan != 0);
      }
      MI.eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}
|
Loading…
Reference in New Issue