radeon/llvm: Lower ROTL to BIT_ALIGN
This commit is contained in:
parent
cd287301ec
commit
c53c8d0555
|
@ -57,6 +57,7 @@ namespace AMDGPUISD
|
|||
enum
|
||||
{
|
||||
AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
|
||||
BITALIGN,
|
||||
FRACT,
|
||||
FMAX,
|
||||
SMAX,
|
||||
|
|
|
@ -11,10 +11,24 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU DAG Profiles
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Type profile for an integer ternary node: 1 result and 3 operands.
// Operands 1 and 2 are constrained to the same type as result 0, which must
// be an integer type; operand 3 must also be an integer type but is not tied
// to the result type by these constraints.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU DAG Nodes
|
||||
//
|
||||
|
||||
// out = ((a << 32) | b) >> c
|
||||
//
|
||||
// Can be used to optimize rotl:
|
||||
// rotl(a, b) = bitalign(a, a, 32 - b)
|
||||
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
|
||||
|
||||
// out = a - floor(a)
|
||||
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
|
||||
|
||||
|
|
|
@ -564,7 +564,6 @@ AMDILTargetLowering::LowerMemArgument(
|
|||
|
||||
// GPU doesn't have a rotl, rotr, or byteswap instruction
|
||||
setOperationAction(ISD::ROTR, VT, Expand);
|
||||
setOperationAction(ISD::ROTL, VT, Expand);
|
||||
setOperationAction(ISD::BSWAP, VT, Expand);
|
||||
|
||||
// GPU doesn't have any counting operators
|
||||
|
|
|
@ -33,6 +33,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
|||
|
||||
setOperationAction(ISD::FSUB, MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::ROTL, MVT::i32, Custom);
|
||||
|
||||
setSchedulingPreference(Sched::VLIW);
|
||||
}
|
||||
|
||||
|
@ -256,3 +258,29 @@ void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBl
|
|||
.addReg(PtrReg)
|
||||
.addImm(0);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom DAG Lowering Operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
/// LowerOperation - Dispatch custom lowering based on the opcode of \p Op.
/// ISD::ROTL is handled by LowerROTL; every other opcode is forwarded to
/// AMDGPUTargetLowering::LowerOperation.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
switch (Op.getOpcode()) {
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
case ISD::ROTL: return LowerROTL(Op, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
/// LowerROTL - Replace a ROTL node with an AMDGPUISD::BITALIGN node.
/// The rotated value (operand 0 of \p Op) is passed as both the first and
/// second BITALIGN operand, and the third operand is (32 - operand 1).
/// The new node reuses the debug location and value type of \p Op.
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
|
||||
Op.getOperand(0),
|
||||
Op.getOperand(0),
|
||||
DAG.getNode(ISD::SUB, DL, VT,
|
||||
// NOTE(review): the constant is created as MVT::i32 while the SUB node uses
// VT; these agree only when VT is i32. The constructor hunk visible in this
// commit marks ROTL as Custom for MVT::i32 only -- confirm no other type
// reaches this path.
DAG.getConstant(32, MVT::i32),
|
||||
Op.getOperand(1)));
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ public:
|
|||
R600TargetLowering(TargetMachine &TM);
|
||||
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||
MachineBasicBlock * BB) const;
|
||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
private:
|
||||
const R600InstrInfo * TII;
|
||||
|
@ -37,6 +38,9 @@ private:
|
|||
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineRegisterInfo & MRI, unsigned dword_offset) const;
|
||||
|
||||
/// LowerROTL - Lower ROTL opcode to BITALIGN
|
||||
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
};
|
||||
|
||||
} // End namespace llvm;
|
||||
|
|
|
@ -821,6 +821,13 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
|
|||
/* ------------------------------- */
|
||||
|
||||
let Predicates = [isEGorCayman] in {
|
||||
|
||||
// BIT_ALIGN_INT instruction definition. Its selection pattern matches an
// AMDGPUbitalign node with three R600_Reg32 operands and writes the result
// to an R600_Reg32 destination.
// NOTE(review): 0xC is presumably the instruction opcode and VecALU the
// itinerary class expected by R600_3OP -- confirm against the R600_3OP class.
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
|
||||
[(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
|
||||
R600_Reg32:$src2))],
|
||||
VecALU
|
||||
>;
|
||||
|
||||
def MULADD_eg : MULADD_Common<0x14>;
|
||||
def ASHR_eg : ASHR_Common<0x15>;
|
||||
def LSHR_eg : LSHR_Common<0x16>;
|
||||
|
|
Loading…
Reference in New Issue