radeon/llvm: Lower ROTL to BIT_ALIGN

2012-06-20 16:28:01 -04:00 · 2012-06-20 16:28:01 -04:00 · c53c8d0555
parent cd287301ec
commit c53c8d0555
6 changed files with 54 additions and 1 deletions
--- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h
+++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
@ -57,6 +57,7 @@ namespace AMDGPUISD
 enum
 {
  AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
+  BITALIGN,
  FRACT,
  FMAX,
  SMAX,
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.td
@ -11,10 +11,24 @@
 //
 //===----------------------------------------------------------------------===//

+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+// Type profile for a node with one result and three operands.
+// Index 0 is the result; indices 1-3 are the operands.
+//   - operands 1 and 2 must have the same type as the result
+//   - the result (and therefore operands 1 and 2) must be an integer type
+//   - operand 3 must be an integer type, but is not tied to the result type
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
 //===----------------------------------------------------------------------===//
 // AMDGPU DAG Nodes
 //

+// out = ((a << 32) | b) >> c
+//
+// i.e. a supplies the bits above bit 31, b supplies the low 32 bits, and the
+// combined value is shifted right by c.
+//
+// Can be used to optimize rotl:
+// rotl(a, b) = bitalign(a, a, 32 - b)
+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
+
 // out = a - floor(a)
 def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

--- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
@ -564,7 +564,6 @@ AMDILTargetLowering::LowerMemArgument(

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
-    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@ -33,6 +33,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

+  setOperationAction(ISD::ROTL, MVT::i32, Custom);
+
  setSchedulingPreference(Sched::VLIW);
 }

@ -256,3 +258,29 @@ void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBl
          .addReg(PtrReg)
          .addImm(0);
 }
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+
+/// LowerOperation - Dispatch custom lowering by opcode. ROTL is handled by
+/// LowerROTL; every other opcode is forwarded unchanged to
+/// AMDGPUTargetLowering::LowerOperation.
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+  const unsigned Opcode = Op.getOpcode();
+
+  if (Opcode == ISD::ROTL) {
+    return LowerROTL(Op, DAG);
+  }
+
+  // Not an opcode lowered at the R600 level.
+  return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+}
+
+/// LowerROTL - Replace a ROTL node with an AMDGPUISD::BITALIGN node, using
+///   rotl(a, b) = bitalign(a, a, 32 - b)
+/// The result keeps the value type and debug location of the original node.
+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc Loc = Op.getDebugLoc();
+  EVT ResultVT = Op.getValueType();
+  SDValue Value = Op.getOperand(0);
+  SDValue Amount = Op.getOperand(1);
+
+  // BITALIGN shifts right, so a left rotate by Amount becomes a shift by
+  // (32 - Amount).
+  SDValue BitWidth = DAG.getConstant(32, MVT::i32);
+  SDValue RightShift = DAG.getNode(ISD::SUB, Loc, ResultVT, BitWidth, Amount);
+
+  // The rotated value is supplied as both of the first two operands.
+  return DAG.getNode(AMDGPUISD::BITALIGN, Loc, ResultVT,
+                     Value, Value, RightShift);
+}
--- a/src/gallium/drivers/radeon/R600ISelLowering.h
+++ b/src/gallium/drivers/radeon/R600ISelLowering.h
@ -26,6 +26,7 @@ public:
  R600TargetLowering(TargetMachine &TM);
  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
      MachineBasicBlock * BB) const;
+  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

 private:
  const R600InstrInfo * TII;
@ -37,6 +38,9 @@ private:
  void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
      MachineRegisterInfo & MRI, unsigned dword_offset) const;

+  /// LowerROTL - Lower ROTL opcode to BITALIGN
+  SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+
 };

 } // End namespace llvm;
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@ -821,6 +821,13 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
 /* ------------------------------- */

 let Predicates = [isEGorCayman] in {
+
+  // BIT_ALIGN_INT: three-operand instruction (opcode 0xC) selected for
+  // AMDGPUbitalign nodes; $src0, $src1 and $src2 map to the node's three
+  // operands in order. Only available under the enclosing isEGorCayman
+  // predicate.
+  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
+    [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
+                                          R600_Reg32:$src2))],
+    VecALU
+  >;
+
  def MULADD_eg : MULADD_Common<0x14>;
  def ASHR_eg : ASHR_Common<0x15>;
  def LSHR_eg : LSHR_Common<0x16>;