radeon/llvm: Use the MCCodeEmitter for SI

2012-08-17 19:42:11 +00:00 · 2012-08-17 19:42:11 +00:00 · 235318a578
parent 2de24024c1
commit 235318a578
15 changed files with 591 additions and 431 deletions
--- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.cpp
@ -1,6 +1,9 @@

 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/TargetRegistry.h"

 using namespace llvm;
@ -14,3 +17,91 @@ static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
 extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
 }
+
+/// runOnMachineFunction - Replacement for the default AsmPrinter driver.  The
+/// override exists so that EmitFunctionHeader(), which the MCPureStreamer
+/// can't handle, is never called.  Always returns false.
+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  const AMDGPUSubtarget &Subtarget = TM.getSubtarget<AMDGPUSubtarget>();
+
+  // Optional debug dump of the function before anything is emitted.
+  if (Subtarget.dumpCode())
+    MF.dump();
+
+  SetupMachineFunction(MF);
+
+  // Only generations that compare greater than HD6XXX get the program info
+  // words emitted in front of the function body.
+  const bool needsProgramInfo =
+      Subtarget.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX;
+  if (needsProgramInfo)
+    EmitProgramInfo(MF);
+
+  EmitFunctionBody();
+  return false;
+}
+
+/// EmitProgramInfo - Walk every operand of every instruction in \p MF to find
+/// the highest hardware SGPR and VGPR index that is referenced, then emit
+/// three 32-bit values to the output streamer: MaxSGPR + 1, MaxVGPR + 1 and
+/// the function's spi_ps_input_addr.
+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+  unsigned MaxSGPR = 0;
+  unsigned MaxVGPR = 0;
+  bool VCCUsed = false;
+  const SIRegisterInfo * RI =
+                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+                                                    I != E; ++I) {
+      MachineInstr &MI = *I;
+
+      unsigned numOperands = MI.getNumOperands();
+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+        MachineOperand & MO = MI.getOperand(op_idx);
+        if (!MO.isReg()) {
+          continue;
+        }
+        unsigned reg = MO.getReg();
+        // VCC is not counted during the scan; it is accounted for afterwards.
+        if (reg == AMDGPU::VCC) {
+          VCCUsed = true;
+          continue;
+        }
+
+        // Classify the register: scalar or vector file, and how many
+        // consecutive hardware registers it covers.
+        bool isSGPR = false;
+        unsigned width = 0;
+        if (AMDGPU::SReg_32RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 1;
+        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 1;
+        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 2;
+        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 2;
+        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 4;
+        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 4;
+        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 8;
+        } else {
+          // The negation has to be outside the string: a bare string literal
+          // is a non-null pointer, so assert("...") can never fire.
+          assert(!"Unknown register class");
+          // Without assertions, skip the operand rather than computing
+          // hwReg + 0 - 1, which wraps around for hardware register 0 and
+          // would be booked as a VGPR.
+          continue;
+        }
+
+        // Index of the last hardware register covered by this operand.
+        unsigned hwReg = RI->getHWRegNum(reg);
+        unsigned maxUsed = hwReg + width - 1;
+        if (isSGPR) {
+          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+        } else {
+          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+        }
+      }
+    }
+  }
+  // A function that touches VCC gets its SGPR maximum bumped by two.
+  if (VCCUsed) {
+    MaxSGPR += 2;
+  }
+  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+  // The maxima are indices; + 1 turns them into register counts.
+  OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
+  OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
+  OutStreamer.EmitIntValue(MFI->spi_ps_input_addr, 4);
+}
--- a/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h
+++ b/src/gallium/drivers/radeon/AMDGPUAsmPrinter.h
@ -12,10 +12,16 @@ public:
  explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
    : AsmPrinter(TM, Streamer) { }

+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
  virtual const char *getPassName() const {
    return "AMDGPU Assembly Printer";
  }

+  /// EmitProgramInfo - Emit register usage information so that the GPU driver
+  /// can correctly setup the GPU state.
+  void EmitProgramInfo(MachineFunction &MF);
+
  /// EmitInstuction - Implemented in AMDGPUMCInstLower.cpp
  virtual void EmitInstruction(const MachineInstr *MI);
 };
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@ -73,17 +73,18 @@ bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                              formatted_raw_ostream &Out,
                                              CodeGenFileType FileType,
                                              bool DisableVerify) {
-  // XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
-  // only using it to access addPassesToGenerateCode()
-  bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
-                                                     DisableVerify);
-  assert(fail);

  const AMDGPUSubtarget &STM = getSubtarget<AMDGPUSubtarget>();
  std::string gpu = STM.getDeviceName();
  if (gpu == "SI") {
-    PM.add(createSICodeEmitterPass(Out));
+    return LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
+                                                     DisableVerify);
  } else if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    // XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
+    // only using it to access addPassesToGenerateCode()
+    bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
+                                                     DisableVerify);
+    assert(fail);
    PM.add(createR600CodeEmitterPass(Out));
  } else {
    abort();
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUAsmBackend.cpp
@ -0,0 +1,80 @@
+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDILMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+/// AMDGPUMCObjectWriter - Bare-bones object writer.  Post-layout binding is a
+/// no-op, relocations are not supported, and WriteObject() (defined out of
+/// line) is the only member that produces output.
+class AMDGPUMCObjectWriter : public MCObjectWriter {
+public:
+  // NOTE(review): the 'true' passed to the base class is presumably its
+  // little-endian flag -- confirm against MCObjectWriter.
+  AMDGPUMCObjectWriter(raw_ostream &OS)
+    : MCObjectWriter(OS, true) { }
+
+  /// Nothing to bind after layout.
+  virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+                                        const MCAsmLayout &Layout) {
+    //XXX: Implement if necessary.
+  }
+
+  /// Relocations are not implemented; reaching this trips an assertion.
+  virtual void RecordRelocation(const MCAssembler &Asm,
+                                const MCAsmLayout &Layout,
+                                const MCFragment *Fragment,
+                                const MCFixup &Fixup,
+                                MCValue Target,
+                                uint64_t &FixedValue) {
+    assert(!"Not implemented");
+  }
+
+  /// Write the data of every section in the assembler.
+  virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+
+/// AMDGPUAsmBackend - Assembler backend with no fixups and no relaxation.
+/// applyFixup() and relaxInstruction() are not implemented and assert when
+/// called.
+class AMDGPUAsmBackend : public MCAsmBackend {
+public:
+  /// The Target argument is accepted but not used.
+  AMDGPUAsmBackend(const Target &T)
+    : MCAsmBackend() {}
+
+  /// Returns a newly allocated AMDGPUMCObjectWriter that writes to \p OS.
+  virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+
+  /// This backend defines no target-specific fixup kinds.
+  virtual unsigned getNumFixupKinds() const { return 0; }
+
+  virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                          uint64_t Value) const { assert(!"Not implemented"); }
+
+  /// No fixup ever requires relaxation.
+  virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+                                    const MCInstFragment *DF,
+                                    const MCAsmLayout &Layout) const {
+    return false;
+  }
+
+  virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+    assert(!"Not implemented");
+  }
+
+  /// No instruction ever needs relaxation.
+  virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
+
+  /// Reports success without writing anything to \p OW, whatever \p Count is.
+  virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+    return true;
+  }
+};
+
+} //End anonymous namespace
+
+/// WriteObject - Hand each section of \p Asm, in iteration order, to
+/// MCAssembler::writeSectionData().
+void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
+                                       const MCAsmLayout &Layout) {
+  MCAssembler::iterator Section = Asm.begin();
+  MCAssembler::iterator SectionEnd = Asm.end();
+  while (Section != SectionEnd) {
+    Asm.writeSectionData(Section, Layout);
+    ++Section;
+  }
+}
+
+/// createAMDGPUAsmBackend - Allocate a new AMDGPUAsmBackend for \p T.  \p TT
+/// is not consulted.
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) {
+  MCAsmBackend *Backend = new AMDGPUAsmBackend(T);
+  return Backend;
+}
+
+/// createObjectWriter - Allocate a new AMDGPUMCObjectWriter bound to \p OS.
+AMDGPUMCObjectWriter *
+AMDGPUAsmBackend::createObjectWriter(raw_ostream &OS) const {
+  AMDGPUMCObjectWriter *Writer = new AMDGPUMCObjectWriter(OS);
+  return Writer;
+}
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/src/gallium/drivers/radeon/MCTargetDesc/AMDGPUMCCodeEmitter.h
@ -0,0 +1,59 @@
+//===-- AMDGPUMCCodeEmitter.h - AMDGPU Code Emitter interface ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUCODEEMITTER_H
+#define AMDGPUCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+  class MCInst;
+  class MCOperand;
+
+  /// AMDGPUMCCodeEmitter - Base class for the MC code emitters.  It declares
+  /// getBinaryCodeForInstr() and supplies overridable operand-encoding and
+  /// post-encoding hooks.  Every default hook returns 0, except
+  /// VOPPostEncode(), which returns its input unchanged.
+  class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+  public:
+
+    /// Declared here only; no definition is provided in this header.
+    uint64_t getBinaryCodeForInstr(const MCInst &MI,
+                                   SmallVectorImpl<MCFixup> &Fixups) const;
+
+    /// Default operand encoding: 0.
+    virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+      return 0;
+    }
+
+    /// Default encoding: 0.
+    virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+      return 0;
+    }
+
+    /// Default encoding: 0.
+    virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+      return 0;
+    }
+
+    /// Default post-encoder: passes \p Value through untouched.
+    virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
+      return Value;
+    }
+
+    /// Default encoding: 0.
+    virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+      return 0;
+    }
+
+    /// Default encoding: 0.
+    virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+      return 0;
+    }
+  };
+
+} // End namespace llvm
+
+#endif // AMDGPUCODEEMITTER_H
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDILMCAsmInfo.cpp
+++ b/src/gallium/drivers/radeon/MCTargetDesc/AMDILMCAsmInfo.cpp
@ -19,6 +19,8 @@
 using namespace llvm;
 AMDILMCAsmInfo::AMDILMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
 {
+  HasSingleParameterDotFile = false;
+  WeakDefDirective = NULL;
  //===------------------------------------------------------------------===//
  HasSubsectionsViaSymbols = true;
  HasMachoZeroFillDirective = false;
@ -67,16 +69,10 @@ AMDILMCAsmInfo::AMDILMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
  LCOMMDirectiveType = LCOMM::None;
  COMMDirectiveAlignmentIsInBytes = false;
  HasDotTypeDotSizeDirective = false;
-  HasSingleParameterDotFile = true;
  HasNoDeadStrip = true;
  HasSymbolResolver = false;
  WeakRefDirective = ".weakref\t";
-  WeakDefDirective = ".weakdef\t";
  LinkOnceDirective = NULL;
-  HiddenVisibilityAttr = MCSA_Hidden;
-  HiddenDeclarationVisibilityAttr = MCSA_Hidden;
-  ProtectedVisibilityAttr = MCSA_Protected;
-
  //===--- Dwarf Emission Directives -----------------------------------===//
  HasLEB128 = true;
  SupportsDebugInformation = true;
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDILMCTargetDesc.cpp
+++ b/src/gallium/drivers/radeon/MCTargetDesc/AMDILMCTargetDesc.cpp
@ -5,6 +5,7 @@
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
@ -56,6 +57,15 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
  return new AMDGPUInstPrinter(MAI, MII, MRI);
 }

+/// createMCStreamer - Object-streamer factory registered with the
+/// TargetRegistry.  It always builds a pure streamer; \p T, \p TT,
+/// \p RelaxAll and \p NoExecStack are accepted to match the registration
+/// signature but are not used.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+                                    MCContext &Ctx, MCAsmBackend &MAB,
+                                    raw_ostream &OS,
+                                    MCCodeEmitter *Emitter,
+                                    bool RelaxAll,
+                                    bool NoExecStack) {
+  // Parameter names must not start with an underscore followed by an
+  // uppercase letter; such identifiers are reserved for the implementation.
+  return createPureStreamer(Ctx, MAB, OS, Emitter);
+}
+
 extern "C" void LLVMInitializeAMDGPUTargetMC() {

  RegisterMCAsmInfo<AMDILMCAsmInfo> Y(TheAMDGPUTarget);
@ -69,4 +79,10 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
  TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);

  TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
+
+  TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
+
+  TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
+
+  TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
 }
--- a/src/gallium/drivers/radeon/MCTargetDesc/AMDILMCTargetDesc.h
+++ b/src/gallium/drivers/radeon/MCTargetDesc/AMDILMCTargetDesc.h
@ -15,12 +15,24 @@
 #ifndef AMDILMCTARGETDESC_H
 #define AMDILMCTARGETDESC_H

+#include "llvm/ADT/StringRef.h"
+
 namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
 class MCSubtargetInfo;
 class Target;

 extern Target TheAMDGPUTarget;

+/// Create the MC code emitter for the AMDGPU target.  The definition in
+/// SIMCCodeEmitter.cpp returns NULL when the subtarget's feature bits do not
+/// include Feature64BitPtr, so callers must be prepared for a null result.
+MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+                                         const MCSubtargetInfo &STI,
+                                         MCContext &Ctx);
+
+/// Create the MC assembler backend for the AMDGPU target.
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT);
 } // End llvm namespace

 #define GET_REGINFO_ENUM
--- a/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/MCTargetDesc/SIMCCodeEmitter.cpp
@ -0,0 +1,309 @@
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The SI code emitter produces machine code that can be executed directly on
+// the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDILMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+// Operand encoding returned when the operand is a literal constant.
+#define LITERAL_REG 255
+// Bit set by VOPPostEncode for a VGPR source operand; src_idx is the operand
+// index.  The argument is parenthesised so that compound expressions such as
+// VGPR_BIT(i + 1) expand correctly.
+#define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))
+// Mask applied to an instruction's TSFlags to extract its encoding type.
+#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
+
+
+// Encoding-type identifiers.  Keep these values in sync with SIInstructions.td
+// and with the InstrEncodingInfo array in SIInstrInfo.cpp.
+//
+// NOTE: These values only identify the encoding type inside LLVM; the
+// encoding type stored in the instruction format itself is different.
+// All sixteen values fit in the 4-bit SI_INSTR_FLAGS_ENCODING_MASK.
+namespace SIInstrEncodingType {
+  enum Encoding {
+    EXP    = 0,
+    LDS    = 1,
+    MIMG   = 2,
+    MTBUF  = 3,
+    MUBUF  = 4,
+    SMRD   = 5,
+    SOP1   = 6,
+    SOP2   = 7,
+    SOPC   = 8,
+    SOPK   = 9,
+    SOPP   = 10,
+    VINTRP = 11,
+    VOP1   = 12,
+    VOP2   = 13,
+    VOP3   = 14,
+    VOPC   = 15
+  };
+}
+
+using namespace llvm;
+
+namespace {
+/// SIMCCodeEmitter - MC code emitter for SI.  Overrides the operand-encoding
+/// and post-encoding hooks of AMDGPUMCCodeEmitter and writes the encoded
+/// instruction bytes to the output stream.
+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
+  // Copying is disabled: declared private and never defined.
+  SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+  void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+
+  const MCInstrInfo &MCII;
+  const MCSubtargetInfo &STI;
+  MCContext &Ctx;
+
+public:
+  SIMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+                  MCContext &ctx)
+    : MCII(mcii), STI(sti), Ctx(ctx) { }
+
+  ~SIMCCodeEmitter() { }
+
+  // getBinaryCodeForInstr(), the tablegen-generated encoder built from the
+  // *.td files, is not redeclared here; the declaration inherited from
+  // AMDGPUMCCodeEmitter is used.
+
+  /// EncodeInstruction - Encode \p MI and write the resulting bytes to \p OS.
+  virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                 SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getMachineOpValue - Return the encoding for an MCOperand.
+  virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                                     SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// GPRAlign - Return the register code of operand \p OpNo shifted right by
+  /// \p shift bits.
+  unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
+
+  /// GPR2AlignEncode - Encoding used when 2 consecutive registers are used.
+  virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
+                                   SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// GPR4AlignEncode - Encoding used when 4 consecutive registers are used.
+  virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
+                                   SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// i32LiteralEncode - Encode an i32 literal that is used as an instruction
+  /// operand in place of a register.
+  virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
+                                    SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// SMRDmemriEncode - Encoding for SMRD indexed loads.
+  virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+                                   SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// VOPPostEncode - Post-encoder method for VOP instructions.
+  virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
+
+private:
+
+  /// getEncodingType - Return the SIInstrEncodingType of this instruction.
+  unsigned getEncodingType(const MCInst &MI) const;
+
+  /// getEncodingBytes - Return the size in bytes of this instruction's
+  /// encoding.
+  unsigned getEncodingBytes(const MCInst &MI) const;
+
+  /// getRegBinaryCode - Return the hardware encoding for a register.
+  unsigned getRegBinaryCode(unsigned reg) const;
+
+  /// getHWRegNum - Generated function that returns the hardware encoding for
+  /// a register.
+  unsigned getHWRegNum(unsigned reg) const;
+
+};
+
+} // End anonymous namespace
+
+/// createAMDGPUMCCodeEmitter - Return a new SIMCCodeEmitter when the
+/// subtarget's feature bits include Feature64BitPtr, and NULL otherwise.
+MCCodeEmitter *llvm::createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+                                               const MCSubtargetInfo &STI,
+                                               MCContext &Ctx) {
+  const bool has64BitPtr = STI.getFeatureBits() & AMDGPU::Feature64BitPtr;
+  if (!has64BitPtr)
+    return NULL;
+
+  return new SIMCCodeEmitter(MCII, STI, Ctx);
+}
+
+/// EncodeInstruction - Compute the binary encoding of \p MI and write it to
+/// \p OS one byte at a time, least significant byte first.
+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+  uint64_t Remaining = getBinaryCodeForInstr(MI, Fixups);
+  for (unsigned left = getEncodingBytes(MI); left != 0; --left) {
+    OS.write((uint8_t) (Remaining & 0xff));
+    Remaining >>= 8;
+  }
+}
+
+/// getMachineOpValue - Return the encoding of a single operand: the register
+/// binary code for registers, the value itself for integer immediates, and
+/// LITERAL_REG for floating-point immediates.  Any other operand kind is
+/// unsupported.
+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+                                            const MCOperand &MO,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg())
+    return getRegBinaryCode(MO.getReg());
+
+  if (MO.isImm())
+    return MO.getImm();
+
+  // XXX: Not all instructions can use inline literals
+  // XXX: We should make sure this is a 32-bit constant
+  if (MO.isFPImm())
+    return LITERAL_REG;
+
+  llvm_unreachable("Encoding of this operand type is not supported yet.");
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Operand Encodings
+//===----------------------------------------------------------------------===//
+
+/// GPRAlign - Return the binary code of the register in operand \p OpNo,
+/// shifted right by \p shift bits.
+unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
+                                   unsigned shift) const {
+  unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
+  return regCode >> shift;
+}
+
+/// GPR2AlignEncode - Encode operand \p OpNo as its register code shifted
+/// right by one bit.
+unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
+                                        SmallVectorImpl<MCFixup> &Fixup) const {
+  const unsigned PairShift = 1;
+  return GPRAlign(MI, OpNo, PairShift);
+}
+
+/// GPR4AlignEncode - Encode operand \p OpNo as its register code shifted
+/// right by two bits.
+unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
+                                        SmallVectorImpl<MCFixup> &Fixup) const {
+  const unsigned QuadShift = 2;
+  return GPRAlign(MI, OpNo, QuadShift);
+}
+
+/// i32LiteralEncode - Return LITERAL_REG in the low bits with the immediate
+/// value of operand \p OpNo placed in bits [63:32].
+uint64_t SIMCCodeEmitter::i32LiteralEncode(const MCInst &MI,
+                                           unsigned OpNo,
+                                        SmallVectorImpl<MCFixup> &Fixup) const {
+  // getImm() is signed; shift it as an unsigned value because left-shifting
+  // a negative signed integer is undefined behaviour.
+  const uint64_t Literal =
+      static_cast<uint64_t>(MI.getOperand(OpNo).getImm());
+  return LITERAL_REG | (Literal << 32);
+}
+
+#define SMRD_OFFSET_MASK 0xff
+#define SMRD_IMM_SHIFT 8
+#define SMRD_SBASE_MASK 0x3f
+#define SMRD_SBASE_SHIFT 9
+/// SMRDmemriEncode - Encode the offset and the base pointer of an SMRD
+/// instruction.  Operand \p OpNo is the base and operand \p OpNo + 1 is the
+/// offset.  The returned bit string has this layout:
+///
+/// OFFSET = bits{7-0}
+/// IMM    = bits{8}
+/// SBASE  = bits{14-9}
+///
+uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+                                        SmallVectorImpl<MCFixup> &Fixup) const {
+  const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
+
+  //XXX: Use this function for SMRD loads with register offsets
+  assert(OffsetOp.isImm());
+
+  const uint32_t OffsetBits =
+      getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK;
+
+  //XXX If the Offset is a register we shouldn't set this bit
+  const uint32_t ImmBit = 1 << SMRD_IMM_SHIFT;
+
+  const uint32_t SBaseBits =
+      (GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT;
+
+  return OffsetBits | ImmBit | SBaseBits;
+}
+
+//===----------------------------------------------------------------------===//
+// Post Encoder Callbacks
+//===----------------------------------------------------------------------===//
+
+/// VOPPostEncode - Post-encoder for VOP instructions.  For each source
+/// operand that is a register in VReg_32 or VReg_64, set the matching
+/// VGPR_BIT in \p Value (offset by 32 bits for VOP3).  For a floating-point
+/// immediate operand, place its 32-bit literal in bits [63:32].  Returns the
+/// updated encoding.
+uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
+  unsigned encodingType = getEncodingType(MI);
+  unsigned numSrcOps;
+  unsigned vgprBitOffset;
+
+  if (encodingType == SIInstrEncodingType::VOP3) {
+    numSrcOps = 3;
+    vgprBitOffset = 32;
+  } else {
+    numSrcOps = 1;
+    vgprBitOffset = 0;
+  }
+
+  // Add one to skip over the destination reg operand.
+  for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
+    const MCOperand &MO = MI.getOperand(opIdx);
+    if (MO.isReg()) {
+      unsigned reg = MO.getReg();
+      if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
+          AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
+        Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
+      }
+    } else if (MO.isFPImm()) {
+      // XXX: Not all instructions can use inline literals
+      // XXX: We should make sure this is a 32-bit constant
+      // The literal slot must hold the IEEE-754 bit pattern of the
+      // single-precision value.  A plain integer cast converts numerically
+      // (0.5 becomes 0) and is undefined for negative values.
+      union {
+        float f;
+        uint32_t i;
+      } Imm;
+      Imm.f = MO.getFPImm();
+      Value |= ((uint64_t)Imm.i) << 32;
+    }
+  }
+  return Value;
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+/// getEncodingType - Return the encoding type stored in the low bits
+/// (SI_INSTR_FLAGS_ENCODING_MASK) of the instruction description's TSFlags.
+unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
+  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+  return Desc.TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
+}
+
+/// getEncodingBytes - Return the size of the instruction's encoding in bytes,
+/// which is either 4 or 8.
+unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
+
+  // Instructions with literal constants are expanded to 64-bits, and
+  // the constant is stored in bits [63:32]
+  const unsigned operandCount = MI.getNumOperands();
+  for (unsigned idx = 0; idx != operandCount; ++idx)
+    if (MI.getOperand(idx).isFPImm())
+      return 8;
+
+  // This instruction always has a literal
+  if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32)
+    return 8;
+
+  // Otherwise the size is determined by the encoding type alone.
+  const unsigned type = getEncodingType(MI);
+  const bool is64Bit = type == SIInstrEncodingType::EXP ||
+                       type == SIInstrEncodingType::LDS ||
+                       type == SIInstrEncodingType::MUBUF ||
+                       type == SIInstrEncodingType::MTBUF ||
+                       type == SIInstrEncodingType::MIMG ||
+                       type == SIInstrEncodingType::VOP3;
+  return is64Bit ? 8 : 4;
+}
+
+
+/// getRegBinaryCode - Return the hardware encoding for \p reg.  M0 and
+/// SREG_LIT_0 have fixed codes; every other register is looked up through
+/// getHWRegNum().
+unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
+  if (reg == AMDGPU::M0)
+    return 124;
+  if (reg == AMDGPU::SREG_LIT_0)
+    return 128;
+  return getHWRegNum(reg);
+}
+
+#define SIRegisterInfo SIMCCodeEmitter
+#include "SIRegisterGetHWRegNum.inc"
+#undef SIRegisterInfo
+
+#include "AMDGPUGenMCCodeEmitter.inc"
--- a/src/gallium/drivers/radeon/Makefile
+++ b/src/gallium/drivers/radeon/Makefile
@ -65,6 +65,9 @@ AMDGPUGenIntrinsics.inc: *.td
 AMDGPUGenCodeEmitter.inc: *.td
 	$(call tablegen, -gen-emitter, AMDGPU.td, $@)

+AMDGPUGenMCCodeEmitter.inc: *.td
+	$(call tablegen, -mc-emitter -gen-emitter, AMDGPU.td, $@)
+
 AMDGPUGenDFAPacketizer.inc: *.td
 	$(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@)

--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@ -13,6 +13,7 @@ GENERATED_SOURCES := \
 	AMDGPUGenEDInfo.inc		\
 	AMDGPUGenIntrinsics.inc		\
 	AMDGPUGenCodeEmitter.inc	\
+	AMDGPUGenMCCodeEmitter.inc	\
 	AMDGPUGenDFAPacketizer.inc

 CPP_SOURCES := \
@ -44,14 +45,15 @@ CPP_SOURCES := \
 	R600MachineFunctionInfo.cpp	\
 	R600RegisterInfo.cpp		\
 	SIAssignInterpRegs.cpp		\
-	SICodeEmitter.cpp		\
 	SIInstrInfo.cpp			\
 	SIISelLowering.cpp		\
 	SIMachineFunctionInfo.cpp	\
 	SIRegisterInfo.cpp		\
 	InstPrinter/AMDGPUInstPrinter.cpp \
 	MCTargetDesc/AMDILMCAsmInfo.cpp	\
+	MCTargetDesc/AMDGPUAsmBackend.cpp \
 	MCTargetDesc/AMDILMCTargetDesc.cpp	\
+	MCTargetDesc/SIMCCodeEmitter.cpp \
 	TargetInfo/AMDILTargetInfo.cpp	\
 	radeon_llvm_emit.cpp

--- a/src/gallium/drivers/radeon/SICodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/SICodeEmitter.cpp
@ -1,354 +0,0 @@
-//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The SI code emitter produces machine code that can be executed directly on
-// the GPU device.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "AMDGPU.h"
-#include "AMDGPUCodeEmitter.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include <map>
-#include <stdio.h>
-
-#define LITERAL_REG 255
-#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
-using namespace llvm;
-
-namespace {
-
-  class SICodeEmitter : public MachineFunctionPass, public AMDGPUCodeEmitter {
-
-  private:
-    static char ID;
-    formatted_raw_ostream &_OS;
-    const TargetMachine *TM;
-
-    //Program Info
-    unsigned MaxSGPR;
-    unsigned MaxVGPR;
-    unsigned CurrentInstrIndex;
-    std::map<int, unsigned> BBIndexes;
-
-    void InitProgramInfo(MachineFunction &MF);
-    void EmitState(MachineFunction & MF);
-    void emitInstr(MachineInstr &MI);
-
-    void outputBytes(uint64_t value, unsigned bytes);
-    unsigned GPRAlign(const MachineInstr &MI, unsigned OpNo, unsigned shift)
-                                                                      const;
-
-  public:
-    SICodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
-        _OS(OS), TM(NULL), MaxSGPR(0), MaxVGPR(0), CurrentInstrIndex(0) { }
-    const char *getPassName() const { return "SI Code Emitter"; }
-    bool runOnMachineFunction(MachineFunction &MF);
-
-    /// getMachineOpValue - Return the encoding for MO
-    virtual uint64_t getMachineOpValue(const MachineInstr &MI,
-                                       const MachineOperand &MO) const;
-
-    /// GPR4AlignEncode - Encoding for when 4 consectuive registers are used 
-    virtual unsigned GPR4AlignEncode(const MachineInstr  &MI, unsigned OpNo)
-                                                                      const;
-
-    /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
-    virtual unsigned GPR2AlignEncode(const MachineInstr &MI, unsigned OpNo)
-                                                                      const;
-    /// i32LiteralEncode - Encode an i32 literal this is used as an operand
-    /// for an instruction in place of a register.
-    virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
-                                                                      const;
-    /// SMRDmemriEncode - Encoding for SMRD indexed loads
-    virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
-                                                                     const;
-
-    /// VOPPostEncode - Post-Encoder method for VOP instructions 
-    virtual uint64_t VOPPostEncode(const MachineInstr &MI,
-                                   uint64_t Value) const;
-  };
-}
-
-char SICodeEmitter::ID = 0;
-
-FunctionPass *llvm::createSICodeEmitterPass(formatted_raw_ostream &OS) {
-  return new SICodeEmitter(OS);
-}
-
-void SICodeEmitter::EmitState(MachineFunction & MF) {
-  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-  outputBytes(MaxSGPR + 1, 4);
-  outputBytes(MaxVGPR + 1, 4);
-  outputBytes(MFI->spi_ps_input_addr, 4);
-}
-
-void SICodeEmitter::InitProgramInfo(MachineFunction &MF) {
-  unsigned InstrIndex = 0;
-  bool VCCUsed = false;
-  const SIRegisterInfo * RI =
-                static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
-
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                  BB != BB_E; ++BB) {
-    MachineBasicBlock &MBB = *BB;
-    BBIndexes[MBB.getNumber()] = InstrIndex;
-    InstrIndex += MBB.size();
-    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-                                                    I != E; ++I) {
-      MachineInstr &MI = *I;
-
-      unsigned numOperands = MI.getNumOperands();
-      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
-        MachineOperand & MO = MI.getOperand(op_idx);
-        unsigned maxUsed;
-        unsigned width = 0;
-        bool isSGPR = false;
-        unsigned reg;
-        unsigned hwReg;
-        if (!MO.isReg()) {
-          continue;
-        }
-        reg = MO.getReg();
-        if (reg == AMDGPU::VCC) {
-          VCCUsed = true;
-          continue;
-        }
-        if (AMDGPU::SReg_32RegClass.contains(reg)) {
-          isSGPR = true;
-          width = 1;
-        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
-          isSGPR = false;
-          width = 1;
-        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
-          isSGPR = true;
-          width = 2;
-        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
-          isSGPR = false;
-          width = 2;
-        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
-          isSGPR = true;
-          width = 4;
-        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
-          isSGPR = false;
-          width = 4;
-        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
-          isSGPR = true;
-          width = 8;
-        } else {
-          assert("!Unknown register class");
-        }
-        hwReg = RI->getHWRegNum(reg);
-        maxUsed = hwReg + width - 1;
-        if (isSGPR) {
-          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
-        } else {
-          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
-        }
-      }
-    }
-  }
-  if (VCCUsed) {
-    MaxSGPR += 2;
-  }
-}
-
-bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
-{
-  TM = &MF.getTarget();
-  const AMDGPUSubtarget &STM = TM->getSubtarget<AMDGPUSubtarget>();
-
-  if (STM.dumpCode()) {
-    MF.dump();
-  }
-
-  InitProgramInfo(MF);
-
-  EmitState(MF);
-
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                  BB != BB_E; ++BB) {
-    MachineBasicBlock &MBB = *BB;
-    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-                                                      I != E; ++I) {
-      MachineInstr &MI = *I;
-      if (MI.getOpcode() != AMDGPU::KILL && MI.getOpcode() != AMDGPU::RETURN) {
-        emitInstr(MI);
-        CurrentInstrIndex++;
-      }
-    }
-  }
-  // Emit S_END_PGM
-  MachineInstr * End = BuildMI(MF, DebugLoc(),
-                               TM->getInstrInfo()->get(AMDGPU::S_ENDPGM));
-  emitInstr(*End);
-  return false;
-}
-
-void SICodeEmitter::emitInstr(MachineInstr &MI)
-{
-  const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
-
-  uint64_t hwInst = getBinaryCodeForInstr(MI);
-
-  if ((hwInst & 0xffffffff) == 0xffffffff) {
-    fprintf(stderr, "Unsupported Instruction: \n");
-    MI.dump();
-    abort();
-  }
-
-  unsigned bytes = SII->getEncodingBytes(MI);
-  outputBytes(hwInst, bytes);
-}
-
-uint64_t SICodeEmitter::getMachineOpValue(const MachineInstr &MI,
-                                          const MachineOperand &MO) const
-{
-  const SIRegisterInfo * RI =
-                static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
-
-  switch(MO.getType()) {
-  case MachineOperand::MO_Register:
-    return RI->getBinaryCode(MO.getReg());
-
-  case MachineOperand::MO_Immediate:
-    return MO.getImm();
-
-  case MachineOperand::MO_FPImmediate:
-    // XXX: Not all instructions can use inline literals
-    // XXX: We should make sure this is a 32-bit constant
-    return LITERAL_REG;
-
-  case MachineOperand::MO_MachineBasicBlock:
-    return (*BBIndexes.find(MI.getParent()->getNumber())).second -
-           CurrentInstrIndex - 1;
-  default:
-    llvm_unreachable("Encoding of this operand type is not supported yet.");
-    break;
-  }
-}
-
-unsigned SICodeEmitter::GPRAlign(const MachineInstr &MI, unsigned OpNo,
-    unsigned shift) const
-{
-  const SIRegisterInfo * RI =
-                static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
-  unsigned regCode = RI->getHWRegNum(MI.getOperand(OpNo).getReg());
-  return regCode >> shift;
-}
-
-unsigned SICodeEmitter::GPR4AlignEncode(const MachineInstr &MI,
-    unsigned OpNo) const
-{
-  return GPRAlign(MI, OpNo, 2);
-}
-
-unsigned SICodeEmitter::GPR2AlignEncode(const MachineInstr &MI,
-    unsigned OpNo) const
-{
-  return GPRAlign(MI, OpNo, 1);
-}
-
-uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI,
-    unsigned OpNo) const
-{
-  return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
-}
-
-#define SMRD_OFFSET_MASK 0xff
-#define SMRD_IMM_SHIFT 8
-#define SMRD_SBASE_MASK 0x3f
-#define SMRD_SBASE_SHIFT 9
-/// SMRDmemriEncode - This function is responsibe for encoding the offset
-/// and the base ptr for SMRD instructions it should return a bit string in
-/// this format:
-///
-/// OFFSET = bits{7-0}
-/// IMM    = bits{8}
-/// SBASE  = bits{14-9}
-///
-uint32_t SICodeEmitter::SMRDmemriEncode(const MachineInstr &MI,
-    unsigned OpNo) const
-{
-  uint32_t encoding;
-
-  const MachineOperand &OffsetOp = MI.getOperand(OpNo + 1);
-
-  //XXX: Use this function for SMRD loads with register offsets
-  assert(OffsetOp.isImm());
-
-  encoding =
-      (getMachineOpValue(MI, OffsetOp) & SMRD_OFFSET_MASK)
-    | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
-    | ((GPR2AlignEncode(MI, OpNo) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
-    ;
-
-  return encoding;
-}
-
-/// Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR.
-/// XXX: It would be nice if we could handle this without a PostEncode function.
-uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI,
-    uint64_t Value) const
-{
-  const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
-  unsigned encodingType = SII->getEncodingType(MI);
-  unsigned numSrcOps;
-  unsigned vgprBitOffset;
-
-  if (encodingType == SIInstrEncodingType::VOP3) {
-    numSrcOps = 3;
-    vgprBitOffset = 32;
-  } else {
-    numSrcOps = 1;
-    vgprBitOffset = 0;
-  }
-
-  // Add one to skip over the destination reg operand.
-  for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
-    const MachineOperand &MO = MI.getOperand(opIdx);
-    switch(MO.getType()) {
-    case MachineOperand::MO_Register:
-      {
-        unsigned reg = MI.getOperand(opIdx).getReg();
-        if (AMDGPU::VReg_32RegClass.contains(reg)
-            || AMDGPU::VReg_64RegClass.contains(reg)) {
-          Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
-        }
-      }
-      break;
-
-    case MachineOperand::MO_FPImmediate:
-      // XXX: Not all instructions can use inline literals
-      // XXX: We should make sure this is a 32-bit constant
-      Value |= (MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue() << 32);
-      continue;
-
-    default:
-      break;
-    }
-  }
-  return Value;
-}
-
-
-void SICodeEmitter::outputBytes(uint64_t value, unsigned bytes)
-{
-  for (unsigned i = 0; i < bytes; i++) {
-    _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
-  }
-}
--- a/src/gallium/drivers/radeon/SIInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp
@ -49,41 +49,6 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   .addReg(SrcReg, getKillRegState(KillSrc));
 }

-unsigned SIInstrInfo::getEncodingType(const MachineInstr &MI) const
-{
-  return get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
-}
-
-unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const
-{
-
-  /* Instructions with literal constants are expanded to 64-bits, and
-   * the constant is stored in bits [63:32] */
-  for (unsigned i = 0; i < MI.getNumOperands(); i++) {
-    if (MI.getOperand(i).getType() == MachineOperand::MO_FPImmediate) {
-      return 8;
-    }
-  }
-
-  /* This instruction always has a literal */
-  if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32) {
-    return 8;
-  }
-
-  unsigned encoding_type = getEncodingType(MI);
-  switch (encoding_type) {
-    case SIInstrEncodingType::EXP:
-    case SIInstrEncodingType::LDS:
-    case SIInstrEncodingType::MUBUF:
-    case SIInstrEncodingType::MTBUF:
-    case SIInstrEncodingType::MIMG:
-    case SIInstrEncodingType::VOP3:
-      return 8;
-    default:
-      return 4;
-  }
-}
-
 MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                           int64_t Imm) const
 {
--- a/src/gallium/drivers/radeon/SIInstrInfo.h
+++ b/src/gallium/drivers/radeon/SIInstrInfo.h
@ -52,34 +52,6 @@ public:

 } // End namespace llvm

-// These must be kept in sync with SIInstructions.td and also the
-// InstrEncodingInfo array in SIInstrInfo.cpp.
-//
-// NOTE: This enum is only used to identify the encoding type within LLVM,
-// the actual encoding type that is part of the instruction format is different
-namespace SIInstrEncodingType {
-  enum Encoding {
-    EXP = 0,
-    LDS = 1,
-    MIMG = 2,
-    MTBUF = 3,
-    MUBUF = 4,
-    SMRD = 5,
-    SOP1 = 6,
-    SOP2 = 7,
-    SOPC = 8,
-    SOPK = 9,
-    SOPP = 10,
-    VINTRP = 11,
-    VOP1 = 12,
-    VOP2 = 13,
-    VOP3 = 14,
-    VOPC = 15
-  };
-}
-
-#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
-
 namespace SIInstrFlags {
  enum Flags {
    // First 4 bits are the instruction encoding
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
@ -52,6 +52,7 @@ using namespace llvm;
 #ifndef EXTERNAL_LLVM
 extern "C" {

+void LLVMInitializeAMDGPUAsmPrinter(void);
 void LLVMInitializeAMDGPUTargetMC(void);
 void LLVMInitializeAMDGPUTarget(void);
 void LLVMInitializeAMDGPUTargetInfo(void);
@ -93,6 +94,7 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
   LLVMInitializeAMDGPUTargetInfo();
   LLVMInitializeAMDGPUTarget();
   LLVMInitializeAMDGPUTargetMC();
+   LLVMInitializeAMDGPUAsmPrinter();
 #endif
   std::string err;
   const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err);
@ -132,7 +134,7 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
   formatted_raw_ostream out(oStream);

   /* Optional extra paramater true / false to disable verify */
-   if (AMDGPUTargetMachine.addPassesToEmitFile(PM, out, TargetMachine::CGFT_AssemblyFile,
+   if (AMDGPUTargetMachine.addPassesToEmitFile(PM, out, TargetMachine::CGFT_ObjectFile,
                                               true)){
      fprintf(stderr, "AddingPasses failed.\n");
      return 1;