radeon/llvm: Turn on the BitExtract peephole optimization

The BitExtract optimization folds a mask and shift operation together
into a single instruction (BFE_UINT).
This commit is contained in:
Tom Stellard 2012-06-20 17:43:11 -04:00
parent c53c8d0555
commit 563a764110
2 changed files with 32 additions and 5 deletions

View File

@ -691,6 +691,11 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
} }
Type *aType = inst->getType(); Type *aType = inst->getType();
bool isVector = aType->isVectorTy(); bool isVector = aType->isVectorTy();
// XXX Support vector types
if (isVector) {
return false;
}
int numEle = 1; int numEle = 1;
// This only works on 32bit integers // This only works on 32bit integers
if (aType->getScalarType() if (aType->getScalarType()
@ -792,23 +797,24 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
callTypes.push_back(aType); callTypes.push_back(aType);
callTypes.push_back(aType); callTypes.push_back(aType);
FunctionType *funcType = FunctionType::get(aType, callTypes, false); FunctionType *funcType = FunctionType::get(aType, callTypes, false);
std::string name = "__amdil_ubit_extract"; std::string name = "llvm.AMDIL.bit.extract.u32";
if (isVector) { if (isVector) {
name += "_v" + itostr(numEle) + "i32"; name += ".v" + itostr(numEle) + "i32";
} else { } else {
name += "_i32"; name += ".";
} }
// Lets create the function. // Lets create the function.
Function *Func = Function *Func =
dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
getOrInsertFunction(llvm::StringRef(name), funcType)); getOrInsertFunction(llvm::StringRef(name), funcType));
Value *Operands[3] = { Value *Operands[3] = {
newMaskConst, ShiftInst->getOperand(0),
shiftValConst, shiftValConst,
ShiftInst->getOperand(0) newMaskConst
}; };
// Lets create the Call with the operands // Lets create the Call with the operands
CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
CI->setDoesNotAccessMemory();
CI->insertBefore(inst); CI->insertBefore(inst);
inst->replaceAllUsesWith(CI); inst->replaceAllUsesWith(CI);
return true; return true;

View File

@ -822,6 +822,27 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
let Predicates = [isEGorCayman] in { let Predicates = [isEGorCayman] in {
// BFE_UINT - bit_extract, an optimization for mask and shift
// Src0 = Input
// Src1 = Offset
// Src2 = Width
//
// bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
//
// Example Usage:
// (Offset, Width)
//
// (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
// (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
// (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
// (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
//
// The selection pattern below matches the int_AMDIL_bit_extract_u32
// intrinsic; its operands map to $src0, $src1 and $src2 in that order
// (i.e. Input, Offset, Width as named above), and the result is written
// to $dst. All four operands are R600_Reg32.
def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
[(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
R600_Reg32:$src1,
R600_Reg32:$src2))],
VecALU
>;
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
[(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
R600_Reg32:$src2))], R600_Reg32:$src2))],