radeon/llvm: Turn on the BitExtract peephole optimization
The BitExtract optimization folds a mask and shift operation together into a single instruction (BFE_UINT).
This commit is contained in:
parent
c53c8d0555
commit
563a764110
|
@ -691,6 +691,11 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
|
|||
}
|
||||
Type *aType = inst->getType();
|
||||
bool isVector = aType->isVectorTy();
|
||||
|
||||
// XXX Support vector types
|
||||
if (isVector) {
|
||||
return false;
|
||||
}
|
||||
int numEle = 1;
|
||||
// This only works on 32bit integers
|
||||
if (aType->getScalarType()
|
||||
|
@ -792,23 +797,24 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
|
|||
callTypes.push_back(aType);
|
||||
callTypes.push_back(aType);
|
||||
FunctionType *funcType = FunctionType::get(aType, callTypes, false);
|
||||
std::string name = "__amdil_ubit_extract";
|
||||
std::string name = "llvm.AMDIL.bit.extract.u32";
|
||||
if (isVector) {
|
||||
name += "_v" + itostr(numEle) + "i32";
|
||||
name += ".v" + itostr(numEle) + "i32";
|
||||
} else {
|
||||
name += "_i32";
|
||||
name += ".";
|
||||
}
|
||||
// Lets create the function.
|
||||
Function *Func =
|
||||
dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
|
||||
getOrInsertFunction(llvm::StringRef(name), funcType));
|
||||
Value *Operands[3] = {
|
||||
newMaskConst,
|
||||
ShiftInst->getOperand(0),
|
||||
shiftValConst,
|
||||
ShiftInst->getOperand(0)
|
||||
newMaskConst
|
||||
};
|
||||
// Lets create the Call with the operands
|
||||
CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
|
||||
CI->setDoesNotAccessMemory();
|
||||
CI->insertBefore(inst);
|
||||
inst->replaceAllUsesWith(CI);
|
||||
return true;
|
||||
|
|
|
@ -822,6 +822,27 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
|
|||
|
||||
let Predicates = [isEGorCayman] in {
|
||||
|
||||
// BFE_UINT - bit_extract, an optimization for mask and shift
|
||||
// Src0 = Input
|
||||
// Src1 = Offset
|
||||
// Src2 = Width
|
||||
//
|
||||
// bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
|
||||
//
|
||||
// Example Usage:
|
||||
// (Offset, Width)
|
||||
//
|
||||
// (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
|
||||
// (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
|
||||
// (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
|
||||
// (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
|
||||
def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
|
||||
[(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
|
||||
R600_Reg32:$src1,
|
||||
R600_Reg32:$src2))],
|
||||
VecALU
|
||||
>;
|
||||
|
||||
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
|
||||
[(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
|
||||
R600_Reg32:$src2))],
|
||||
|
|
Loading…
Reference in New Issue