swr/rast: Updating code style based on current clang-format rules

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Alok Hota 2018-06-22 09:11:26 -05:00 committed by Tim Rowley
parent f90a60fe79
commit 5b7d4f9428
4 changed files with 265 additions and 258 deletions

View File

@ -59,7 +59,7 @@ using namespace SwrJit;
//////////////////////////////////////////////////////////////////////////
/// @brief Constructor for JitManager.
/// @param simdWidth - SIMD width to be used in generated program.
JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
JitManager::JitManager(uint32_t simdWidth, const char* arch, const char* core) :
mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth),
mArch(arch)
{
@ -153,7 +153,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
}
#if LLVM_USE_INTEL_JITEVENTS
JITEventListener *vTune = JITEventListener::createIntelJITEventListener();
JITEventListener* vTune = JITEventListener::createIntelJITEventListener();
mpExec->RegisterJITEventListener(vTune);
#endif
@ -163,7 +163,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
#else
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
#endif
std::vector<Type *> fsArgs;
std::vector<Type*> fsArgs;
// llvm5 is picky and does not take a void * type
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
@ -212,21 +212,21 @@ void JitManager::SetupNewModule()
}
DIType *
JitManager::CreateDebugStructType(StructType * pType,
const std::string & name,
DIFile * pFile,
DIType*
JitManager::CreateDebugStructType(StructType* pType,
const std::string& name,
DIFile* pFile,
uint32_t lineNum,
const std::vector<std::pair<std::string, uint32_t>> &members)
const std::vector<std::pair<std::string, uint32_t>>& members)
{
DIBuilder builder(*mpCurrentModule);
SmallVector<Metadata *, 8> ElemTypes;
DataLayout DL = DataLayout(mpCurrentModule);
uint32_t size = DL.getTypeAllocSizeInBits(pType);
uint32_t alignment = DL.getABITypeAlignment(pType);
DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
DIBuilder builder(*mpCurrentModule);
SmallVector<Metadata*, 8> ElemTypes;
DataLayout DL = DataLayout(mpCurrentModule);
uint32_t size = DL.getTypeAllocSizeInBits(pType);
uint32_t alignment = DL.getABITypeAlignment(pType);
DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
DICompositeType *pDIStructTy = builder.createStructType(pFile,
DICompositeType* pDIStructTy = builder.createStructType(pFile,
name,
pFile,
lineNum,
@ -240,14 +240,14 @@ JitManager::CreateDebugStructType(StructType *
mDebugStructMap[pType] = pDIStructTy;
uint32_t idx = 0;
for (auto &elem : pType->elements())
for (auto& elem : pType->elements())
{
std::string name = members[idx].first;
uint32_t lineNum = members[idx].second;
size = DL.getTypeAllocSizeInBits(elem);
alignment = DL.getABITypeAlignment(elem);
uint32_t offset = DL.getStructLayout(pType)->getElementOffsetInBits(idx);
llvm::DIType *pDebugTy = GetDebugType(elem);
llvm::DIType* pDebugTy = GetDebugType(elem);
ElemTypes.push_back(builder.createMemberType(
pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy));
@ -258,22 +258,22 @@ JitManager::CreateDebugStructType(StructType *
return pDIStructTy;
}
DIType *JitManager::GetDebugArrayType(Type *pTy)
DIType* JitManager::GetDebugArrayType(Type* pTy)
{
DIBuilder builder(*mpCurrentModule);
DataLayout DL = DataLayout(mpCurrentModule);
ArrayType *pArrayTy = cast<ArrayType>(pTy);
ArrayType* pArrayTy = cast<ArrayType>(pTy);
uint32_t size = DL.getTypeAllocSizeInBits(pArrayTy);
uint32_t alignment = DL.getABITypeAlignment(pArrayTy);
SmallVector<Metadata *, 8> Elems;
SmallVector<Metadata*, 8> Elems;
Elems.push_back(builder.getOrCreateSubrange(0, pArrayTy->getNumElements()));
return builder.createArrayType(
size, alignment, GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems));
}
// Create a DIType from llvm Type
DIType *JitManager::GetDebugType(Type *pTy)
DIType* JitManager::GetDebugType(Type* pTy)
{
DIBuilder builder(*mpCurrentModule);
Type::TypeID id = pTy->getTypeID();
@ -317,17 +317,17 @@ DIType *JitManager::GetDebugType(Type *pTy)
}
// Create a DISubroutineType from an llvm FunctionType
DIType *JitManager::GetDebugFunctionType(Type *pTy)
DIType* JitManager::GetDebugFunctionType(Type* pTy)
{
SmallVector<Metadata *, 8> ElemTypes;
FunctionType * pFuncTy = cast<FunctionType>(pTy);
DIBuilder builder(*mpCurrentModule);
SmallVector<Metadata*, 8> ElemTypes;
FunctionType* pFuncTy = cast<FunctionType>(pTy);
DIBuilder builder(*mpCurrentModule);
// Add result type
ElemTypes.push_back(GetDebugType(pFuncTy->getReturnType()));
// Add arguments
for (auto &param : pFuncTy->params())
for (auto& param : pFuncTy->params())
{
ElemTypes.push_back(GetDebugType(param));
}
@ -335,10 +335,10 @@ DIType *JitManager::GetDebugFunctionType(Type *pTy)
return builder.createSubroutineType(builder.getOrCreateTypeArray(ElemTypes));
}
DIType *JitManager::GetDebugIntegerType(Type *pTy)
DIType* JitManager::GetDebugIntegerType(Type* pTy)
{
DIBuilder builder(*mpCurrentModule);
IntegerType *pIntTy = cast<IntegerType>(pTy);
IntegerType* pIntTy = cast<IntegerType>(pTy);
switch (pIntTy->getBitWidth())
{
case 1:
@ -365,14 +365,14 @@ DIType *JitManager::GetDebugIntegerType(Type *pTy)
return nullptr;
}
DIType *JitManager::GetDebugVectorType(Type *pTy)
DIType* JitManager::GetDebugVectorType(Type* pTy)
{
DIBuilder builder(*mpCurrentModule);
VectorType * pVecTy = cast<VectorType>(pTy);
DataLayout DL = DataLayout(mpCurrentModule);
uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
uint32_t alignment = DL.getABITypeAlignment(pVecTy);
SmallVector<Metadata *, 1> Elems;
DIBuilder builder(*mpCurrentModule);
VectorType* pVecTy = cast<VectorType>(pTy);
DataLayout DL = DataLayout(mpCurrentModule);
uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
uint32_t alignment = DL.getABITypeAlignment(pVecTy);
SmallVector<Metadata*, 1> Elems;
Elems.push_back(builder.getOrCreateSubrange(0, pVecTy->getVectorNumElements()));
return builder.createVectorType(size,
@ -385,7 +385,7 @@ DIType *JitManager::GetDebugVectorType(Type *pTy)
/// @brief Dump function x86 assembly to file.
/// @note This should only be called after the module has been jitted to x86 and the
/// module will not be further accessed.
void JitManager::DumpAsm(Function *pFunction, const char *fileName)
void JitManager::DumpAsm(Function* pFunction, const char* fileName)
{
if (KNOB_DUMP_SHADER_IR)
{
@ -393,15 +393,15 @@ void JitManager::DumpAsm(Function *pFunction, const char *fileName)
DWORD pid = GetCurrentProcessId();
char procname[MAX_PATH];
GetModuleFileNameA(NULL, procname, MAX_PATH);
const char * pBaseName = strrchr(procname, '\\');
const char* pBaseName = strrchr(procname, '\\');
std::stringstream outDir;
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
CreateDirectoryPath(outDir.str().c_str());
#endif
std::error_code EC;
Module * pModule = pFunction->getParent();
const char * funcName = pFunction->getName().data();
Module* pModule = pFunction->getParent();
const char* funcName = pFunction->getName().data();
char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.asm", outDir.str().c_str(), funcName, fileName);
@ -411,11 +411,12 @@ void JitManager::DumpAsm(Function *pFunction, const char *fileName)
raw_fd_ostream filestream(fName, EC, llvm::sys::fs::F_None);
legacy::PassManager *pMPasses = new legacy::PassManager();
auto * pTarget = mpExec->getTargetMachine();
legacy::PassManager* pMPasses = new legacy::PassManager();
auto* pTarget = mpExec->getTargetMachine();
pTarget->Options.MCOptions.AsmVerbose = true;
#if LLVM_VERSION_MAJOR >= 7
pTarget->addPassesToEmitFile(*pMPasses, filestream, nullptr, TargetMachine::CGFT_AssemblyFile);
pTarget->addPassesToEmitFile(
*pMPasses, filestream, nullptr, TargetMachine::CGFT_AssemblyFile);
#else
pTarget->addPassesToEmitFile(*pMPasses, filestream, TargetMachine::CGFT_AssemblyFile);
#endif
@ -431,7 +432,7 @@ std::string JitManager::GetOutputDir()
DWORD pid = GetCurrentProcessId();
char procname[MAX_PATH];
GetModuleFileNameA(NULL, procname, MAX_PATH);
const char * pBaseName = strrchr(procname, '\\');
const char* pBaseName = strrchr(procname, '\\');
std::stringstream outDir;
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid;
CreateDirectoryPath(outDir.str().c_str());
@ -442,14 +443,14 @@ std::string JitManager::GetOutputDir()
//////////////////////////////////////////////////////////////////////////
/// @brief Dump module to file.
void JitManager::DumpToFile(Module *M, const char *fileName)
void JitManager::DumpToFile(Module* M, const char* fileName)
{
if (KNOB_DUMP_SHADER_IR)
{
std::string outDir = GetOutputDir();
std::error_code EC;
const char * funcName = M->getName().data();
const char* funcName = M->getName().data();
char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
@ -464,14 +465,14 @@ void JitManager::DumpToFile(Module *M, const char *fileName)
//////////////////////////////////////////////////////////////////////////
/// @brief Dump function to file.
void JitManager::DumpToFile(Function *f, const char *fileName)
void JitManager::DumpToFile(Function* f, const char* fileName)
{
if (KNOB_DUMP_SHADER_IR)
{
std::string outDir = GetOutputDir();
std::error_code EC;
const char * funcName = f->getName().data();
const char* funcName = f->getName().data();
char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
@ -489,7 +490,7 @@ void JitManager::DumpToFile(Function *f, const char *fileName)
fd.flush();
raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text);
WriteGraph(fd_cfg, (const Function *)f);
WriteGraph(fd_cfg, (const Function*)f);
fd_cfg.flush();
}
@ -501,7 +502,7 @@ bool g_DllActive = true;
//////////////////////////////////////////////////////////////////////////
/// @brief Create JIT context.
/// @param targetSimdWidth - SIMD width to be used in generated program.
HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char *arch, const char *core)
HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch, const char* core)
{
    // The returned handle is the heap-allocated JitManager itself; all three
    // arguments are forwarded unchanged to its constructor.
    JitManager* pJitMgr = new JitManager(targetSimdWidth, arch, core);
    return pJitMgr;
}
@ -512,7 +513,7 @@ void JITCALL JitDestroyContext(HANDLE hJitContext)
{
if (g_DllActive)
{
delete reinterpret_cast<JitManager *>(hJitContext);
delete reinterpret_cast<JitManager*>(hJitContext);
}
}
}
@ -528,8 +529,8 @@ struct JitCacheFileHeader
{
void Init(uint32_t llCRC,
uint32_t objCRC,
const std::string &moduleID,
const std::string &cpu,
const std::string& moduleID,
const std::string& cpu,
uint32_t optLevel,
uint64_t objSize)
{
@ -545,7 +546,7 @@ struct JitCacheFileHeader
bool
IsValid(uint32_t llCRC, const std::string &moduleID, const std::string &cpu, uint32_t optLevel)
IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu, uint32_t optLevel)
{
if ((m_MagicNumber != JC_MAGIC_NUMBER) || (m_llCRC != llCRC) ||
(m_platformKey != JC_PLATFORM_KEY) || (m_optLevel != optLevel))
@ -576,7 +577,7 @@ private:
static const size_t JC_STR_MAX_LEN = 32;
static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) |
(LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) |
((sizeof(void *) > sizeof(uint32_t)) ? 1 : 0);
((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0);
uint64_t m_MagicNumber = JC_MAGIC_NUMBER;
uint64_t m_objSize = 0;
@ -588,7 +589,7 @@ private:
char m_Cpu[JC_STR_MAX_LEN] = {};
};
static inline uint32_t ComputeModuleCRC(const llvm::Module *M)
static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
{
std::string bitcodeBuffer;
raw_string_ostream bitcodeStream(bitcodeBuffer);
@ -611,7 +612,7 @@ JitCache::JitCache()
#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0)
{
char *homedir;
char* homedir;
if (!(homedir = getenv("HOME")))
{
homedir = getpwuid(getuid())->pw_dir;
@ -626,15 +627,15 @@ JitCache::JitCache()
}
}
int ExecUnhookedProcess(const std::string &CmdLine, std::string *pStdOut, std::string *pStdErr)
/// @brief Run a command line by forwarding to ExecCmd.
/// @param CmdLine - command line handed to ExecCmd as its first argument.
/// @param pStdOut - forwarded unchanged to ExecCmd.
/// @param pStdErr - forwarded unchanged to ExecCmd.
/// @return Whatever ExecCmd returns, converted to int.
int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr)
{
    // The second ExecCmd argument is always the empty string here.
    const int result = ExecCmd(CmdLine, "", pStdOut, pStdErr);
    return result;
}
/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj)
void JitCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj)
{
const std::string &moduleID = M->getModuleIdentifier();
const std::string& moduleID = M->getModuleIdentifier();
if (!moduleID.length())
{
return;
@ -671,7 +672,7 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef
header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, mOptLevel, Obj.getBufferSize());
fileObj.write((const char *)&header, sizeof(header));
fileObj.write((const char*)&header, sizeof(header));
fileObj.flush();
}
}
@ -679,9 +680,9 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef
/// Returns a pointer to a newly allocated MemoryBuffer that contains the
/// object which corresponds with Module M, or 0 if an object is not
/// available.
std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
{
const std::string &moduleID = M->getModuleIdentifier();
const std::string& moduleID = M->getModuleIdentifier();
mCurrentModuleCRC = ComputeModuleCRC(M);
if (!moduleID.length())
@ -700,8 +701,8 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
llvm::SmallString<MAX_PATH> objFilePath = filePath;
objFilePath += JIT_OBJ_EXT;
FILE *fpObjIn = nullptr;
FILE *fpIn = fopen(filePath.c_str(), "rb");
FILE* fpObjIn = nullptr;
FILE* fpIn = fopen(filePath.c_str(), "rb");
if (!fpIn)
{
return nullptr;
@ -732,7 +733,7 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
#else
pBuf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(size_t(header.GetObjectSize()));
#endif
if (!fread(const_cast<char *>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
{
pBuf = nullptr;
break;

View File

@ -37,12 +37,12 @@ namespace SwrJit
{
using namespace llvm;
BuilderGfxMem::BuilderGfxMem(JitManager *pJitMgr) : Builder(pJitMgr)
BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) : Builder(pJitMgr)
{
mpTranslationFuncTy = nullptr;
mpfnTranslateGfxAddressForRead = nullptr;
mpTranslationFuncTy = nullptr;
mpfnTranslateGfxAddressForRead = nullptr;
mpfnTranslateGfxAddressForWrite = nullptr;
mpParamSimDC = nullptr;
mpParamSimDC = nullptr;
}
@ -50,7 +50,7 @@ namespace SwrJit
{
}
void BuilderGfxMem::AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage)
void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
{
SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL),
"Internal memory should not be gfxptr_t.");
@ -65,10 +65,10 @@ namespace SwrJit
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
Value *BuilderGfxMem::GATHERPS(Value * vSrc,
Value * pBase,
Value * vIndices,
Value * vMask,
Value* BuilderGfxMem::GATHERPS(Value* vSrc,
Value* pBase,
Value* vIndices,
Value* vMask,
uint8_t scale,
JIT_MEM_CLIENT usage)
{
@ -78,7 +78,7 @@ namespace SwrJit
pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
}
Value *vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
Value* vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
return vGather;
}
@ -90,10 +90,10 @@ namespace SwrJit
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
Value *BuilderGfxMem::GATHERDD(Value * vSrc,
Value * pBase,
Value * vIndices,
Value * vMask,
Value* BuilderGfxMem::GATHERDD(Value* vSrc,
Value* pBase,
Value* vIndices,
Value* vMask,
uint8_t scale,
JIT_MEM_CLIENT usage)
{
@ -104,7 +104,7 @@ namespace SwrJit
pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
}
Value *vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
Value* vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
return vGather;
}
@ -122,37 +122,37 @@ namespace SwrJit
}
Value *BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset)
/// @brief Advance a base value by a constant offset.
/// @param base - value the offset is added to.
/// @param offset - constant added to base.
/// @return Result of ADD(base, offset).
Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
{
    Value* pNext = ADD(base, offset);
    return pNext;
}
Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
/// @brief GEP on a pointer that is first passed through TranslationHelper.
/// @param Ptr - base pointer, run through TranslationHelper together with Ty.
/// @param Idx - index forwarded to Builder::GEP.
/// @param Ty - type handed to TranslationHelper only.
/// @param Name - name forwarded to Builder::GEP.
Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
{
    Value* pTranslated = TranslationHelper(Ptr, Ty);
    // The base-class call receives nullptr for its type argument, not Ty.
    return Builder::GEP(pTranslated, Idx, nullptr, Name);
}
Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
/// @brief Typed GEP on a pointer that is first passed through TranslationHelper.
/// @param Ty - type given to both TranslationHelper and Builder::GEP.
/// @param Ptr - base pointer, run through TranslationHelper together with Ty.
/// @param Idx - index forwarded to Builder::GEP.
/// @param Name - name forwarded to Builder::GEP.
Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
{
    Value* pTranslated = TranslationHelper(Ptr, Ty);
    return Builder::GEP(Ty, pTranslated, Idx, Name);
}
Value *BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty)
/// @brief Multi-index GEP (Value* indices) on a pointer first passed through TranslationHelper.
/// @param Ptr - base pointer, run through TranslationHelper together with Ty.
/// @param indexList - indices forwarded to Builder::GEP.
/// @param Ty - type handed to TranslationHelper only.
Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
{
    Value* pTranslated = TranslationHelper(Ptr, Ty);
    return Builder::GEP(pTranslated, indexList);
}
Value *
BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
/// @brief Multi-index GEP (uint32_t indices) on a pointer first passed through TranslationHelper.
/// @param Ptr - base pointer, run through TranslationHelper together with Ty.
/// @param indexList - indices forwarded to Builder::GEP.
/// @param Ty - type handed to TranslationHelper only.
Value*
BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
{
    Value* pTranslated = TranslationHelper(Ptr, Ty);
    return Builder::GEP(pTranslated, indexList);
}
Value *BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty)
Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty)
{
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
"Access of GFX pointers must have non-null type specified.");
@ -167,7 +167,7 @@ namespace SwrJit
return Ptr;
}
LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
@ -175,7 +175,7 @@ namespace SwrJit
return Builder::LOAD(Ptr, Name);
}
LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
@ -183,8 +183,9 @@ namespace SwrJit
return Builder::LOAD(Ptr, Name);
}
LoadInst *BuilderGfxMem::LOAD(
Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
LoadInst* BuilderGfxMem::LOAD(
Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
@ -192,10 +193,10 @@ namespace SwrJit
return Builder::LOAD(Ptr, isVolatile, Name);
}
LoadInst *BuilderGfxMem::LOAD(Value * BasePtr,
const std::initializer_list<uint32_t> &offset,
const llvm::Twine & name,
Type * Ty,
LoadInst* BuilderGfxMem::LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& name,
Type* Ty,
JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
@ -207,7 +208,7 @@ namespace SwrJit
BasePtr = INT_TO_PTR(BasePtr, Ty, name);
bNeedTranslation = true;
}
std::vector<Value *> valIndices;
std::vector<Value*> valIndices;
for (auto i : offset)
{
valIndices.push_back(C(i));
@ -221,12 +222,13 @@ namespace SwrJit
return LOAD(BasePtr, name, Ty, usage);
}
CallInst *BuilderGfxMem::MASKED_LOAD(Value * Ptr,
CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
unsigned Align,
Value * Mask,
Value * PassThru,
const Twine & Name,
Type * Ty,
Value* Mask,
Value* PassThru,
const Twine& Name,
Type* Ty,
JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
@ -235,21 +237,9 @@ namespace SwrJit
return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
}
Value *BuilderGfxMem::TranslateGfxAddressForRead(Value * xpGfxAddress,
Type * PtrTy,
const Twine &Name,
JIT_MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
PtrTy = mInt8PtrTy;
}
return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
}
Value *BuilderGfxMem::TranslateGfxAddressForWrite(Value * xpGfxAddress,
Type * PtrTy,
const Twine &Name,
Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
JIT_MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
@ -259,4 +249,16 @@ namespace SwrJit
return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
}
/// @brief Convert a gfx address value to a pointer via INT_TO_PTR.
/// @param xpGfxAddress - address value to convert.
/// @param PtrTy - destination pointer type; mInt8PtrTy is used when this is nullptr.
/// @param Name - name forwarded to INT_TO_PTR.
/// @note The trailing JIT_MEM_CLIENT argument is unnamed and not used.
Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value*       xpGfxAddress,
                                                  Type*        PtrTy,
                                                  const Twine& Name,
                                                  JIT_MEM_CLIENT /* usage */)
{
    Type* pTargetTy = PtrTy;
    if (!pTargetTy)
    {
        pTargetTy = mInt8PtrTy;
    }

    return INT_TO_PTR(xpGfxAddress, pTargetTy, Name);
}
} // namespace SwrJit

View File

@ -38,53 +38,54 @@ namespace SwrJit
class BuilderGfxMem : public Builder
{
public:
BuilderGfxMem(JitManager *pJitMgr);
BuilderGfxMem(JitManager* pJitMgr);
virtual ~BuilderGfxMem() {}
virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
virtual Value *
GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty = nullptr);
virtual Value *
GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
virtual Value*
GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
virtual Value*
GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty = nullptr);
virtual LoadInst *LOAD(Value * Ptr,
const char * Name,
Type * Ty = nullptr,
virtual LoadInst* LOAD(Value* Ptr,
const char* Name,
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst *LOAD(Value * Ptr,
const Twine & Name = "",
Type * Ty = nullptr,
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst *LOAD(Value * Ptr,
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine & Name = "",
Type * Ty = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst *LOAD(Value * BasePtr,
const std::initializer_list<uint32_t> &offset,
const llvm::Twine & Name = "",
Type * Ty = nullptr,
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual CallInst *MASKED_LOAD(Value * Ptr,
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
Value * Mask,
Value * PassThru = nullptr,
const Twine & Name = "",
Type * Ty = nullptr,
Value* Mask,
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value *GATHERPS(Value * src,
Value * pBase,
Value * indices,
Value * mask,
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value *GATHERDD(Value * src,
Value * pBase,
Value * indices,
Value * mask,
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
@ -95,35 +96,35 @@ namespace SwrJit
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
Value *TranslateGfxAddressForRead(Value * xpGfxAddress,
Type * PtrTy = nullptr,
const Twine & Name = "",
Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
Value *TranslateGfxAddressForWrite(Value * xpGfxAddress,
Type * PtrTy = nullptr,
const Twine & Name = "",
Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
protected:
void AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage);
void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
virtual void NotifyPrivateContextSet();
virtual Value *OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset);
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
Value *TranslationHelper(Value *Ptr, Type *Ty);
Value* TranslationHelper(Value* Ptr, Type* Ty);
FunctionType *GetTranslationFunctionType() { return mpTranslationFuncTy; }
Value * GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
Value * GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
Value * GetParamSimDC() { return mpParamSimDC; }
FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
Value* GetParamSimDC() { return mpParamSimDC; }
private:
FunctionType *mpTranslationFuncTy;
Value * mpfnTranslateGfxAddressForRead;
Value * mpfnTranslateGfxAddressForWrite;
Value * mpParamSimDC;
FunctionType* mpTranslationFuncTy;
Value* mpfnTranslateGfxAddressForRead;
Value* mpfnTranslateGfxAddressForWrite;
Value* mpParamSimDC;
};
} // namespace SwrJit

View File

@ -37,7 +37,7 @@
namespace llvm
{
// forward declare the initializer
void initializeLowerX86Pass(PassRegistry &);
void initializeLowerX86Pass(PassRegistry&);
} // namespace llvm
namespace SwrJit
@ -60,7 +60,7 @@ namespace SwrJit
struct LowerX86;
typedef std::function<Instruction *(LowerX86 *, TargetArch, TargetWidth, CallInst *)> EmuFunc;
typedef std::function<Instruction*(LowerX86*, TargetArch, TargetWidth, CallInst*)> EmuFunc;
struct X86Intrinsic
{
@ -83,22 +83,22 @@ namespace SwrJit
};
// Forward decls
Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
Instruction *
VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
Instruction *
VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
Instruction *
VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
Instruction *
VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
Instruction *
VCONVERT_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction*
VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction *DOUBLE_EMU(LowerX86 * pThis,
Instruction* DOUBLE_EMU(LowerX86* pThis,
TargetArch arch,
TargetWidth width,
CallInst * pCallInst,
CallInst* pCallInst,
Intrinsic::ID intrin);
static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
@ -188,7 +188,7 @@ namespace SwrJit
struct LowerX86 : public FunctionPass
{
LowerX86(Builder *b = nullptr) : FunctionPass(ID), B(b)
LowerX86(Builder* b = nullptr) : FunctionPass(ID), B(b)
{
initializeLowerX86Pass(*PassRegistry::getPassRegistry());
@ -216,12 +216,12 @@ namespace SwrJit
// across all intrinsics, and will have to be rethought. Probably need something
// similar to llvm's getDeclaration() utility to map a set of inputs to a specific typed
// intrinsic.
void GetRequestedWidthAndType(CallInst * pCallInst,
void GetRequestedWidthAndType(CallInst* pCallInst,
const StringRef intrinName,
TargetWidth * pWidth,
Type ** pTy)
TargetWidth* pWidth,
Type** pTy)
{
Type *pVecTy = pCallInst->getType();
Type* pVecTy = pCallInst->getType();
// Check for intrinsic specific types
// VCVTPD2PS type comes from src, not dst
@ -232,7 +232,7 @@ namespace SwrJit
if (!pVecTy->isVectorTy())
{
for (auto &op : pCallInst->arg_operands())
for (auto& op : pCallInst->arg_operands())
{
if (op.get()->getType()->isVectorTy())
{
@ -260,7 +260,7 @@ namespace SwrJit
*pTy = pVecTy->getScalarType();
}
Value *GetZeroVec(TargetWidth width, Type *pTy)
Value* GetZeroVec(TargetWidth width, Type* pTy)
{
uint32_t numElem = 0;
switch (width)
@ -278,9 +278,9 @@ namespace SwrJit
return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
}
Value *GetMask(TargetWidth width)
Value* GetMask(TargetWidth width)
{
Value *mask;
Value* mask;
switch (width)
{
case W256:
@ -296,18 +296,18 @@ namespace SwrJit
}
// Convert <N x i1> mask to <N x i32> x86 mask
Value *VectorMask(Value *vi1Mask)
/// @brief Widen a vector mask through B->S_EXT to a vector of B->mInt32Ty.
/// @param vi1Mask - vector mask; its element count is preserved in the result type.
Value* VectorMask(Value* vi1Mask)
{
    const uint32_t laneCount  = vi1Mask->getType()->getVectorNumElements();
    auto*          pWideMaskTy = VectorType::get(B->mInt32Ty, laneCount);
    return B->S_EXT(vi1Mask, pWideMaskTy);
}
Instruction *ProcessIntrinsicAdvanced(CallInst *pCallInst)
Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
{
Function * pFunc = pCallInst->getCalledFunction();
auto & intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
Function* pFunc = pCallInst->getCalledFunction();
auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
TargetWidth vecWidth;
Type * pElemTy;
Type* pElemTy;
GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
// Check if there is a native intrinsic for this instruction
@ -323,9 +323,9 @@ namespace SwrJit
}
else if (id != Intrinsic::not_intrinsic)
{
Function *pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
SmallVector<Value *, 8> args;
for (auto &arg : pCallInst->arg_operands())
Function* pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
SmallVector<Value*, 8> args;
for (auto& arg : pCallInst->arg_operands())
{
args.push_back(arg.get());
}
@ -361,9 +361,9 @@ namespace SwrJit
return nullptr;
}
Instruction *ProcessIntrinsic(CallInst *pCallInst)
Instruction* ProcessIntrinsic(CallInst* pCallInst)
{
Function *pFunc = pCallInst->getCalledFunction();
Function* pFunc = pCallInst->getCalledFunction();
// Forward to the advanced support if found
if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end())
@ -376,11 +376,11 @@ namespace SwrJit
pFunc->getName());
Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()];
Function * pX86IntrinFunc =
Function* pX86IntrinFunc =
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
SmallVector<Value *, 8> args;
for (auto &arg : pCallInst->arg_operands())
SmallVector<Value*, 8> args;
for (auto& arg : pCallInst->arg_operands())
{
args.push_back(arg.get());
}
@ -390,23 +390,23 @@ namespace SwrJit
//////////////////////////////////////////////////////////////////////////
/// @brief LLVM function pass run method.
/// @param F - The function we're working on with this pass.
virtual bool runOnFunction(Function &F)
virtual bool runOnFunction(Function& F)
{
std::vector<Instruction *> toRemove;
std::vector<Instruction*> toRemove;
for (auto &BB : F.getBasicBlockList())
for (auto& BB : F.getBasicBlockList())
{
for (auto &I : BB.getInstList())
for (auto& I : BB.getInstList())
{
if (CallInst *pCallInst = dyn_cast<CallInst>(&I))
if (CallInst* pCallInst = dyn_cast<CallInst>(&I))
{
Function *pFunc = pCallInst->getCalledFunction();
Function* pFunc = pCallInst->getCalledFunction();
if (pFunc)
{
if (pFunc->getName().startswith("meta.intrinsic"))
{
B->IRB()->SetInsertPoint(&I);
Instruction *pReplace = ProcessIntrinsic(pCallInst);
Instruction* pReplace = ProcessIntrinsic(pCallInst);
SWR_ASSERT(pReplace);
toRemove.push_back(pCallInst);
pCallInst->replaceAllUsesWith(pReplace);
@ -416,7 +416,7 @@ namespace SwrJit
}
}
for (auto *pInst : toRemove)
for (auto* pInst : toRemove)
{
pInst->eraseFromParent();
}
@ -426,11 +426,11 @@ namespace SwrJit
return true;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
virtual void getAnalysisUsage(AnalysisUsage& AU) const {}
JitManager *JM() { return B->JM(); }
JitManager* JM() { return B->JM(); }
Builder *B;
Builder* B;
TargetArch mTarget;
@ -439,24 +439,24 @@ namespace SwrJit
char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID.
FunctionPass *createLowerX86Pass(Builder *b) { return new LowerX86(b); }
FunctionPass* createLowerX86Pass(Builder* b) { return new LowerX86(b); }
Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
SWR_ASSERT(false, "Unimplemented intrinsic emulation.");
return nullptr;
}
Instruction *VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
// Only need vperm emulation for AVX
SWR_ASSERT(arch == AVX);
Builder *B = pThis->B;
Builder* B = pThis->B;
auto v32A = pCallInst->getArgOperand(0);
auto vi32Index = pCallInst->getArgOperand(1);
Value *v32Result;
Value* v32Result;
if (isa<Constant>(vi32Index))
{
// Can use llvm shuffle vector directly with constant shuffle indices
@ -475,10 +475,10 @@ namespace SwrJit
return cast<Instruction>(v32Result);
}
Instruction *
VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
Instruction*
VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
Builder *B = pThis->B;
Builder* B = pThis->B;
auto vSrc = pCallInst->getArgOperand(0);
auto pBase = pCallInst->getArgOperand(1);
auto vi32Indices = pCallInst->getArgOperand(2);
@ -489,7 +489,7 @@ namespace SwrJit
uint32_t numElem = vSrc->getType()->getVectorNumElements();
auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
auto srcTy = vSrc->getType()->getVectorElementType();
Value * v32Gather;
Value* v32Gather;
if (arch == AVX)
{
// Full emulation for AVX
@ -518,7 +518,7 @@ namespace SwrJit
}
else if (arch == AVX2 || (arch == AVX512 && width == W256))
{
Function *pX86IntrinFunc;
Function* pX86IntrinFunc;
if (srcTy == B->mFP32Ty)
{
pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
@ -555,14 +555,14 @@ namespace SwrJit
VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements()));
v64Mask = B->BITCAST(v64Mask, vSrc->getType());
Value *src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
Value *src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
Value* src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
Value *indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
Value *indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
Value* indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
Value* indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
Value *mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
Value *mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
src0 = B->BITCAST(
src0,
@ -570,7 +570,7 @@ namespace SwrJit
mask0 = B->BITCAST(
mask0,
VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements()));
Value *gather0 =
Value* gather0 =
B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
src1 = B->BITCAST(
src1,
@ -578,7 +578,7 @@ namespace SwrJit
mask1 = B->BITCAST(
mask1,
VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements()));
Value *gather1 =
Value* gather1 =
B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
@ -589,18 +589,18 @@ namespace SwrJit
// Double pump 8-wide for 32bit elements
auto v32Mask = pThis->VectorMask(vi1Mask);
v32Mask = B->BITCAST(v32Mask, vSrc->getType());
Value *src0 = B->EXTRACT_16(vSrc, 0);
Value *src1 = B->EXTRACT_16(vSrc, 1);
Value* src0 = B->EXTRACT_16(vSrc, 0);
Value* src1 = B->EXTRACT_16(vSrc, 1);
Value *indices0 = B->EXTRACT_16(vi32Indices, 0);
Value *indices1 = B->EXTRACT_16(vi32Indices, 1);
Value* indices0 = B->EXTRACT_16(vi32Indices, 0);
Value* indices1 = B->EXTRACT_16(vi32Indices, 1);
Value *mask0 = B->EXTRACT_16(v32Mask, 0);
Value *mask1 = B->EXTRACT_16(v32Mask, 1);
Value* mask0 = B->EXTRACT_16(v32Mask, 0);
Value* mask1 = B->EXTRACT_16(v32Mask, 1);
Value *gather0 =
Value* gather0 =
B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
Value *gather1 =
Value* gather1 =
B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
v32Gather = B->JOIN_16(gather0, gather1);
@ -609,8 +609,8 @@ namespace SwrJit
}
else if (arch == AVX512)
{
Value * iMask;
Function *pX86IntrinFunc;
Value* iMask;
Function* pX86IntrinFunc;
if (srcTy == B->mFP32Ty)
{
pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
@ -643,8 +643,8 @@ namespace SwrJit
// No support for vroundps in avx512 (it is available in kncni), so emulate with avx
// instructions
Instruction *
VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
Instruction*
VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
SWR_ASSERT(arch == AVX512);
@ -676,22 +676,25 @@ namespace SwrJit
return nullptr;
}
Instruction *VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
Instruction*
VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
SWR_ASSERT(arch == AVX512);
auto B = pThis->B;
auto B = pThis->B;
auto vf32Src = pCallInst->getOperand(0);
if (width == W256)
{
auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
Intrinsic::x86_avx_round_ps_256);
return cast<Instruction>(B->FP_TRUNC(vf32SrcRound, B->mFP32Ty));
}
else if (width == W512)
{
// 512 can use intrinsic
auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_mask_cvtpd2ps_512);
auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
Intrinsic::x86_avx512_mask_cvtpd2ps_512);
return cast<Instruction>(B->CALL(pfnFunc, vf32Src));
}
else
@ -703,7 +706,7 @@ namespace SwrJit
}
// No support for hsub in AVX512
Instruction *VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
{
SWR_ASSERT(arch == AVX512);
@ -734,27 +737,27 @@ namespace SwrJit
// Double pump input using the intrin argument. This blindly extracts lower and upper 256 from
// each vector argument and calls the 256 wide intrinsic, then merges the results to 512 wide
Instruction *DOUBLE_EMU(LowerX86 * pThis,
Instruction* DOUBLE_EMU(LowerX86* pThis,
TargetArch arch,
TargetWidth width,
CallInst * pCallInst,
CallInst* pCallInst,
Intrinsic::ID intrin)
{
auto B = pThis->B;
SWR_ASSERT(width == W512);
Value * result[2];
Function *pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
Value* result[2];
Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
for (uint32_t i = 0; i < 2; ++i)
{
SmallVector<Value *, 8> args;
for (auto &arg : pCallInst->arg_operands())
SmallVector<Value*, 8> args;
for (auto& arg : pCallInst->arg_operands())
{
auto argType = arg.get()->getType();
if (argType->isVectorTy())
{
uint32_t vecWidth = argType->getVectorNumElements();
Value * lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
Value * argToPush = B->VSHUFFLE(
Value* lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
Value* argToPush = B->VSHUFFLE(
arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes);
args.push_back(argToPush);
}
@ -776,7 +779,7 @@ namespace SwrJit
{
vecWidth = 2;
}
Value *lanes = B->CInc<int>(0, vecWidth);
Value* lanes = B->CInc<int>(0, vecWidth);
return cast<Instruction>(B->VSHUFFLE(result[0], result[1], lanes));
}