swr/rast: Updating code style based on current clang-format rules
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
f90a60fe79
commit
5b7d4f9428
|
@ -59,7 +59,7 @@ using namespace SwrJit;
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Constructor for JitManager.
|
||||
/// @param simdWidth - SIMD width to be used in generated program.
|
||||
JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
|
||||
JitManager::JitManager(uint32_t simdWidth, const char* arch, const char* core) :
|
||||
mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth),
|
||||
mArch(arch)
|
||||
{
|
||||
|
@ -153,7 +153,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
|
|||
}
|
||||
|
||||
#if LLVM_USE_INTEL_JITEVENTS
|
||||
JITEventListener *vTune = JITEventListener::createIntelJITEventListener();
|
||||
JITEventListener* vTune = JITEventListener::createIntelJITEventListener();
|
||||
mpExec->RegisterJITEventListener(vTune);
|
||||
#endif
|
||||
|
||||
|
@ -163,7 +163,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
|
|||
#else
|
||||
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
|
||||
#endif
|
||||
std::vector<Type *> fsArgs;
|
||||
std::vector<Type*> fsArgs;
|
||||
|
||||
// llvm5 is picky and does not take a void * type
|
||||
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
|
||||
|
@ -212,21 +212,21 @@ void JitManager::SetupNewModule()
|
|||
}
|
||||
|
||||
|
||||
DIType *
|
||||
JitManager::CreateDebugStructType(StructType * pType,
|
||||
const std::string & name,
|
||||
DIFile * pFile,
|
||||
DIType*
|
||||
JitManager::CreateDebugStructType(StructType* pType,
|
||||
const std::string& name,
|
||||
DIFile* pFile,
|
||||
uint32_t lineNum,
|
||||
const std::vector<std::pair<std::string, uint32_t>> &members)
|
||||
const std::vector<std::pair<std::string, uint32_t>>& members)
|
||||
{
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
SmallVector<Metadata *, 8> ElemTypes;
|
||||
DataLayout DL = DataLayout(mpCurrentModule);
|
||||
uint32_t size = DL.getTypeAllocSizeInBits(pType);
|
||||
uint32_t alignment = DL.getABITypeAlignment(pType);
|
||||
DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
SmallVector<Metadata*, 8> ElemTypes;
|
||||
DataLayout DL = DataLayout(mpCurrentModule);
|
||||
uint32_t size = DL.getTypeAllocSizeInBits(pType);
|
||||
uint32_t alignment = DL.getABITypeAlignment(pType);
|
||||
DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
|
||||
|
||||
DICompositeType *pDIStructTy = builder.createStructType(pFile,
|
||||
DICompositeType* pDIStructTy = builder.createStructType(pFile,
|
||||
name,
|
||||
pFile,
|
||||
lineNum,
|
||||
|
@ -240,14 +240,14 @@ JitManager::CreateDebugStructType(StructType *
|
|||
mDebugStructMap[pType] = pDIStructTy;
|
||||
|
||||
uint32_t idx = 0;
|
||||
for (auto &elem : pType->elements())
|
||||
for (auto& elem : pType->elements())
|
||||
{
|
||||
std::string name = members[idx].first;
|
||||
uint32_t lineNum = members[idx].second;
|
||||
size = DL.getTypeAllocSizeInBits(elem);
|
||||
alignment = DL.getABITypeAlignment(elem);
|
||||
uint32_t offset = DL.getStructLayout(pType)->getElementOffsetInBits(idx);
|
||||
llvm::DIType *pDebugTy = GetDebugType(elem);
|
||||
llvm::DIType* pDebugTy = GetDebugType(elem);
|
||||
ElemTypes.push_back(builder.createMemberType(
|
||||
pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy));
|
||||
|
||||
|
@ -258,22 +258,22 @@ JitManager::CreateDebugStructType(StructType *
|
|||
return pDIStructTy;
|
||||
}
|
||||
|
||||
DIType *JitManager::GetDebugArrayType(Type *pTy)
|
||||
DIType* JitManager::GetDebugArrayType(Type* pTy)
|
||||
{
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
DataLayout DL = DataLayout(mpCurrentModule);
|
||||
ArrayType *pArrayTy = cast<ArrayType>(pTy);
|
||||
ArrayType* pArrayTy = cast<ArrayType>(pTy);
|
||||
uint32_t size = DL.getTypeAllocSizeInBits(pArrayTy);
|
||||
uint32_t alignment = DL.getABITypeAlignment(pArrayTy);
|
||||
|
||||
SmallVector<Metadata *, 8> Elems;
|
||||
SmallVector<Metadata*, 8> Elems;
|
||||
Elems.push_back(builder.getOrCreateSubrange(0, pArrayTy->getNumElements()));
|
||||
return builder.createArrayType(
|
||||
size, alignment, GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems));
|
||||
}
|
||||
|
||||
// Create a DIType from llvm Type
|
||||
DIType *JitManager::GetDebugType(Type *pTy)
|
||||
DIType* JitManager::GetDebugType(Type* pTy)
|
||||
{
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
Type::TypeID id = pTy->getTypeID();
|
||||
|
@ -317,17 +317,17 @@ DIType *JitManager::GetDebugType(Type *pTy)
|
|||
}
|
||||
|
||||
// Create a DISubroutineType from an llvm FunctionType
|
||||
DIType *JitManager::GetDebugFunctionType(Type *pTy)
|
||||
DIType* JitManager::GetDebugFunctionType(Type* pTy)
|
||||
{
|
||||
SmallVector<Metadata *, 8> ElemTypes;
|
||||
FunctionType * pFuncTy = cast<FunctionType>(pTy);
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
SmallVector<Metadata*, 8> ElemTypes;
|
||||
FunctionType* pFuncTy = cast<FunctionType>(pTy);
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
|
||||
// Add result type
|
||||
ElemTypes.push_back(GetDebugType(pFuncTy->getReturnType()));
|
||||
|
||||
// Add arguments
|
||||
for (auto &param : pFuncTy->params())
|
||||
for (auto& param : pFuncTy->params())
|
||||
{
|
||||
ElemTypes.push_back(GetDebugType(param));
|
||||
}
|
||||
|
@ -335,10 +335,10 @@ DIType *JitManager::GetDebugFunctionType(Type *pTy)
|
|||
return builder.createSubroutineType(builder.getOrCreateTypeArray(ElemTypes));
|
||||
}
|
||||
|
||||
DIType *JitManager::GetDebugIntegerType(Type *pTy)
|
||||
DIType* JitManager::GetDebugIntegerType(Type* pTy)
|
||||
{
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
IntegerType *pIntTy = cast<IntegerType>(pTy);
|
||||
IntegerType* pIntTy = cast<IntegerType>(pTy);
|
||||
switch (pIntTy->getBitWidth())
|
||||
{
|
||||
case 1:
|
||||
|
@ -365,14 +365,14 @@ DIType *JitManager::GetDebugIntegerType(Type *pTy)
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
DIType *JitManager::GetDebugVectorType(Type *pTy)
|
||||
DIType* JitManager::GetDebugVectorType(Type* pTy)
|
||||
{
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
VectorType * pVecTy = cast<VectorType>(pTy);
|
||||
DataLayout DL = DataLayout(mpCurrentModule);
|
||||
uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
|
||||
uint32_t alignment = DL.getABITypeAlignment(pVecTy);
|
||||
SmallVector<Metadata *, 1> Elems;
|
||||
DIBuilder builder(*mpCurrentModule);
|
||||
VectorType* pVecTy = cast<VectorType>(pTy);
|
||||
DataLayout DL = DataLayout(mpCurrentModule);
|
||||
uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
|
||||
uint32_t alignment = DL.getABITypeAlignment(pVecTy);
|
||||
SmallVector<Metadata*, 1> Elems;
|
||||
Elems.push_back(builder.getOrCreateSubrange(0, pVecTy->getVectorNumElements()));
|
||||
|
||||
return builder.createVectorType(size,
|
||||
|
@ -385,7 +385,7 @@ DIType *JitManager::GetDebugVectorType(Type *pTy)
|
|||
/// @brief Dump function x86 assembly to file.
|
||||
/// @note This should only be called after the module has been jitted to x86 and the
|
||||
/// module will not be further accessed.
|
||||
void JitManager::DumpAsm(Function *pFunction, const char *fileName)
|
||||
void JitManager::DumpAsm(Function* pFunction, const char* fileName)
|
||||
{
|
||||
if (KNOB_DUMP_SHADER_IR)
|
||||
{
|
||||
|
@ -393,15 +393,15 @@ void JitManager::DumpAsm(Function *pFunction, const char *fileName)
|
|||
DWORD pid = GetCurrentProcessId();
|
||||
char procname[MAX_PATH];
|
||||
GetModuleFileNameA(NULL, procname, MAX_PATH);
|
||||
const char * pBaseName = strrchr(procname, '\\');
|
||||
const char* pBaseName = strrchr(procname, '\\');
|
||||
std::stringstream outDir;
|
||||
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
|
||||
CreateDirectoryPath(outDir.str().c_str());
|
||||
#endif
|
||||
|
||||
std::error_code EC;
|
||||
Module * pModule = pFunction->getParent();
|
||||
const char * funcName = pFunction->getName().data();
|
||||
Module* pModule = pFunction->getParent();
|
||||
const char* funcName = pFunction->getName().data();
|
||||
char fName[256];
|
||||
#if defined(_WIN32)
|
||||
sprintf(fName, "%s\\%s.%s.asm", outDir.str().c_str(), funcName, fileName);
|
||||
|
@ -411,11 +411,12 @@ void JitManager::DumpAsm(Function *pFunction, const char *fileName)
|
|||
|
||||
raw_fd_ostream filestream(fName, EC, llvm::sys::fs::F_None);
|
||||
|
||||
legacy::PassManager *pMPasses = new legacy::PassManager();
|
||||
auto * pTarget = mpExec->getTargetMachine();
|
||||
legacy::PassManager* pMPasses = new legacy::PassManager();
|
||||
auto* pTarget = mpExec->getTargetMachine();
|
||||
pTarget->Options.MCOptions.AsmVerbose = true;
|
||||
#if LLVM_VERSION_MAJOR >= 7
|
||||
pTarget->addPassesToEmitFile(*pMPasses, filestream, nullptr, TargetMachine::CGFT_AssemblyFile);
|
||||
pTarget->addPassesToEmitFile(
|
||||
*pMPasses, filestream, nullptr, TargetMachine::CGFT_AssemblyFile);
|
||||
#else
|
||||
pTarget->addPassesToEmitFile(*pMPasses, filestream, TargetMachine::CGFT_AssemblyFile);
|
||||
#endif
|
||||
|
@ -431,7 +432,7 @@ std::string JitManager::GetOutputDir()
|
|||
DWORD pid = GetCurrentProcessId();
|
||||
char procname[MAX_PATH];
|
||||
GetModuleFileNameA(NULL, procname, MAX_PATH);
|
||||
const char * pBaseName = strrchr(procname, '\\');
|
||||
const char* pBaseName = strrchr(procname, '\\');
|
||||
std::stringstream outDir;
|
||||
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid;
|
||||
CreateDirectoryPath(outDir.str().c_str());
|
||||
|
@ -442,14 +443,14 @@ std::string JitManager::GetOutputDir()
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Dump module to file.
|
||||
void JitManager::DumpToFile(Module *M, const char *fileName)
|
||||
void JitManager::DumpToFile(Module* M, const char* fileName)
|
||||
{
|
||||
if (KNOB_DUMP_SHADER_IR)
|
||||
{
|
||||
std::string outDir = GetOutputDir();
|
||||
|
||||
std::error_code EC;
|
||||
const char * funcName = M->getName().data();
|
||||
const char* funcName = M->getName().data();
|
||||
char fName[256];
|
||||
#if defined(_WIN32)
|
||||
sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
|
||||
|
@ -464,14 +465,14 @@ void JitManager::DumpToFile(Module *M, const char *fileName)
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Dump function to file.
|
||||
void JitManager::DumpToFile(Function *f, const char *fileName)
|
||||
void JitManager::DumpToFile(Function* f, const char* fileName)
|
||||
{
|
||||
if (KNOB_DUMP_SHADER_IR)
|
||||
{
|
||||
std::string outDir = GetOutputDir();
|
||||
|
||||
std::error_code EC;
|
||||
const char * funcName = f->getName().data();
|
||||
const char* funcName = f->getName().data();
|
||||
char fName[256];
|
||||
#if defined(_WIN32)
|
||||
sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
|
||||
|
@ -489,7 +490,7 @@ void JitManager::DumpToFile(Function *f, const char *fileName)
|
|||
fd.flush();
|
||||
|
||||
raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text);
|
||||
WriteGraph(fd_cfg, (const Function *)f);
|
||||
WriteGraph(fd_cfg, (const Function*)f);
|
||||
|
||||
fd_cfg.flush();
|
||||
}
|
||||
|
@ -501,7 +502,7 @@ bool g_DllActive = true;
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Create JIT context.
|
||||
/// @param targetSimdWidth - SIMD width to be used in generated program.
|
||||
HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char *arch, const char *core)
|
||||
HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch, const char* core)
|
||||
{
|
||||
return new JitManager(targetSimdWidth, arch, core);
|
||||
}
|
||||
|
@ -512,7 +513,7 @@ void JITCALL JitDestroyContext(HANDLE hJitContext)
|
|||
{
|
||||
if (g_DllActive)
|
||||
{
|
||||
delete reinterpret_cast<JitManager *>(hJitContext);
|
||||
delete reinterpret_cast<JitManager*>(hJitContext);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -528,8 +529,8 @@ struct JitCacheFileHeader
|
|||
{
|
||||
void Init(uint32_t llCRC,
|
||||
uint32_t objCRC,
|
||||
const std::string &moduleID,
|
||||
const std::string &cpu,
|
||||
const std::string& moduleID,
|
||||
const std::string& cpu,
|
||||
uint32_t optLevel,
|
||||
uint64_t objSize)
|
||||
{
|
||||
|
@ -545,7 +546,7 @@ struct JitCacheFileHeader
|
|||
|
||||
|
||||
bool
|
||||
IsValid(uint32_t llCRC, const std::string &moduleID, const std::string &cpu, uint32_t optLevel)
|
||||
IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu, uint32_t optLevel)
|
||||
{
|
||||
if ((m_MagicNumber != JC_MAGIC_NUMBER) || (m_llCRC != llCRC) ||
|
||||
(m_platformKey != JC_PLATFORM_KEY) || (m_optLevel != optLevel))
|
||||
|
@ -576,7 +577,7 @@ private:
|
|||
static const size_t JC_STR_MAX_LEN = 32;
|
||||
static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) |
|
||||
(LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) |
|
||||
((sizeof(void *) > sizeof(uint32_t)) ? 1 : 0);
|
||||
((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0);
|
||||
|
||||
uint64_t m_MagicNumber = JC_MAGIC_NUMBER;
|
||||
uint64_t m_objSize = 0;
|
||||
|
@ -588,7 +589,7 @@ private:
|
|||
char m_Cpu[JC_STR_MAX_LEN] = {};
|
||||
};
|
||||
|
||||
static inline uint32_t ComputeModuleCRC(const llvm::Module *M)
|
||||
static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
|
||||
{
|
||||
std::string bitcodeBuffer;
|
||||
raw_string_ostream bitcodeStream(bitcodeBuffer);
|
||||
|
@ -611,7 +612,7 @@ JitCache::JitCache()
|
|||
#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
|
||||
if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0)
|
||||
{
|
||||
char *homedir;
|
||||
char* homedir;
|
||||
if (!(homedir = getenv("HOME")))
|
||||
{
|
||||
homedir = getpwuid(getuid())->pw_dir;
|
||||
|
@ -626,15 +627,15 @@ JitCache::JitCache()
|
|||
}
|
||||
}
|
||||
|
||||
int ExecUnhookedProcess(const std::string &CmdLine, std::string *pStdOut, std::string *pStdErr)
|
||||
int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr)
|
||||
{
|
||||
return ExecCmd(CmdLine, "", pStdOut, pStdErr);
|
||||
}
|
||||
|
||||
/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
|
||||
void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj)
|
||||
void JitCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj)
|
||||
{
|
||||
const std::string &moduleID = M->getModuleIdentifier();
|
||||
const std::string& moduleID = M->getModuleIdentifier();
|
||||
if (!moduleID.length())
|
||||
{
|
||||
return;
|
||||
|
@ -671,7 +672,7 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef
|
|||
|
||||
header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, mOptLevel, Obj.getBufferSize());
|
||||
|
||||
fileObj.write((const char *)&header, sizeof(header));
|
||||
fileObj.write((const char*)&header, sizeof(header));
|
||||
fileObj.flush();
|
||||
}
|
||||
}
|
||||
|
@ -679,9 +680,9 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef
|
|||
/// Returns a pointer to a newly allocated MemoryBuffer that contains the
|
||||
/// object which corresponds with Module M, or 0 if an object is not
|
||||
/// available.
|
||||
std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
|
||||
std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
|
||||
{
|
||||
const std::string &moduleID = M->getModuleIdentifier();
|
||||
const std::string& moduleID = M->getModuleIdentifier();
|
||||
mCurrentModuleCRC = ComputeModuleCRC(M);
|
||||
|
||||
if (!moduleID.length())
|
||||
|
@ -700,8 +701,8 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
|
|||
llvm::SmallString<MAX_PATH> objFilePath = filePath;
|
||||
objFilePath += JIT_OBJ_EXT;
|
||||
|
||||
FILE *fpObjIn = nullptr;
|
||||
FILE *fpIn = fopen(filePath.c_str(), "rb");
|
||||
FILE* fpObjIn = nullptr;
|
||||
FILE* fpIn = fopen(filePath.c_str(), "rb");
|
||||
if (!fpIn)
|
||||
{
|
||||
return nullptr;
|
||||
|
@ -732,7 +733,7 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
|
|||
#else
|
||||
pBuf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(size_t(header.GetObjectSize()));
|
||||
#endif
|
||||
if (!fread(const_cast<char *>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
|
||||
if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
|
||||
{
|
||||
pBuf = nullptr;
|
||||
break;
|
||||
|
|
|
@ -37,12 +37,12 @@ namespace SwrJit
|
|||
{
|
||||
using namespace llvm;
|
||||
|
||||
BuilderGfxMem::BuilderGfxMem(JitManager *pJitMgr) : Builder(pJitMgr)
|
||||
BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) : Builder(pJitMgr)
|
||||
{
|
||||
mpTranslationFuncTy = nullptr;
|
||||
mpfnTranslateGfxAddressForRead = nullptr;
|
||||
mpTranslationFuncTy = nullptr;
|
||||
mpfnTranslateGfxAddressForRead = nullptr;
|
||||
mpfnTranslateGfxAddressForWrite = nullptr;
|
||||
mpParamSimDC = nullptr;
|
||||
mpParamSimDC = nullptr;
|
||||
|
||||
}
|
||||
|
||||
|
@ -50,7 +50,7 @@ namespace SwrJit
|
|||
{
|
||||
}
|
||||
|
||||
void BuilderGfxMem::AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage)
|
||||
void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
|
||||
{
|
||||
SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL),
|
||||
"Internal memory should not be gfxptr_t.");
|
||||
|
@ -65,10 +65,10 @@ namespace SwrJit
|
|||
/// @param vIndices - SIMD wide value of VB byte offsets
|
||||
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
|
||||
/// @param scale - value to scale indices by
|
||||
Value *BuilderGfxMem::GATHERPS(Value * vSrc,
|
||||
Value * pBase,
|
||||
Value * vIndices,
|
||||
Value * vMask,
|
||||
Value* BuilderGfxMem::GATHERPS(Value* vSrc,
|
||||
Value* pBase,
|
||||
Value* vIndices,
|
||||
Value* vMask,
|
||||
uint8_t scale,
|
||||
JIT_MEM_CLIENT usage)
|
||||
{
|
||||
|
@ -78,7 +78,7 @@ namespace SwrJit
|
|||
pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
|
||||
}
|
||||
|
||||
Value *vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
|
||||
Value* vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
|
||||
return vGather;
|
||||
}
|
||||
|
||||
|
@ -90,10 +90,10 @@ namespace SwrJit
|
|||
/// @param vIndices - SIMD wide value of VB byte offsets
|
||||
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
|
||||
/// @param scale - value to scale indices by
|
||||
Value *BuilderGfxMem::GATHERDD(Value * vSrc,
|
||||
Value * pBase,
|
||||
Value * vIndices,
|
||||
Value * vMask,
|
||||
Value* BuilderGfxMem::GATHERDD(Value* vSrc,
|
||||
Value* pBase,
|
||||
Value* vIndices,
|
||||
Value* vMask,
|
||||
uint8_t scale,
|
||||
JIT_MEM_CLIENT usage)
|
||||
{
|
||||
|
@ -104,7 +104,7 @@ namespace SwrJit
|
|||
pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
|
||||
}
|
||||
|
||||
Value *vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
|
||||
Value* vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
|
||||
return vGather;
|
||||
}
|
||||
|
||||
|
@ -122,37 +122,37 @@ namespace SwrJit
|
|||
}
|
||||
|
||||
|
||||
Value *BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset)
|
||||
Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
|
||||
{
|
||||
return ADD(base, offset);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
|
||||
Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
|
||||
{
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::GEP(Ptr, Idx, nullptr, Name);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
|
||||
Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
|
||||
{
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::GEP(Ty, Ptr, Idx, Name);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty)
|
||||
Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
|
||||
{
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::GEP(Ptr, indexList);
|
||||
}
|
||||
|
||||
Value *
|
||||
BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
|
||||
Value*
|
||||
BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
|
||||
{
|
||||
Ptr = TranslationHelper(Ptr, Ty);
|
||||
return Builder::GEP(Ptr, indexList);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty)
|
||||
Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty)
|
||||
{
|
||||
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
|
||||
"Access of GFX pointers must have non-null type specified.");
|
||||
|
@ -167,7 +167,7 @@ namespace SwrJit
|
|||
return Ptr;
|
||||
}
|
||||
|
||||
LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
||||
|
@ -175,7 +175,7 @@ namespace SwrJit
|
|||
return Builder::LOAD(Ptr, Name);
|
||||
}
|
||||
|
||||
LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
||||
|
@ -183,8 +183,9 @@ namespace SwrJit
|
|||
return Builder::LOAD(Ptr, Name);
|
||||
}
|
||||
|
||||
LoadInst *BuilderGfxMem::LOAD(
|
||||
Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
|
||||
|
||||
LoadInst* BuilderGfxMem::LOAD(
|
||||
Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
||||
|
@ -192,10 +193,10 @@ namespace SwrJit
|
|||
return Builder::LOAD(Ptr, isVolatile, Name);
|
||||
}
|
||||
|
||||
LoadInst *BuilderGfxMem::LOAD(Value * BasePtr,
|
||||
const std::initializer_list<uint32_t> &offset,
|
||||
const llvm::Twine & name,
|
||||
Type * Ty,
|
||||
LoadInst* BuilderGfxMem::LOAD(Value* BasePtr,
|
||||
const std::initializer_list<uint32_t>& offset,
|
||||
const llvm::Twine& name,
|
||||
Type* Ty,
|
||||
JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(BasePtr, usage);
|
||||
|
@ -207,7 +208,7 @@ namespace SwrJit
|
|||
BasePtr = INT_TO_PTR(BasePtr, Ty, name);
|
||||
bNeedTranslation = true;
|
||||
}
|
||||
std::vector<Value *> valIndices;
|
||||
std::vector<Value*> valIndices;
|
||||
for (auto i : offset)
|
||||
{
|
||||
valIndices.push_back(C(i));
|
||||
|
@ -221,12 +222,13 @@ namespace SwrJit
|
|||
return LOAD(BasePtr, name, Ty, usage);
|
||||
}
|
||||
|
||||
CallInst *BuilderGfxMem::MASKED_LOAD(Value * Ptr,
|
||||
|
||||
CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
|
||||
unsigned Align,
|
||||
Value * Mask,
|
||||
Value * PassThru,
|
||||
const Twine & Name,
|
||||
Type * Ty,
|
||||
Value* Mask,
|
||||
Value* PassThru,
|
||||
const Twine& Name,
|
||||
Type* Ty,
|
||||
JIT_MEM_CLIENT usage)
|
||||
{
|
||||
AssertGFXMemoryParams(Ptr, usage);
|
||||
|
@ -235,21 +237,9 @@ namespace SwrJit
|
|||
return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::TranslateGfxAddressForRead(Value * xpGfxAddress,
|
||||
Type * PtrTy,
|
||||
const Twine &Name,
|
||||
JIT_MEM_CLIENT /* usage */)
|
||||
{
|
||||
if (PtrTy == nullptr)
|
||||
{
|
||||
PtrTy = mInt8PtrTy;
|
||||
}
|
||||
return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
|
||||
}
|
||||
|
||||
Value *BuilderGfxMem::TranslateGfxAddressForWrite(Value * xpGfxAddress,
|
||||
Type * PtrTy,
|
||||
const Twine &Name,
|
||||
Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
|
||||
Type* PtrTy,
|
||||
const Twine& Name,
|
||||
JIT_MEM_CLIENT /* usage */)
|
||||
{
|
||||
if (PtrTy == nullptr)
|
||||
|
@ -259,4 +249,16 @@ namespace SwrJit
|
|||
return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
|
||||
}
|
||||
|
||||
Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress,
|
||||
Type* PtrTy,
|
||||
const Twine& Name,
|
||||
JIT_MEM_CLIENT /* usage */)
|
||||
{
|
||||
if (PtrTy == nullptr)
|
||||
{
|
||||
PtrTy = mInt8PtrTy;
|
||||
}
|
||||
return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
|
||||
}
|
||||
|
||||
} // namespace SwrJit
|
||||
|
|
|
@ -38,53 +38,54 @@ namespace SwrJit
|
|||
class BuilderGfxMem : public Builder
|
||||
{
|
||||
public:
|
||||
BuilderGfxMem(JitManager *pJitMgr);
|
||||
BuilderGfxMem(JitManager* pJitMgr);
|
||||
virtual ~BuilderGfxMem() {}
|
||||
|
||||
virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
|
||||
virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
|
||||
virtual Value *
|
||||
GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty = nullptr);
|
||||
virtual Value *
|
||||
GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
|
||||
virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
|
||||
virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
|
||||
virtual Value*
|
||||
GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
|
||||
virtual Value*
|
||||
GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty = nullptr);
|
||||
|
||||
virtual LoadInst *LOAD(Value * Ptr,
|
||||
const char * Name,
|
||||
Type * Ty = nullptr,
|
||||
virtual LoadInst* LOAD(Value* Ptr,
|
||||
const char* Name,
|
||||
Type* Ty = nullptr,
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst *LOAD(Value * Ptr,
|
||||
const Twine & Name = "",
|
||||
Type * Ty = nullptr,
|
||||
virtual LoadInst* LOAD(Value* Ptr,
|
||||
const Twine& Name = "",
|
||||
Type* Ty = nullptr,
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst *LOAD(Value * Ptr,
|
||||
virtual LoadInst* LOAD(Value* Ptr,
|
||||
bool isVolatile,
|
||||
const Twine & Name = "",
|
||||
Type * Ty = nullptr,
|
||||
const Twine& Name = "",
|
||||
Type* Ty = nullptr,
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual LoadInst *LOAD(Value * BasePtr,
|
||||
const std::initializer_list<uint32_t> &offset,
|
||||
const llvm::Twine & Name = "",
|
||||
Type * Ty = nullptr,
|
||||
virtual LoadInst* LOAD(Value* BasePtr,
|
||||
const std::initializer_list<uint32_t>& offset,
|
||||
const llvm::Twine& Name = "",
|
||||
Type* Ty = nullptr,
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
virtual CallInst *MASKED_LOAD(Value * Ptr,
|
||||
|
||||
virtual CallInst* MASKED_LOAD(Value* Ptr,
|
||||
unsigned Align,
|
||||
Value * Mask,
|
||||
Value * PassThru = nullptr,
|
||||
const Twine & Name = "",
|
||||
Type * Ty = nullptr,
|
||||
Value* Mask,
|
||||
Value* PassThru = nullptr,
|
||||
const Twine& Name = "",
|
||||
Type* Ty = nullptr,
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
virtual Value *GATHERPS(Value * src,
|
||||
Value * pBase,
|
||||
Value * indices,
|
||||
Value * mask,
|
||||
virtual Value* GATHERPS(Value* src,
|
||||
Value* pBase,
|
||||
Value* indices,
|
||||
Value* mask,
|
||||
uint8_t scale = 1,
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
virtual Value *GATHERDD(Value * src,
|
||||
Value * pBase,
|
||||
Value * indices,
|
||||
Value * mask,
|
||||
virtual Value* GATHERDD(Value* src,
|
||||
Value* pBase,
|
||||
Value* indices,
|
||||
Value* mask,
|
||||
uint8_t scale = 1,
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
|
@ -95,35 +96,35 @@ namespace SwrJit
|
|||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
|
||||
Value *TranslateGfxAddressForRead(Value * xpGfxAddress,
|
||||
Type * PtrTy = nullptr,
|
||||
const Twine & Name = "",
|
||||
Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
|
||||
Type* PtrTy = nullptr,
|
||||
const Twine& Name = "",
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
Value *TranslateGfxAddressForWrite(Value * xpGfxAddress,
|
||||
Type * PtrTy = nullptr,
|
||||
const Twine & Name = "",
|
||||
Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
|
||||
Type* PtrTy = nullptr,
|
||||
const Twine& Name = "",
|
||||
JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
|
||||
|
||||
|
||||
protected:
|
||||
void AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage);
|
||||
void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
|
||||
|
||||
virtual void NotifyPrivateContextSet();
|
||||
|
||||
virtual Value *OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset);
|
||||
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
|
||||
|
||||
Value *TranslationHelper(Value *Ptr, Type *Ty);
|
||||
Value* TranslationHelper(Value* Ptr, Type* Ty);
|
||||
|
||||
FunctionType *GetTranslationFunctionType() { return mpTranslationFuncTy; }
|
||||
Value * GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
|
||||
Value * GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
|
||||
Value * GetParamSimDC() { return mpParamSimDC; }
|
||||
FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
|
||||
Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
|
||||
Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
|
||||
Value* GetParamSimDC() { return mpParamSimDC; }
|
||||
|
||||
|
||||
private:
|
||||
FunctionType *mpTranslationFuncTy;
|
||||
Value * mpfnTranslateGfxAddressForRead;
|
||||
Value * mpfnTranslateGfxAddressForWrite;
|
||||
Value * mpParamSimDC;
|
||||
FunctionType* mpTranslationFuncTy;
|
||||
Value* mpfnTranslateGfxAddressForRead;
|
||||
Value* mpfnTranslateGfxAddressForWrite;
|
||||
Value* mpParamSimDC;
|
||||
};
|
||||
} // namespace SwrJit
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
namespace llvm
|
||||
{
|
||||
// forward declare the initializer
|
||||
void initializeLowerX86Pass(PassRegistry &);
|
||||
void initializeLowerX86Pass(PassRegistry&);
|
||||
} // namespace llvm
|
||||
|
||||
namespace SwrJit
|
||||
|
@ -60,7 +60,7 @@ namespace SwrJit
|
|||
|
||||
struct LowerX86;
|
||||
|
||||
typedef std::function<Instruction *(LowerX86 *, TargetArch, TargetWidth, CallInst *)> EmuFunc;
|
||||
typedef std::function<Instruction*(LowerX86*, TargetArch, TargetWidth, CallInst*)> EmuFunc;
|
||||
|
||||
struct X86Intrinsic
|
||||
{
|
||||
|
@ -83,22 +83,22 @@ namespace SwrJit
|
|||
};
|
||||
|
||||
// Forward decls
|
||||
Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
|
||||
Instruction *
|
||||
VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
|
||||
Instruction *
|
||||
VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
|
||||
Instruction *
|
||||
VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
|
||||
Instruction *
|
||||
VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
|
||||
Instruction *
|
||||
VCONVERT_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
|
||||
Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
|
||||
Instruction*
|
||||
VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
|
||||
Instruction*
|
||||
VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
|
||||
Instruction*
|
||||
VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
|
||||
Instruction*
|
||||
VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
|
||||
Instruction*
|
||||
VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
|
||||
|
||||
Instruction *DOUBLE_EMU(LowerX86 * pThis,
|
||||
Instruction* DOUBLE_EMU(LowerX86* pThis,
|
||||
TargetArch arch,
|
||||
TargetWidth width,
|
||||
CallInst * pCallInst,
|
||||
CallInst* pCallInst,
|
||||
Intrinsic::ID intrin);
|
||||
|
||||
static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
|
||||
|
@ -188,7 +188,7 @@ namespace SwrJit
|
|||
|
||||
struct LowerX86 : public FunctionPass
|
||||
{
|
||||
LowerX86(Builder *b = nullptr) : FunctionPass(ID), B(b)
|
||||
LowerX86(Builder* b = nullptr) : FunctionPass(ID), B(b)
|
||||
{
|
||||
initializeLowerX86Pass(*PassRegistry::getPassRegistry());
|
||||
|
||||
|
@ -216,12 +216,12 @@ namespace SwrJit
|
|||
// across all intrinsics, and will have to be rethought. Probably need something
|
||||
// similar to llvm's getDeclaration() utility to map a set of inputs to a specific typed
|
||||
// intrinsic.
|
||||
void GetRequestedWidthAndType(CallInst * pCallInst,
|
||||
void GetRequestedWidthAndType(CallInst* pCallInst,
|
||||
const StringRef intrinName,
|
||||
TargetWidth * pWidth,
|
||||
Type ** pTy)
|
||||
TargetWidth* pWidth,
|
||||
Type** pTy)
|
||||
{
|
||||
Type *pVecTy = pCallInst->getType();
|
||||
Type* pVecTy = pCallInst->getType();
|
||||
|
||||
// Check for intrinsic specific types
|
||||
// VCVTPD2PS type comes from src, not dst
|
||||
|
@ -232,7 +232,7 @@ namespace SwrJit
|
|||
|
||||
if (!pVecTy->isVectorTy())
|
||||
{
|
||||
for (auto &op : pCallInst->arg_operands())
|
||||
for (auto& op : pCallInst->arg_operands())
|
||||
{
|
||||
if (op.get()->getType()->isVectorTy())
|
||||
{
|
||||
|
@ -260,7 +260,7 @@ namespace SwrJit
|
|||
*pTy = pVecTy->getScalarType();
|
||||
}
|
||||
|
||||
Value *GetZeroVec(TargetWidth width, Type *pTy)
|
||||
Value* GetZeroVec(TargetWidth width, Type* pTy)
|
||||
{
|
||||
uint32_t numElem = 0;
|
||||
switch (width)
|
||||
|
@ -278,9 +278,9 @@ namespace SwrJit
|
|||
return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
|
||||
}
|
||||
|
||||
Value *GetMask(TargetWidth width)
|
||||
Value* GetMask(TargetWidth width)
|
||||
{
|
||||
Value *mask;
|
||||
Value* mask;
|
||||
switch (width)
|
||||
{
|
||||
case W256:
|
||||
|
@ -296,18 +296,18 @@ namespace SwrJit
|
|||
}
|
||||
|
||||
// Convert <N x i1> mask to <N x i32> x86 mask
|
||||
Value *VectorMask(Value *vi1Mask)
|
||||
Value* VectorMask(Value* vi1Mask)
|
||||
{
|
||||
uint32_t numElem = vi1Mask->getType()->getVectorNumElements();
|
||||
return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem));
|
||||
}
|
||||
|
||||
Instruction *ProcessIntrinsicAdvanced(CallInst *pCallInst)
|
||||
Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
|
||||
{
|
||||
Function * pFunc = pCallInst->getCalledFunction();
|
||||
auto & intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
|
||||
Function* pFunc = pCallInst->getCalledFunction();
|
||||
auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
|
||||
TargetWidth vecWidth;
|
||||
Type * pElemTy;
|
||||
Type* pElemTy;
|
||||
GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
|
||||
|
||||
// Check if there is a native intrinsic for this instruction
|
||||
|
@ -323,9 +323,9 @@ namespace SwrJit
|
|||
}
|
||||
else if (id != Intrinsic::not_intrinsic)
|
||||
{
|
||||
Function *pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
|
||||
SmallVector<Value *, 8> args;
|
||||
for (auto &arg : pCallInst->arg_operands())
|
||||
Function* pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
|
||||
SmallVector<Value*, 8> args;
|
||||
for (auto& arg : pCallInst->arg_operands())
|
||||
{
|
||||
args.push_back(arg.get());
|
||||
}
|
||||
|
@ -361,9 +361,9 @@ namespace SwrJit
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
Instruction *ProcessIntrinsic(CallInst *pCallInst)
|
||||
Instruction* ProcessIntrinsic(CallInst* pCallInst)
|
||||
{
|
||||
Function *pFunc = pCallInst->getCalledFunction();
|
||||
Function* pFunc = pCallInst->getCalledFunction();
|
||||
|
||||
// Forward to the advanced support if found
|
||||
if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end())
|
||||
|
@ -376,11 +376,11 @@ namespace SwrJit
|
|||
pFunc->getName());
|
||||
|
||||
Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()];
|
||||
Function * pX86IntrinFunc =
|
||||
Function* pX86IntrinFunc =
|
||||
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
|
||||
|
||||
SmallVector<Value *, 8> args;
|
||||
for (auto &arg : pCallInst->arg_operands())
|
||||
SmallVector<Value*, 8> args;
|
||||
for (auto& arg : pCallInst->arg_operands())
|
||||
{
|
||||
args.push_back(arg.get());
|
||||
}
|
||||
|
@ -390,23 +390,23 @@ namespace SwrJit
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief LLVM funtion pass run method.
|
||||
/// @param f- The function we're working on with this pass.
|
||||
virtual bool runOnFunction(Function &F)
|
||||
virtual bool runOnFunction(Function& F)
|
||||
{
|
||||
std::vector<Instruction *> toRemove;
|
||||
std::vector<Instruction*> toRemove;
|
||||
|
||||
for (auto &BB : F.getBasicBlockList())
|
||||
for (auto& BB : F.getBasicBlockList())
|
||||
{
|
||||
for (auto &I : BB.getInstList())
|
||||
for (auto& I : BB.getInstList())
|
||||
{
|
||||
if (CallInst *pCallInst = dyn_cast<CallInst>(&I))
|
||||
if (CallInst* pCallInst = dyn_cast<CallInst>(&I))
|
||||
{
|
||||
Function *pFunc = pCallInst->getCalledFunction();
|
||||
Function* pFunc = pCallInst->getCalledFunction();
|
||||
if (pFunc)
|
||||
{
|
||||
if (pFunc->getName().startswith("meta.intrinsic"))
|
||||
{
|
||||
B->IRB()->SetInsertPoint(&I);
|
||||
Instruction *pReplace = ProcessIntrinsic(pCallInst);
|
||||
Instruction* pReplace = ProcessIntrinsic(pCallInst);
|
||||
SWR_ASSERT(pReplace);
|
||||
toRemove.push_back(pCallInst);
|
||||
pCallInst->replaceAllUsesWith(pReplace);
|
||||
|
@ -416,7 +416,7 @@ namespace SwrJit
|
|||
}
|
||||
}
|
||||
|
||||
for (auto *pInst : toRemove)
|
||||
for (auto* pInst : toRemove)
|
||||
{
|
||||
pInst->eraseFromParent();
|
||||
}
|
||||
|
@ -426,11 +426,11 @@ namespace SwrJit
|
|||
return true;
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
|
||||
virtual void getAnalysisUsage(AnalysisUsage& AU) const {}
|
||||
|
||||
JitManager *JM() { return B->JM(); }
|
||||
JitManager* JM() { return B->JM(); }
|
||||
|
||||
Builder *B;
|
||||
Builder* B;
|
||||
|
||||
TargetArch mTarget;
|
||||
|
||||
|
@ -439,24 +439,24 @@ namespace SwrJit
|
|||
|
||||
char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID.
|
||||
|
||||
FunctionPass *createLowerX86Pass(Builder *b) { return new LowerX86(b); }
|
||||
FunctionPass* createLowerX86Pass(Builder* b) { return new LowerX86(b); }
|
||||
|
||||
Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
|
||||
Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
|
||||
{
|
||||
SWR_ASSERT(false, "Unimplemented intrinsic emulation.");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Instruction *VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
|
||||
Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
|
||||
{
|
||||
// Only need vperm emulation for AVX
|
||||
SWR_ASSERT(arch == AVX);
|
||||
|
||||
Builder *B = pThis->B;
|
||||
Builder* B = pThis->B;
|
||||
auto v32A = pCallInst->getArgOperand(0);
|
||||
auto vi32Index = pCallInst->getArgOperand(1);
|
||||
|
||||
Value *v32Result;
|
||||
Value* v32Result;
|
||||
if (isa<Constant>(vi32Index))
|
||||
{
|
||||
// Can use llvm shuffle vector directly with constant shuffle indices
|
||||
|
@ -475,10 +475,10 @@ namespace SwrJit
|
|||
return cast<Instruction>(v32Result);
|
||||
}
|
||||
|
||||
Instruction *
|
||||
VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
|
||||
Instruction*
|
||||
VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
|
||||
{
|
||||
Builder *B = pThis->B;
|
||||
Builder* B = pThis->B;
|
||||
auto vSrc = pCallInst->getArgOperand(0);
|
||||
auto pBase = pCallInst->getArgOperand(1);
|
||||
auto vi32Indices = pCallInst->getArgOperand(2);
|
||||
|
@ -489,7 +489,7 @@ namespace SwrJit
|
|||
uint32_t numElem = vSrc->getType()->getVectorNumElements();
|
||||
auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
|
||||
auto srcTy = vSrc->getType()->getVectorElementType();
|
||||
Value * v32Gather;
|
||||
Value* v32Gather;
|
||||
if (arch == AVX)
|
||||
{
|
||||
// Full emulation for AVX
|
||||
|
@ -518,7 +518,7 @@ namespace SwrJit
|
|||
}
|
||||
else if (arch == AVX2 || (arch == AVX512 && width == W256))
|
||||
{
|
||||
Function *pX86IntrinFunc;
|
||||
Function* pX86IntrinFunc;
|
||||
if (srcTy == B->mFP32Ty)
|
||||
{
|
||||
pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
|
||||
|
@ -555,14 +555,14 @@ namespace SwrJit
|
|||
VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements()));
|
||||
v64Mask = B->BITCAST(v64Mask, vSrc->getType());
|
||||
|
||||
Value *src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
|
||||
Value *src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
|
||||
Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
|
||||
Value* src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
|
||||
|
||||
Value *indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
|
||||
Value *indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
|
||||
Value* indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
|
||||
Value* indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
|
||||
|
||||
Value *mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
|
||||
Value *mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
|
||||
Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
|
||||
Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
|
||||
|
||||
src0 = B->BITCAST(
|
||||
src0,
|
||||
|
@ -570,7 +570,7 @@ namespace SwrJit
|
|||
mask0 = B->BITCAST(
|
||||
mask0,
|
||||
VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements()));
|
||||
Value *gather0 =
|
||||
Value* gather0 =
|
||||
B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
|
||||
src1 = B->BITCAST(
|
||||
src1,
|
||||
|
@ -578,7 +578,7 @@ namespace SwrJit
|
|||
mask1 = B->BITCAST(
|
||||
mask1,
|
||||
VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements()));
|
||||
Value *gather1 =
|
||||
Value* gather1 =
|
||||
B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
|
||||
|
||||
v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
|
||||
|
@ -589,18 +589,18 @@ namespace SwrJit
|
|||
// Double pump 8-wide for 32bit elements
|
||||
auto v32Mask = pThis->VectorMask(vi1Mask);
|
||||
v32Mask = B->BITCAST(v32Mask, vSrc->getType());
|
||||
Value *src0 = B->EXTRACT_16(vSrc, 0);
|
||||
Value *src1 = B->EXTRACT_16(vSrc, 1);
|
||||
Value* src0 = B->EXTRACT_16(vSrc, 0);
|
||||
Value* src1 = B->EXTRACT_16(vSrc, 1);
|
||||
|
||||
Value *indices0 = B->EXTRACT_16(vi32Indices, 0);
|
||||
Value *indices1 = B->EXTRACT_16(vi32Indices, 1);
|
||||
Value* indices0 = B->EXTRACT_16(vi32Indices, 0);
|
||||
Value* indices1 = B->EXTRACT_16(vi32Indices, 1);
|
||||
|
||||
Value *mask0 = B->EXTRACT_16(v32Mask, 0);
|
||||
Value *mask1 = B->EXTRACT_16(v32Mask, 1);
|
||||
Value* mask0 = B->EXTRACT_16(v32Mask, 0);
|
||||
Value* mask1 = B->EXTRACT_16(v32Mask, 1);
|
||||
|
||||
Value *gather0 =
|
||||
Value* gather0 =
|
||||
B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
|
||||
Value *gather1 =
|
||||
Value* gather1 =
|
||||
B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
|
||||
|
||||
v32Gather = B->JOIN_16(gather0, gather1);
|
||||
|
@ -609,8 +609,8 @@ namespace SwrJit
|
|||
}
|
||||
else if (arch == AVX512)
|
||||
{
|
||||
Value * iMask;
|
||||
Function *pX86IntrinFunc;
|
||||
Value* iMask;
|
||||
Function* pX86IntrinFunc;
|
||||
if (srcTy == B->mFP32Ty)
|
||||
{
|
||||
pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
|
||||
|
@ -643,8 +643,8 @@ namespace SwrJit
|
|||
|
||||
// No support for vroundps in avx512 (it is available in kncni), so emulate with avx
|
||||
// instructions
|
||||
Instruction *
|
||||
VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
|
||||
Instruction*
|
||||
VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
|
||||
{
|
||||
SWR_ASSERT(arch == AVX512);
|
||||
|
||||
|
@ -676,22 +676,25 @@ namespace SwrJit
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
Instruction *VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
|
||||
Instruction*
|
||||
VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
|
||||
{
|
||||
SWR_ASSERT(arch == AVX512);
|
||||
|
||||
auto B = pThis->B;
|
||||
auto B = pThis->B;
|
||||
auto vf32Src = pCallInst->getOperand(0);
|
||||
|
||||
if (width == W256)
|
||||
{
|
||||
auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
|
||||
auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
|
||||
Intrinsic::x86_avx_round_ps_256);
|
||||
return cast<Instruction>(B->FP_TRUNC(vf32SrcRound, B->mFP32Ty));
|
||||
}
|
||||
else if (width == W512)
|
||||
{
|
||||
// 512 can use intrinsic
|
||||
auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_mask_cvtpd2ps_512);
|
||||
auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
|
||||
Intrinsic::x86_avx512_mask_cvtpd2ps_512);
|
||||
return cast<Instruction>(B->CALL(pfnFunc, vf32Src));
|
||||
}
|
||||
else
|
||||
|
@ -703,7 +706,7 @@ namespace SwrJit
|
|||
}
|
||||
|
||||
// No support for hsub in AVX512
|
||||
Instruction *VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
|
||||
Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
|
||||
{
|
||||
SWR_ASSERT(arch == AVX512);
|
||||
|
||||
|
@ -734,27 +737,27 @@ namespace SwrJit
|
|||
|
||||
// Double pump input using Intrin template arg. This blindly extracts lower and upper 256 from
|
||||
// each vector argument and calls the 256 wide intrinsic, then merges the results to 512 wide
|
||||
Instruction *DOUBLE_EMU(LowerX86 * pThis,
|
||||
Instruction* DOUBLE_EMU(LowerX86* pThis,
|
||||
TargetArch arch,
|
||||
TargetWidth width,
|
||||
CallInst * pCallInst,
|
||||
CallInst* pCallInst,
|
||||
Intrinsic::ID intrin)
|
||||
{
|
||||
auto B = pThis->B;
|
||||
SWR_ASSERT(width == W512);
|
||||
Value * result[2];
|
||||
Function *pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
|
||||
Value* result[2];
|
||||
Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
|
||||
for (uint32_t i = 0; i < 2; ++i)
|
||||
{
|
||||
SmallVector<Value *, 8> args;
|
||||
for (auto &arg : pCallInst->arg_operands())
|
||||
SmallVector<Value*, 8> args;
|
||||
for (auto& arg : pCallInst->arg_operands())
|
||||
{
|
||||
auto argType = arg.get()->getType();
|
||||
if (argType->isVectorTy())
|
||||
{
|
||||
uint32_t vecWidth = argType->getVectorNumElements();
|
||||
Value * lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
|
||||
Value * argToPush = B->VSHUFFLE(
|
||||
Value* lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
|
||||
Value* argToPush = B->VSHUFFLE(
|
||||
arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes);
|
||||
args.push_back(argToPush);
|
||||
}
|
||||
|
@ -776,7 +779,7 @@ namespace SwrJit
|
|||
{
|
||||
vecWidth = 2;
|
||||
}
|
||||
Value *lanes = B->CInc<int>(0, vecWidth);
|
||||
Value* lanes = B->CInc<int>(0, vecWidth);
|
||||
return cast<Instruction>(B->VSHUFFLE(result[0], result[1], lanes));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue