swr/rasterizer: enable size accumulation in mem stats

Small refactoring is also performed

Reviewed-by: Alok Hota <alok.hota@intel.com>
This commit is contained in:
Jan Zielinski 2019-07-26 16:43:58 +02:00
parent 365ad367f1
commit 4f04f260d9
11 changed files with 128 additions and 104 deletions

View File

@ -98,6 +98,8 @@ namespace ArchRast
{
uint32_t accessCountRead;
uint32_t accessCountWrite;
uint32_t totalSizeRead;
uint32_t totalSizeWrite;
uint64_t tscMin;
uint64_t tscMax;
};
@ -113,7 +115,7 @@ namespace ArchRast
typedef std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator> MemoryTrackerMap;
MemoryTrackerMap trackedMemory = {};
void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc)
void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size)
{
MemoryTrackerKey key;
key.address = address;
@ -126,10 +128,12 @@ namespace ArchRast
if (isRead)
{
i->second.accessCountRead++;
i->second.totalSizeRead += size;
}
else
{
i->second.accessCountWrite++;
i->second.totalSizeWrite += size;
}
i->second.tscMax = tsc;
}
@ -140,12 +144,16 @@ namespace ArchRast
if (isRead)
{
data.accessCountRead = 1;
data.totalSizeRead = size;
data.accessCountWrite = 0;
data.totalSizeWrite = 0;
}
else
{
data.accessCountRead = 0;
data.totalSizeRead = 0;
data.accessCountWrite = 1;
data.totalSizeWrite = size;
}
data.tscMin = tsc;
data.tscMax = tsc;
@ -258,6 +266,7 @@ namespace ArchRast
mAddressMask = (mAddressMask << 1) | 1;
addressRangeBytes = addressRangeBytes >> 1;
}
mMemGranularity = mAddressMask + 1;
mAddressMask = ~mAddressMask;
}
@ -666,7 +675,19 @@ namespace ArchRast
//////////////////////////////////////////////////////////////////////////
/// @brief Record a memory access in the per-range memory statistics.
///
/// The access [ptr, ptr + size) is split at every mMemGranularity
/// boundary and each piece is passed to MemoryStats::TrackMemoryAccess
/// together with the number of bytes that fall into that piece, so the
/// sizes reported for one event always add up to event.data.size.
/// An event with size 0 records nothing.
///
/// @param event - memory access event (ptr, size, isRead, tsc are read).
virtual void Handle(const MemoryAccessEvent& event)
{
    uint64_t trackAddr   = event.data.ptr;
    // Start of the tracking range that contains the first accessed byte.
    uint64_t nextAddr    = (trackAddr & mAddressMask);
    uint32_t sizeTracked = 0;

    while (sizeTracked < event.data.size)
    {
        // Advance to the start of the following tracking range.
        nextAddr += mMemGranularity;

        // Bytes from the current address up to the next range boundary,
        // kept in 64 bits so the subtraction is not silently narrowed.
        const uint64_t bytesToBoundary = nextAddr - trackAddr;

        // Bytes of this access that have not been attributed yet. Clamping
        // to the remainder (not to the total access size) keeps the last
        // piece from being over-counted when the access crosses a boundary.
        const uint64_t bytesRemaining = event.data.size - sizeTracked;

        const uint32_t size =
            static_cast<uint32_t>(std::min<uint64_t>(bytesToBoundary, bytesRemaining));

        mMemoryStats.TrackMemoryAccess(
            trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size);

        sizeTracked += size;
        trackAddr = nextAddr;
    }
}
virtual void Handle(const MemoryStatsEndEvent& event)
@ -678,6 +699,8 @@ namespace ArchRast
i->first.address & mAddressMask,
i->second.accessCountRead,
i->second.accessCountWrite,
i->second.totalSizeRead,
i->second.totalSizeWrite,
i->second.tscMin,
i->second.tscMax);
EventHandlerFile::Handle(mse);
@ -734,6 +757,7 @@ namespace ArchRast
MemoryStats mMemoryStats = {};
uint64_t mAddressMask = 0;
uint64_t mMemGranularity = 0;
};

View File

@ -480,6 +480,8 @@ event MemoryStatsEvent
uint64_t baseAddr;
uint32_t accessCountRead;
uint32_t accessCountWrite;
uint32_t totalSizeRead;
uint32_t totalSizeWrite;
uint64_t tscMin;
uint64_t tscMax;
};

View File

@ -233,7 +233,17 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
numSamples);
if (pHotTile)
{
pHotTile->state = (HOTTILE_STATE)pDesc->newTileState;
HOTTILE_STATE newState = (HOTTILE_STATE)pDesc->newTileState;;
if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_CLEAR)
{
if (newState == HOTTILE_INVALID)
{
// This is OK for APIs that explicitly allow discards
// (for e.g. depth / stencil data)
//SWR_INVALID("Discarding valid data!");
}
}
pHotTile->state = newState;
}
}
}

View File

@ -32,7 +32,6 @@
#include "common/rdtsc_buckets.h"
#include "builder_gfx_mem.h"
namespace SwrJit
{
using namespace llvm;
@ -45,20 +44,18 @@ namespace SwrJit
mpfnTrackMemAccess = nullptr;
mpParamSimDC = nullptr;
mpWorkerData = nullptr;
}
void BuilderGfxMem::NotifyPrivateContextSet()
{
}
void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage)
{
SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL),
SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL),
"Internal memory should not be gfxptr_t.");
}
//////////////////////////////////////////////////////////////////////////
/// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
@ -72,7 +69,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
if (pBase->getType() == mInt64Ty)
@ -97,9 +94,8 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
if (pBase->getType() == mInt64Ty)
{
@ -111,19 +107,17 @@ namespace SwrJit
}
void BuilderGfxMem::SCATTERPS(
Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
// address may be coming in as 64bit int now so get the pointer
if (pDst->getType() == mInt64Ty)
{
pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0));
}
Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage);
Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage);
}
Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
{
return ADD(base, offset);
@ -159,7 +153,6 @@ namespace SwrJit
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
"Access of GFX pointers must have non-null type specified.");
// address may be coming in as 64bit int now so get the pointer
if (Ptr->getType() == mInt64Ty)
{
@ -169,7 +162,7 @@ namespace SwrJit
return Ptr;
}
void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead)
void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead)
{
#if defined(KNOB_ENABLE_AR)
if (!KNOB_TRACK_MEMORY_WORKING_SET)
@ -216,7 +209,7 @@ namespace SwrJit
return;
}
LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
@ -225,7 +218,7 @@ namespace SwrJit
return Builder::LOAD(Ptr, Name);
}
LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
@ -234,9 +227,8 @@ namespace SwrJit
return Builder::LOAD(Ptr, Name);
}
LoadInst* BuilderGfxMem::LOAD(
Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
@ -249,7 +241,7 @@ namespace SwrJit
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& name,
Type* Ty,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
@ -274,14 +266,13 @@ namespace SwrJit
return LOAD(BasePtr, name, Ty, usage);
}
CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
unsigned Align,
Value* Mask,
Value* PassThru,
const Twine& Name,
Type* Ty,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, true);
@ -291,7 +282,7 @@ namespace SwrJit
}
StoreInst*
BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, JIT_MEM_CLIENT usage)
BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
TrackerHelper(Ptr, Ty, usage, false);
@ -304,7 +295,7 @@ namespace SwrJit
Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
Type* Ty,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
TrackerHelper(BasePtr, Ty, usage, false);
@ -314,7 +305,7 @@ namespace SwrJit
}
CallInst* BuilderGfxMem::MASKED_STORE(
Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, JIT_MEM_CLIENT usage)
Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
@ -327,7 +318,7 @@ namespace SwrJit
Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
JIT_MEM_CLIENT /* usage */)
MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
@ -339,7 +330,7 @@ namespace SwrJit
Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
JIT_MEM_CLIENT /* usage */)
MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{

View File

@ -51,22 +51,21 @@ namespace SwrJit
virtual LoadInst* LOAD(Value* Ptr,
const char* Name,
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
@ -74,61 +73,57 @@ namespace SwrJit
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
Type* PtrTy = nullptr,
const Twine& Name = "",
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
protected:
void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage);
virtual void NotifyPrivateContextSet();
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
Value* TranslationHelper(Value* Ptr, Type* Ty);
void TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead);
void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead);
FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
Value* GetParamSimDC() { return mpParamSimDC; }
Value* mpWorkerData;
private:

View File

@ -34,7 +34,7 @@
namespace SwrJit
{
void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage)
{
SWR_ASSERT(
ptr->getType() != mInt64Ty,
@ -93,26 +93,26 @@ namespace SwrJit
return IN_BOUNDS_GEP(ptr, indices);
}
LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage)
LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
LoadInst*
Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
@ -122,7 +122,7 @@ namespace SwrJit
const std::initializer_list<uint32_t>& indices,
const llvm::Twine& name,
Type* Ty,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
@ -141,7 +141,7 @@ namespace SwrJit
}
StoreInst*
Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, JIT_MEM_CLIENT usage)
Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
@ -186,7 +186,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
@ -206,7 +206,7 @@ namespace SwrJit
Value* vIndices,
Value* vMask,
uint8_t scale,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
@ -243,7 +243,7 @@ namespace SwrJit
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
@ -262,7 +262,7 @@ namespace SwrJit
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@ -336,7 +336,7 @@ namespace SwrJit
Value* vMask,
Value* vGatherComponents[],
bool bPackedOutput,
JIT_MEM_CLIENT usage)
MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@ -643,7 +643,7 @@ namespace SwrJit
/// @param vOffsets - vector of byte offsets from pDst
/// @param vMask - mask of valid lanes
void Builder::SCATTERPS(
Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
AssertMemoryUsageParams(pDst, usage);

View File

@ -30,7 +30,7 @@
#pragma once
public:
enum class JIT_MEM_CLIENT
enum class MEM_CLIENT
{
MEM_CLIENT_INTERNAL,
GFX_MEM_CLIENT_FETCH,
@ -41,7 +41,7 @@ enum class JIT_MEM_CLIENT
protected:
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage);
public:
virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
@ -57,23 +57,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList)
Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
virtual LoadInst*
LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst*
LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* Ptr,
bool isVolatile,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Value* BasePtr,
const std::initializer_list<uint32_t>& offset,
const llvm::Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_LOAD(Value* Ptr,
unsigned Align,
@ -81,19 +81,19 @@ virtual CallInst* MASKED_LOAD(Value* Ptr,
Value* PassThru = nullptr,
const Twine& Name = "",
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
}
virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateStore(Val, Ptr, isVolatile);
}
virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
{
return IRB()->CreateMaskedStore(Val, Ptr, Align, Mask);
}
@ -112,14 +112,14 @@ void Gather4(const SWR_FORMAT format,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
@ -127,14 +127,14 @@ void GATHER4PS(const SWR_FORMAT_INFO& info,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
virtual Value* GATHERDD(Value* src,
Value* pBase,
Value* indices,
Value* mask,
uint8_t scale = 1,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* pSrcBase,
@ -142,7 +142,7 @@ void GATHER4DD(const SWR_FORMAT_INFO& info,
Value* mask,
Value* vGatherComponents[],
bool bPackedOutput,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
@ -152,7 +152,7 @@ virtual void SCATTERPS(Value* pDst,
Value* vSrc,
Value* vOffsets,
Value* vMask,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
Value* vGatherInput,

View File

@ -774,14 +774,15 @@ namespace SwrJit
{
SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
Value* fixed = nullptr;
#if 0
// This doesn't work for negative numbers!!
#if 0 // This doesn't work for negative numbers!!
{
fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
C(_MM_FROUND_TO_NEAREST_INT)),
mSimdInt32Ty);
}
#else
else
#endif
{
// Do round to nearest int on fractional bits first
// Not entirely perfect for negative numbers, but close enough
@ -804,7 +805,7 @@ namespace SwrJit
fixed = ASHR(vFixed, vExtraBits, name);
}
#endif
return fixed;
}
@ -845,8 +846,7 @@ namespace SwrJit
{
SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
Value* fixed = nullptr;
#if 1
// KNOB_SIM_FAST_MATH? Below works correctly from a precision
#if 1 // KNOB_SIM_FAST_MATH? Below works correctly from a precision
// standpoint...
{
fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),

View File

@ -205,7 +205,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
? vIndices = LOAD(indices,
"",
PointerType::get(mSimdInt32Ty, 0),
JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
: vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
break; // incoming type is already 32bit int
default:
@ -382,7 +382,7 @@ void FetchJit::CreateGatherOddFormats(
if (info.bpp == 32)
{
pGather =
GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
else
{
@ -416,7 +416,7 @@ void FetchJit::CreateGatherOddFormats(
{
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
@ -424,7 +424,7 @@ void FetchJit::CreateGatherOddFormats(
{
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
break;
@ -434,12 +434,12 @@ void FetchJit::CreateGatherOddFormats(
// First 16-bits of data
Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
// Last 8-bits of data
pDst = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0));
xpSrc = ADD(xpSrc, C(2));
STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
xpSrc = ADD(xpSrc, C((int64_t)2));
STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
break;
}
@ -750,7 +750,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
// if we have at least one component out of x or y to fetch
if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
{
vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask);
vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@ -763,7 +763,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
// offset base to the next components(zw) in the vertex to gather
pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask);
vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
@ -811,7 +811,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vNewOffsets,
vGatherMask,
1,
JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
else
{
@ -957,7 +957,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vOffsets,
vGatherMask,
1,
JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of an 8x32bit integer gather for 8bit components
// 256i - 0 1 2 3 4 5 6 7
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
@ -991,7 +991,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vOffsets,
vGatherMask,
1,
JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@ -1009,7 +1009,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vOffsets,
vGatherMask,
1,
JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
@ -1050,7 +1050,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
vOffsets,
vGatherMask,
1,
JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
if (conversionType == CONVERT_USCALED)
{
@ -1147,7 +1147,7 @@ Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
// if valid, load the index. if not, load 0 from the stack
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
Value* index = LOAD(pValid, "valid index", Ty, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
Value* index = LOAD(pValid, "valid index", Ty, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
// zero extended index to 32 bits and insert into the correct simd lane
index = Z_EXT(index, mInt32Ty);
@ -1222,7 +1222,7 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
VIMMED1(0),
"vIndices",
PointerType::get(mSimdInt32Ty, 0),
JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
}
//////////////////////////////////////////////////////////////////////////

View File

@ -83,6 +83,8 @@ void JITCALL JitDestroyContext(HANDLE hJitContext);
/// @param output - Output containing information about JIT shader
ShaderInfo* JITCALL JitCompileShader(HANDLE hJitContext, const JIT_COMPILE_INPUT& input);
ShaderInfo* JITCALL JitGetShader(HANDLE hJitContext, const char* name);
//////////////////////////////////////////////////////////////////////////
/// @brief JIT destroy shader.
/// @param hJitContext - Jit Context

View File

@ -155,7 +155,7 @@ struct StreamOutJit : public BuilderGfxMem
// cast mask to <4xi1>
Value* mask = ToMask(packedMask);
MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
}
// increment SO buffer
@ -223,7 +223,7 @@ struct StreamOutJit : public BuilderGfxMem
Value* pBuf = getSOBuffer(pSoCtx, b);
Value* pData = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
pOutBufferStartVertex[b] = pOutBuffer[b];
outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});