swr: [rasterizer core] fix dependency bug
Never be dependent on "draw 0"; instead, have a bool that makes the draw either dependent on the previous draw or not dependent at all.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
73a9154bde
commit
a16d274032
|
@ -322,7 +322,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
|
||||||
|
|
||||||
SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
|
SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
|
||||||
|
|
||||||
-        pCurDrawContext->dependency = 0;
+        pCurDrawContext->dependent = false;
|
||||||
pCurDrawContext->pContext = pContext;
|
pCurDrawContext->pContext = pContext;
|
||||||
pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
|
pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
|
||||||
|
|
||||||
|
@ -406,7 +406,7 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint
|
||||||
pDC->FeWork.desc.sync.userData3 = userData3;
|
pDC->FeWork.desc.sync.userData3 = userData3;
|
||||||
|
|
||||||
// cannot execute until all previous draws have completed
|
// cannot execute until all previous draws have completed
|
||||||
-    pDC->dependency = pDC->drawId - 1;
+    pDC->dependent = true;
|
||||||
|
|
||||||
//enqueue
|
//enqueue
|
||||||
QueueDraw(pContext);
|
QueueDraw(pContext);
|
||||||
|
@ -1500,7 +1500,7 @@ void SwrGetStats(
|
||||||
pDC->FeWork.desc.queryStats.pStats = pStats;
|
pDC->FeWork.desc.queryStats.pStats = pStats;
|
||||||
|
|
||||||
// cannot execute until all previous draws have completed
|
// cannot execute until all previous draws have completed
|
||||||
-    pDC->dependency = pDC->drawId - 1;
+    pDC->dependent = true;
|
||||||
|
|
||||||
//enqueue
|
//enqueue
|
||||||
QueueDraw(pContext);
|
QueueDraw(pContext);
|
||||||
|
|
|
@ -381,8 +381,6 @@ struct DRAW_STATE
|
||||||
struct DRAW_CONTEXT
|
struct DRAW_CONTEXT
|
||||||
{
|
{
|
||||||
SWR_CONTEXT* pContext;
|
SWR_CONTEXT* pContext;
|
||||||
-    uint32_t drawId;
-    uint32_t dependency;
||||||
union
|
union
|
||||||
{
|
{
|
||||||
MacroTileMgr* pTileMgr;
|
MacroTileMgr* pTileMgr;
|
||||||
|
@ -391,6 +389,8 @@ struct DRAW_CONTEXT
|
||||||
DRAW_STATE* pState;
|
DRAW_STATE* pState;
|
||||||
CachingArena* pArena;
|
CachingArena* pArena;
|
||||||
|
|
||||||
+    uint32_t drawId;
+    bool dependent;
|
||||||
bool isCompute; // Is this DC a compute context?
|
bool isCompute; // Is this DC a compute context?
|
||||||
bool cleanupState; // True if this is the last draw using an entry in the state ring.
|
bool cleanupState; // True if this is the last draw using an entry in the state ring.
|
||||||
volatile bool doneFE; // Is FE work done for this draw?
|
volatile bool doneFE; // Is FE work done for this draw?
|
||||||
|
|
|
@ -46,6 +46,7 @@ public:
|
||||||
void Init(uint32_t numEntries)
|
void Init(uint32_t numEntries)
|
||||||
{
|
{
|
||||||
SWR_ASSERT(numEntries > 0);
|
SWR_ASSERT(numEntries > 0);
|
||||||
+        SWR_ASSERT(((1ULL << 32) % numEntries) == 0, "%d is not evenly divisible into 2 ^ 32. Wrap errors will occur!", numEntries);
||||||
mNumEntries = numEntries;
|
mNumEntries = numEntries;
|
||||||
mpRingBuffer = (T*)AlignedMalloc(sizeof(T)*numEntries, 64);
|
mpRingBuffer = (T*)AlignedMalloc(sizeof(T)*numEntries, 64);
|
||||||
SWR_ASSERT(mpRingBuffer != nullptr);
|
SWR_ASSERT(mpRingBuffer != nullptr);
|
||||||
|
@ -67,6 +68,8 @@ public:
|
||||||
INLINE void Enqueue()
|
INLINE void Enqueue()
|
||||||
{
|
{
|
||||||
mRingHead++; // There's only one producer.
|
mRingHead++; // There's only one producer.
|
||||||
+        // Assert to find wrap-around cases, NEVER ENABLE DURING CHECKIN!!
+        // SWR_REL_ASSERT(mRingHead);
||||||
}
|
}
|
||||||
|
|
||||||
INLINE void Dequeue()
|
INLINE void Dequeue()
|
||||||
|
@ -81,10 +84,7 @@ public:
|
||||||
|
|
||||||
INLINE bool IsFull()
|
INLINE bool IsFull()
|
||||||
{
|
{
|
||||||
-        ///@note We don't handle wrap case due to using 64-bit indices.
-        /// It would take 11 million years to wrap at 50,000 DCs per sec.
-        /// If we used 32-bit indices then its about 23 hours to wrap.
-        uint64_t numEnqueued = GetHead() - GetTail();
+        uint32_t numEnqueued = GetHead() - GetTail();
||||||
SWR_ASSERT(numEnqueued <= mNumEntries);
|
SWR_ASSERT(numEnqueued <= mNumEntries);
|
||||||
|
|
||||||
return (numEnqueued == mNumEntries);
|
return (numEnqueued == mNumEntries);
|
||||||
|
|
|
@ -317,7 +317,7 @@ bool IDComparesLess(uint32_t a, uint32_t b)
|
||||||
INLINE
|
INLINE
|
||||||
bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastRetiredDraw)
|
bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastRetiredDraw)
|
||||||
{
|
{
|
||||||
-    return IDComparesLess(lastRetiredDraw, pDC->dependency);
+    return pDC->dependent && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// inlined-only version
|
// inlined-only version
|
||||||
|
|
Loading…
Reference in New Issue