swr: [rasterizer core] Fix global arena allocator bug
- Plus some minor code refactoring Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
e42f00ee39
commit
794be41f91
|
@ -37,20 +37,6 @@
|
|||
#include <atomic>
|
||||
#include "core/utils.h"
|
||||
|
||||
class DefaultAllocator
|
||||
{
|
||||
public:
|
||||
void* AllocateAligned(size_t size, size_t align)
|
||||
{
|
||||
void* p = _aligned_malloc(size, align);
|
||||
return p;
|
||||
}
|
||||
void Free(void* pMem)
|
||||
{
|
||||
_aligned_free(pMem);
|
||||
}
|
||||
};
|
||||
|
||||
static const size_t ARENA_BLOCK_ALIGN = 64;
|
||||
|
||||
struct ArenaBlock
|
||||
|
@ -59,25 +45,46 @@ struct ArenaBlock
|
|||
ArenaBlock* pNext = nullptr;
|
||||
};
|
||||
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
|
||||
"Increase BLOCK_ALIGN size");
|
||||
"Increase BLOCK_ALIGN size");
|
||||
|
||||
class DefaultAllocator
|
||||
{
|
||||
public:
|
||||
ArenaBlock* AllocateAligned(size_t size, size_t align)
|
||||
{
|
||||
SWR_ASSERT(size >= sizeof(ArenaBlock));
|
||||
|
||||
ArenaBlock* p = new (_aligned_malloc(size, align)) ArenaBlock();
|
||||
p->blockSize = size;
|
||||
return p;
|
||||
}
|
||||
|
||||
void Free(ArenaBlock* pMem)
|
||||
{
|
||||
if (pMem)
|
||||
{
|
||||
SWR_ASSERT(pMem->blockSize < size_t(0xdddddddd));
|
||||
_aligned_free(pMem);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Caching Allocator for Arena
|
||||
template<uint32_t NumBucketsT = 4, uint32_t StartBucketBitT = 16>
|
||||
template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
|
||||
struct CachingAllocatorT : DefaultAllocator
|
||||
{
|
||||
void* AllocateAligned(size_t size, size_t align)
|
||||
ArenaBlock* AllocateAligned(size_t size, size_t align)
|
||||
{
|
||||
SWR_ASSERT(size >= sizeof(ArenaBlock));
|
||||
SWR_ASSERT(size <= uint32_t(-1));
|
||||
|
||||
size_t blockSize = size - ARENA_BLOCK_ALIGN;
|
||||
uint32_t bucket = GetBucketId(blockSize);
|
||||
uint32_t bucket = GetBucketId(size);
|
||||
|
||||
{
|
||||
// search cached blocks
|
||||
std::lock_guard<std::mutex> l(m_mutex);
|
||||
ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
|
||||
ArenaBlock* pBlock = SearchBlocks(pPrevBlock, blockSize, align);
|
||||
ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
|
||||
|
||||
if (pBlock)
|
||||
{
|
||||
|
@ -89,15 +96,15 @@ struct CachingAllocatorT : DefaultAllocator
|
|||
}
|
||||
else
|
||||
{
|
||||
pPrevBlock = &m_oldCachedBlocks[GetBucketId(blockSize)];
|
||||
pBlock = SearchBlocks(pPrevBlock, blockSize, align);
|
||||
pPrevBlock = &m_oldCachedBlocks[bucket];
|
||||
pBlock = SearchBlocks(pPrevBlock, size, align);
|
||||
|
||||
if (pBlock)
|
||||
{
|
||||
m_oldCachedSize -= pBlock->blockSize;
|
||||
if (pBlock == m_pOldLastCachedBlocks[bucket])
|
||||
{
|
||||
m_pLastCachedBlocks[bucket] = pPrevBlock;
|
||||
m_pOldLastCachedBlocks[bucket] = pPrevBlock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -126,15 +133,14 @@ struct CachingAllocatorT : DefaultAllocator
|
|||
return this->DefaultAllocator::AllocateAligned(size, align);
|
||||
}
|
||||
|
||||
void Free(void* pMem)
|
||||
void Free(ArenaBlock* pMem)
|
||||
{
|
||||
if (pMem)
|
||||
{
|
||||
ArenaBlock* pNewBlock = reinterpret_cast<ArenaBlock*>(pMem);
|
||||
SWR_ASSERT(pNewBlock->blockSize >= 0);
|
||||
SWR_ASSERT(pMem->blockSize >= 0);
|
||||
|
||||
std::unique_lock<std::mutex> l(m_mutex);
|
||||
InsertCachedBlock(GetBucketId(pNewBlock->blockSize), pNewBlock);
|
||||
InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -154,7 +160,7 @@ struct CachingAllocatorT : DefaultAllocator
|
|||
{
|
||||
ArenaBlock* pNext = pBlock->pNext;
|
||||
m_oldCachedSize -= pBlock->blockSize;
|
||||
m_totalAllocated -= (pBlock->blockSize + ARENA_BLOCK_ALIGN);
|
||||
m_totalAllocated -= pBlock->blockSize;
|
||||
this->DefaultAllocator::Free(pBlock);
|
||||
pBlock = pNext;
|
||||
}
|
||||
|
@ -175,6 +181,11 @@ struct CachingAllocatorT : DefaultAllocator
|
|||
}
|
||||
}
|
||||
|
||||
if (doFree)
|
||||
{
|
||||
SWR_REL_ASSERT(m_oldCachedSize == 0, "Unknown whereabouts of 0x%llx bytes", (uint64_t)m_oldCachedSize);
|
||||
}
|
||||
|
||||
m_oldCachedSize += m_cachedSize;
|
||||
m_cachedSize = 0;
|
||||
}
|
||||
|
@ -304,7 +315,7 @@ private:
|
|||
// buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
|
||||
static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
|
||||
static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
|
||||
static const size_t MAX_UNUSED_SIZE = 20 * sizeof(MEGABYTE);
|
||||
static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);
|
||||
|
||||
ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
|
||||
ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
|
||||
|
@ -346,7 +357,7 @@ public:
|
|||
|
||||
if ((offset + size) <= pCurBlock->blockSize)
|
||||
{
|
||||
void* pMem = PtrAdd(pCurBlock, offset + ARENA_BLOCK_ALIGN);
|
||||
void* pMem = PtrAdd(pCurBlock, offset);
|
||||
m_offset = offset + size;
|
||||
return pMem;
|
||||
}
|
||||
|
@ -355,24 +366,21 @@ public:
|
|||
// a new block
|
||||
}
|
||||
|
||||
static const size_t ArenaBlockSize = BlockSizeT - ARENA_BLOCK_ALIGN;
|
||||
size_t blockSize = std::max(size, ArenaBlockSize);
|
||||
static const size_t ArenaBlockSize = BlockSizeT;
|
||||
size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
|
||||
|
||||
// Add in one BLOCK_ALIGN unit to store ArenaBlock in.
|
||||
blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
|
||||
|
||||
void *pMem = m_allocator.AllocateAligned(blockSize + ARENA_BLOCK_ALIGN, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
|
||||
SWR_ASSERT(pMem != nullptr);
|
||||
|
||||
ArenaBlock* pNewBlock = new (pMem) ArenaBlock();
|
||||
ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
|
||||
SWR_ASSERT(pNewBlock != nullptr);
|
||||
|
||||
if (pNewBlock != nullptr)
|
||||
{
|
||||
m_offset = 0;
|
||||
m_offset = ARENA_BLOCK_ALIGN;
|
||||
pNewBlock->pNext = m_pCurBlock;
|
||||
|
||||
m_pCurBlock = pNewBlock;
|
||||
m_pCurBlock->blockSize = blockSize;
|
||||
}
|
||||
|
||||
return AllocAligned(size, align);
|
||||
|
@ -407,7 +415,7 @@ public:
|
|||
|
||||
void Reset(bool removeAll = false)
|
||||
{
|
||||
m_offset = 0;
|
||||
m_offset = ARENA_BLOCK_ALIGN;
|
||||
|
||||
if (m_pCurBlock)
|
||||
{
|
||||
|
@ -431,13 +439,13 @@ public:
|
|||
|
||||
bool IsEmpty()
|
||||
{
|
||||
return (m_pCurBlock == nullptr) || (m_offset == 0 && m_pCurBlock->pNext == nullptr);
|
||||
return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
ArenaBlock* m_pCurBlock = nullptr;
|
||||
size_t m_offset = 0;
|
||||
size_t m_offset = ARENA_BLOCK_ALIGN;
|
||||
|
||||
/// @note Mutex is only used by sync allocation functions.
|
||||
std::mutex m_mutex;
|
||||
|
|
|
@ -80,9 +80,10 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
|
|||
SWR_ASSERT(pTaskData != nullptr);
|
||||
|
||||
// Ensure spill fill memory has been allocated.
|
||||
if (pSpillFillBuffer == nullptr)
|
||||
size_t spillFillSize = pDC->pState->state.totalSpillFillSize;
|
||||
if (spillFillSize && pSpillFillBuffer == nullptr)
|
||||
{
|
||||
pSpillFillBuffer = pDC->pArena->AllocAlignedSync(pDC->pState->state.totalSpillFillSize, sizeof(float) * 8);
|
||||
pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES);
|
||||
}
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
|
|
Loading…
Reference in New Issue