[dxvk] Add CS chunk pool

Reduces the number of dynamic memory allocations for CS chunks by
recycling them once they are no longer needed. Also fixes a potential
issue with chunks that are dispatched multiple times.
This commit is contained in:
Philip Rebohle 2018-08-27 16:07:38 +02:00
parent b75b7d0999
commit f1b075c0f3
11 changed files with 217 additions and 34 deletions

View File

@ -41,8 +41,7 @@ namespace dxvk {
}
void D3D11CommandList::AddChunk(
Rc<DxvkCsChunk>&& Chunk) {
/**
 * \brief Appends a recorded chunk to the command list
 *
 * The reference is moved into the internal chunk
 * array, so the caller's reference is left empty.
 * \param [in] Chunk Chunk reference to take over
 */
void D3D11CommandList::AddChunk(DxvkCsChunkRef&& Chunk) {
  m_chunks.emplace_back(std::move(Chunk));
}
@ -59,7 +58,7 @@ namespace dxvk {
void D3D11CommandList::EmitToCsThread(DxvkCsThread* CsThread) {
for (const auto& chunk : m_chunks)
CsThread->dispatchChunk(Rc<DxvkCsChunk>(chunk));
CsThread->dispatchChunk(DxvkCsChunkRef(chunk));
MarkSubmitted();
}

View File

@ -24,7 +24,7 @@ namespace dxvk {
UINT STDMETHODCALLTYPE GetContextFlags() final;
void AddChunk(
Rc<DxvkCsChunk>&& Chunk);
DxvkCsChunkRef&& Chunk);
void EmitToCommandList(
ID3D11CommandList* pCommandList);
@ -37,7 +37,7 @@ namespace dxvk {
D3D11Device* const m_device;
UINT const m_contextFlags;
std::vector<Rc<DxvkCsChunk>> m_chunks;
std::vector<DxvkCsChunkRef> m_chunks;
std::atomic<bool> m_submitted = { false };
std::atomic<bool> m_warned = { false };

View File

@ -15,7 +15,7 @@ namespace dxvk {
: m_parent (pParent),
m_annotation(this),
m_device (Device),
m_csChunk (new DxvkCsChunk()) {
m_csChunk (AllocCsChunk()) {
// Create default state objects. We won't ever return them
// to the application, but we'll use them to apply state.
Com<ID3D11BlendState> defaultBlendState;
@ -282,11 +282,11 @@ namespace dxvk {
void STDMETHODCALLTYPE D3D11DeviceContext::CopySubresourceRegion1(
ID3D11Resource* pDstResource,
UINT DstSubresource,
UINT DstX,
UINT DstX,
UINT DstY,
UINT DstZ,
ID3D11Resource* pSrcResource,
UINT SrcSubresource,
UINT DstZ,
ID3D11Resource* pSrcResource,
UINT SrcSubresource,
const D3D11_BOX* pSrcBox,
UINT CopyFlags) {
D3D11_RESOURCE_DIMENSION dstResourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;
@ -3204,4 +3204,9 @@ namespace dxvk {
}
}
/**
 * \brief Allocates a CS chunk
 *
 * Delegates the allocation to the parent device.
 * \returns Reference to the allocated chunk
 */
DxvkCsChunkRef D3D11DeviceContext::AllocCsChunk() {
  DxvkCsChunkRef allocated = m_parent->AllocCsChunk();
  return allocated;
}
}

View File

@ -643,9 +643,10 @@ namespace dxvk {
D3D11UserDefinedAnnotation m_annotation;
Rc<DxvkDevice> m_device;
Rc<DxvkCsChunk> m_csChunk;
Rc<DxvkDataBuffer> m_updateBuffer;
DxvkCsChunkRef m_csChunk;
Com<D3D11BlendState> m_defaultBlendState;
Com<D3D11DepthStencilState> m_defaultDepthStencilState;
Com<D3D11RasterizerState> m_defaultRasterizerState;
@ -779,6 +780,8 @@ namespace dxvk {
DxvkDataSlice AllocUpdateBufferSlice(size_t Size);
DxvkCsChunkRef AllocCsChunk();
template<typename T>
const D3D11CommonShader* GetCommonShader(T* pShader) const {
return pShader != nullptr ? pShader->GetCommonShader() : nullptr;
@ -789,7 +792,7 @@ namespace dxvk {
if (!m_csChunk->push(command)) {
EmitCsChunk(std::move(m_csChunk));
m_csChunk = new DxvkCsChunk();
m_csChunk = AllocCsChunk();
m_csChunk->push(command);
}
}
@ -797,11 +800,11 @@ namespace dxvk {
void FlushCsChunk() {
if (m_csChunk->commandCount() != 0) {
EmitCsChunk(std::move(m_csChunk));
m_csChunk = new DxvkCsChunk();
m_csChunk = AllocCsChunk();
}
}
virtual void EmitCsChunk(Rc<DxvkCsChunk>&& chunk) = 0;
virtual void EmitCsChunk(DxvkCsChunkRef&& chunk) = 0;
};

View File

@ -299,7 +299,7 @@ namespace dxvk {
}
void D3D11DeferredContext::EmitCsChunk(Rc<DxvkCsChunk>&& chunk) {
// Emits a finished chunk on a deferred context: the chunk reference
// is moved into the command list (m_commandList) via AddChunk, so
// the caller's reference is left empty afterwards.
void D3D11DeferredContext::EmitCsChunk(DxvkCsChunkRef&& chunk) {
m_commandList->AddChunk(std::move(chunk));
}

View File

@ -97,7 +97,7 @@ namespace dxvk {
Com<D3D11CommandList> CreateCommandList();
void EmitCsChunk(Rc<DxvkCsChunk>&& chunk) final;
void EmitCsChunk(DxvkCsChunkRef&& chunk) final;
auto FindMapEntry(ID3D11Resource* pResource, UINT Subresource) {
return std::find_if(m_mappedResources.rbegin(), m_mappedResources.rend(),

View File

@ -544,7 +544,7 @@ namespace dxvk {
}
void D3D11ImmediateContext::EmitCsChunk(Rc<DxvkCsChunk>&& chunk) {
// Emits a finished chunk on the immediate context: the chunk
// reference is moved to the CS thread for dispatch, and the
// context is then flagged as busy.
void D3D11ImmediateContext::EmitCsChunk(DxvkCsChunkRef&& chunk) {
m_csThread.dispatchChunk(std::move(chunk));
// NOTE(review): presumably cleared again elsewhere once the CS
// thread has been synchronized - confirm against the rest of the file.
m_csIsBusy = true;
}

View File

@ -151,7 +151,7 @@ namespace dxvk {
const Rc<DxvkResource>& Resource,
UINT MapFlags);
void EmitCsChunk(Rc<DxvkCsChunk>&& chunk) final;
void EmitCsChunk(DxvkCsChunkRef&& chunk) final;
void FlushImplicit();

View File

@ -7,6 +7,8 @@
#include "../dxgi/dxgi_object.h"
#include "../dxvk/dxvk_cs.h"
#include "../d3d10/d3d10_device.h"
#include "../util/com/com_private_data.h"
@ -338,7 +340,12 @@ namespace dxvk {
void FreeCounterSlice(const DxvkBufferSlice& Slice) {
m_uavCounters->FreeSlice(Slice);
}
/**
 * \brief Allocates a CS chunk
 *
 * Takes a chunk from the device's chunk pool and wraps
 * it in a reference that remembers that pool.
 * \returns Reference to the allocated chunk
 */
DxvkCsChunkRef AllocCsChunk() {
  return DxvkCsChunkRef(
    m_csChunkPool.allocChunk(),
    &m_csChunkPool);
}
const D3D11Options* GetOptions() const {
return &m_d3d11Options;
}
@ -369,6 +376,8 @@ namespace dxvk {
const D3D11Options m_d3d11Options;
const DxbcOptions m_dxbcOptions;
DxvkCsChunkPool m_csChunkPool;
D3D11Initializer* m_initializer = nullptr;
D3D11UavCounterAllocator* m_uavCounters = nullptr;
D3D11ImmediateContext* m_context = nullptr;

View File

@ -8,13 +8,7 @@ namespace dxvk {
DxvkCsChunk::~DxvkCsChunk() {
auto cmd = m_head;
while (cmd != nullptr) {
auto next = cmd->next();
cmd->~DxvkCsCmd();
cmd = next;
}
this->reset();
}
@ -22,8 +16,17 @@ namespace dxvk {
auto cmd = m_head;
while (cmd != nullptr) {
auto next = cmd->next();
cmd->exec(ctx);
cmd = cmd->next();
}
}
void DxvkCsChunk::reset() {
auto cmd = m_head;
while (cmd != nullptr) {
auto next = cmd->next();
cmd->~DxvkCsCmd();
cmd = next;
}
@ -36,6 +39,39 @@ namespace dxvk {
}
// Nothing to set up explicitly: all members are default-initialized.
DxvkCsChunkPool::DxvkCsChunkPool() { }
// Deletes every chunk that is currently stored in the pool.
DxvkCsChunkPool::~DxvkCsChunkPool() {
  for (auto entry = m_chunks.begin(); entry != m_chunks.end(); ++entry) {
    delete *entry;
  }
}
// Hands out a chunk: the most recently pooled one if the
// pool is not empty, otherwise a newly created chunk.
DxvkCsChunk* DxvkCsChunkPool::allocChunk() {
  DxvkCsChunk* recycled = nullptr;

  // Only the pool lookup happens under the lock; creating
  // a new chunk is done after the lock has been released.
  { std::lock_guard<sync::Spinlock> guard(m_mutex);

    if (!m_chunks.empty()) {
      recycled = m_chunks.back();
      m_chunks.pop_back();
    }
  }

  if (recycled != nullptr)
    return recycled;

  return new DxvkCsChunk();
}
// Returns a chunk to the pool. The chunk is reset before
// the lock is taken, then appended to the pooled list.
void DxvkCsChunkPool::freeChunk(DxvkCsChunk* chunk) {
  chunk->reset();

  { std::lock_guard<sync::Spinlock> guard(m_mutex);
    m_chunks.push_back(chunk);
  }
}
DxvkCsThread::DxvkCsThread(const Rc<DxvkContext>& context)
: m_context(context), m_thread([this] { threadFunc(); }) {
@ -52,7 +88,7 @@ namespace dxvk {
}
void DxvkCsThread::dispatchChunk(Rc<DxvkCsChunk>&& chunk) {
void DxvkCsThread::dispatchChunk(DxvkCsChunkRef&& chunk) {
{ std::unique_lock<std::mutex> lock(m_mutex);
m_chunksQueued.push(std::move(chunk));
m_chunksPending += 1;
@ -74,15 +110,15 @@ namespace dxvk {
void DxvkCsThread::threadFunc() {
env::setThreadName(L"dxvk-cs");
Rc<DxvkCsChunk> chunk;
DxvkCsChunkRef chunk;
while (!m_stopped.load()) {
{ std::unique_lock<std::mutex> lock(m_mutex);
if (chunk != nullptr) {
if (chunk) {
if (--m_chunksPending == 0)
m_condOnSync.notify_one();
chunk = nullptr;
chunk = DxvkCsChunkRef();
}
if (m_chunksQueued.size() == 0) {
@ -98,7 +134,7 @@ namespace dxvk {
}
}
if (chunk != nullptr)
if (chunk)
chunk->executeAll(m_context.ptr());
}
}

View File

@ -145,6 +145,15 @@ namespace dxvk {
*/
void executeAll(DxvkContext* ctx);
/**
* \brief Resets chunk
*
* Destroys all recorded commands and
* marks the chunk itself as empty, so
* that it can be reused later.
*/
void reset();
private:
size_t m_commandCount = 0;
@ -159,6 +168,128 @@ namespace dxvk {
};
/**
* \brief Chunk pool
*
* Implements a pool of CS chunks which can be
* recycled. The goal is to reduce the number
* of dynamic memory allocations.
*/
class DxvkCsChunkPool {

public:

  // A pool can be neither copy-constructed nor copy-assigned.
  DxvkCsChunkPool             (const DxvkCsChunkPool&) = delete;
  DxvkCsChunkPool& operator = (const DxvkCsChunkPool&) = delete;

  DxvkCsChunkPool();

  /**
   * \brief Destroys the pool
   *
   * Deletes all chunks that are stored
   * in the pool at that point.
   */
  ~DxvkCsChunkPool();

  /**
   * \brief Retrieves a chunk
   *
   * Hands out a chunk that was previously returned
   * to the pool. If the pool is empty, a new chunk
   * is created instead.
   * \returns Pointer to the chunk
   */
  DxvkCsChunk* allocChunk();

  /**
   * \brief Returns a chunk to the pool
   *
   * The chunk is reset first and then stored
   * in the pool so that it can be handed out
   * again by \ref allocChunk.
   * \param [in] chunk The chunk to return
   */
  void freeChunk(DxvkCsChunk* chunk);

private:

  // Lock taken by allocChunk and freeChunk around
  // every access to the chunk list
  sync::Spinlock            m_mutex;

  // Chunks that are currently available for reuse
  std::vector<DxvkCsChunk*> m_chunks;

};
/**
* \brief Chunk reference
*
* Implements basic reference counting for
* CS chunks and returns them to the pool
* as soon as they are no longer needed.
*/
class DxvkCsChunkRef {

public:

  /**
   * \brief Creates an empty reference
   *
   * An empty reference points to no chunk
   * and converts to \c false.
   */
  DxvkCsChunkRef() noexcept { }

  /**
   * \brief Creates a reference to a chunk
   *
   * Increments the reference count of the chunk
   * if the chunk pointer is not \c nullptr.
   * \param [in] chunk The chunk to reference
   * \param [in] pool Pool that the chunk is handed
   *    to once its reference count drops to zero
   */
  DxvkCsChunkRef(
          DxvkCsChunk*      chunk,
          DxvkCsChunkPool*  pool)
  : m_chunk (chunk),
    m_pool  (pool) {
    this->incRef();
  }

  /**
   * \brief Copies a reference
   *
   * Both objects refer to the same chunk afterwards,
   * and the chunk's reference count is incremented.
   */
  DxvkCsChunkRef(const DxvkCsChunkRef& other)
  : m_chunk (other.m_chunk),
    m_pool  (other.m_pool) {
    this->incRef();
  }

  /**
   * \brief Moves a reference
   *
   * Takes over the chunk without touching its reference
   * count and leaves \c other empty. This only copies and
   * clears two pointers, so it is declared \c noexcept;
   * that lets \c std::vector move elements rather than
   * copy them when it reallocates its storage.
   */
  DxvkCsChunkRef(DxvkCsChunkRef&& other) noexcept
  : m_chunk (other.m_chunk),
    m_pool  (other.m_pool) {
    other.m_chunk = nullptr;
    other.m_pool  = nullptr;
  }

  /**
   * \brief Copy-assigns a reference
   *
   * The new chunk is referenced before the old one
   * is released, so assigning a reference to itself
   * never drops the chunk's last reference.
   */
  DxvkCsChunkRef& operator = (const DxvkCsChunkRef& other) {
    other.incRef();
    this->decRef();
    this->m_chunk = other.m_chunk;
    this->m_pool  = other.m_pool;
    return *this;
  }

  /**
   * \brief Move-assigns a reference
   *
   * Releases the chunk held so far, takes over the
   * chunk of \c other and leaves \c other empty.
   */
  DxvkCsChunkRef& operator = (DxvkCsChunkRef&& other) {
    this->decRef();
    this->m_chunk = other.m_chunk;
    this->m_pool  = other.m_pool;
    other.m_chunk = nullptr;
    other.m_pool  = nullptr;
    return *this;
  }

  /**
   * \brief Releases the reference
   *
   * If this was the last reference to the
   * chunk, the chunk is handed to the pool.
   */
  ~DxvkCsChunkRef() {
    this->decRef();
  }

  /**
   * \brief Accesses the referenced chunk
   * \returns Chunk pointer, \c nullptr if empty
   */
  DxvkCsChunk* operator -> () const {
    return m_chunk;
  }

  /**
   * \brief Checks whether a chunk is referenced
   * \returns \c true if the reference is not empty
   */
  operator bool () const {
    return m_chunk != nullptr;
  }

private:

  DxvkCsChunk*      m_chunk = nullptr;
  DxvkCsChunkPool*  m_pool  = nullptr;

  // Adds one reference to the chunk, if there is one.
  void incRef() const {
    if (m_chunk != nullptr)
      m_chunk->incRef();
  }

  // Drops one reference from the chunk, if there is one, and
  // hands the chunk to the pool when the count reaches zero.
  void decRef() const {
    if (m_chunk != nullptr && m_chunk->decRef() == 0)
      m_pool->freeChunk(m_chunk);
  }

};
/**
* \brief Command stream thread
*
@ -179,7 +310,7 @@ namespace dxvk {
* command lists recorded on another thread.
* \param [in] chunk The chunk to dispatch
*/
void dispatchChunk(Rc<DxvkCsChunk>&& chunk);
void dispatchChunk(DxvkCsChunkRef&& chunk);
/**
* \brief Synchronizes with the thread
@ -199,7 +330,7 @@ namespace dxvk {
std::mutex m_mutex;
std::condition_variable m_condOnAdd;
std::condition_variable m_condOnSync;
std::queue<Rc<DxvkCsChunk>> m_chunksQueued;
std::queue<DxvkCsChunkRef> m_chunksQueued;
dxvk::thread m_thread;
uint32_t m_chunksPending = 0;