Optimized command submission

Command submission no longer synchronizes with the device on every
submission. Instead, the command list and the fence that was created for it
are added to a queue. A separate thread then waits for execution to
complete, resets the command list and returns it to the device for reuse.
This commit is contained in:
Philip Rebohle 2017-12-16 18:10:55 +01:00
parent d5a49698b4
commit 1fe5b74762
7 changed files with 143 additions and 19 deletions

View File

@ -12,7 +12,8 @@ namespace dxvk {
m_features (features),
m_memory (new DxvkMemoryAllocator(adapter, vkd)),
m_renderPassPool (new DxvkRenderPassPool (vkd)),
m_pipelineManager (new DxvkPipelineManager(vkd)) {
m_pipelineManager (new DxvkPipelineManager(vkd)),
m_submissionQueue (this) {
m_vkd->vkGetDeviceQueue(m_vkd->device(),
m_adapter->graphicsQueueFamily(), 0,
&m_graphicsQueue);
@ -214,12 +215,8 @@ namespace dxvk {
waitSemaphore, wakeSemaphore, fence->handle());
}
// TODO Delay synchronization by putting these into a ring buffer
fence->wait(std::numeric_limits<uint64_t>::max());
commandList->reset();
// FIXME this must go away once the ring buffer is implemented
m_recycledCommandLists.returnObject(commandList);
// Add this to the set of running submissions
m_submissionQueue.submit(fence, commandList);
m_statCounters.increment(DxvkStat::DevQueueSubmissions, 1);
return fence;
}
@ -232,4 +229,9 @@ namespace dxvk {
throw DxvkError("DxvkDevice::waitForIdle: Operation failed");
}
/**
 * \brief Returns a command list to the device
 *
 * Hands the given command list to \c m_recycledCommandLists.
 * Called by the submission queue's worker thread after it has
 * waited for the associated fence and reset the command list.
 * NOTE(review): presumably this makes the list available for
 * reuse by later command list requests - confirm against the
 * recycler implementation.
 * \param [in] cmdList The command list to hand back
 */
void DxvkDevice::recycleCommandList(
  const Rc<DxvkCommandList>& cmdList) {
  m_recycledCommandLists.returnObject(cmdList);
}
}

View File

@ -9,6 +9,7 @@
#include "dxvk_image.h"
#include "dxvk_memory.h"
#include "dxvk_pipemanager.h"
#include "dxvk_queue.h"
#include "dxvk_recycler.h"
#include "dxvk_renderpass.h"
#include "dxvk_sampler.h"
@ -30,6 +31,8 @@ namespace dxvk {
* contexts. Multiple contexts can be created for a device.
*/
class DxvkDevice : public RcObject {
friend class DxvkSubmissionQueue;
constexpr static VkDeviceSize DefaultStagingBufferSize = 64 * 1024 * 1024;
public:
@ -308,6 +311,11 @@ namespace dxvk {
DxvkStatCounters m_statCounters;
DxvkSubmissionQueue m_submissionQueue;
void recycleCommandList(
const Rc<DxvkCommandList>& cmdList);
};
}

View File

@ -58,23 +58,20 @@ namespace dxvk {
: m_heap (heap),
m_memory(memory),
m_mapPtr(mapPtr),
m_size (size),
m_free (size) {
TRACE(this);
m_size (size) {
// Mark the entire chunk as free
m_freeList.push_back(FreeSlice { 0, size });
}
DxvkMemoryChunk::~DxvkMemoryChunk() {
TRACE(this);
m_heap->freeDeviceMemory(m_memory);
}
DxvkMemory DxvkMemoryChunk::alloc(VkDeviceSize size, VkDeviceSize align) {
// Fast exit if the chunk is full already
if (size > m_free)
// If the chunk is full, return
if (m_freeList.size() == 0)
return DxvkMemory();
// Select the slice to allocate from in a worst-fit
@ -82,8 +79,12 @@ namespace dxvk {
auto bestSlice = m_freeList.begin();
for (auto slice = m_freeList.begin(); slice != m_freeList.end(); slice++) {
if (slice->length > bestSlice->length)
if (slice->length == size) {
bestSlice = slice;
break;
} else if (slice->length > bestSlice->length) {
bestSlice = slice;
}
}
// We need to align the allocation to the requested alignment
@ -99,7 +100,6 @@ namespace dxvk {
// We can use this slice, but we'll have to add
// the unused parts of it back to the free list.
m_freeList.erase(bestSlice);
m_free -= size;
if (allocStart != sliceStart)
m_freeList.push_back({ sliceStart, allocStart - sliceStart });
@ -108,6 +108,7 @@ namespace dxvk {
m_freeList.push_back({ allocEnd, sliceEnd - allocEnd });
// Create the memory object with the aligned slice
m_delta++;
return DxvkMemory(this, m_heap,
m_memory, allocStart, allocEnd - allocStart,
reinterpret_cast<char*>(m_mapPtr) + allocStart);
@ -117,8 +118,6 @@ namespace dxvk {
void DxvkMemoryChunk::free(
VkDeviceSize offset,
VkDeviceSize length) {
m_free += length;
// Remove adjacent entries from the free list and then add
// a new slice that covers all those entries. Without doing
// so, the slice could not be reused for larger allocations.
@ -137,6 +136,7 @@ namespace dxvk {
}
}
m_delta--;
m_freeList.push_back({ offset, length });
}

View File

@ -130,8 +130,7 @@ namespace dxvk {
VkDeviceMemory const m_memory;
void* const m_mapPtr;
VkDeviceSize const m_size;
VkDeviceSize m_free = 0;
size_t m_delta = 0;
std::vector<FreeSlice> m_freeList;
};

62
src/dxvk/dxvk_queue.cpp Normal file
View File

@ -0,0 +1,62 @@
#include "dxvk_device.h"
#include "dxvk_queue.h"
namespace dxvk {
/**
 * \brief Creates the submission queue and starts its worker
 *
 * Stores the device pointer and launches the thread that
 * runs \c threadFunc for the lifetime of this object.
 * \param [in] device Device that receives recycled command lists
 */
DxvkSubmissionQueue::DxvkSubmissionQueue(DxvkDevice* device)
: m_device(device),
  m_thread(&DxvkSubmissionQueue::threadFunc, this) { }
/**
 * \brief Stops the worker thread and waits for it to exit
 *
 * The stop flag is set while holding the queue mutex. Even
 * though the flag is atomic, writing it without the mutex
 * would allow the worker to evaluate its wait predicate
 * (not stopped, queue empty), miss both the store and the
 * notification, and then block forever, which would make
 * the \c join below hang.
 */
DxvkSubmissionQueue::~DxvkSubmissionQueue() {
  { std::lock_guard<std::mutex> lock(m_mutex);
    m_stopped.store(true);
  }

  // Notify after releasing the mutex so that the worker
  // does not wake up only to block on the lock again.
  m_condOnAdd.notify_one();
  m_thread.join();
}
void DxvkSubmissionQueue::submit(
const Rc<DxvkFence>& fence,
const Rc<DxvkCommandList>& cmdList) {
{ std::unique_lock<std::mutex> lock(m_mutex);
m_condOnTake.wait(lock, [this] {
return m_entries.size() < 4;
});
m_entries.push({ fence, cmdList });
}
m_condOnAdd.notify_one();
}
void DxvkSubmissionQueue::threadFunc() {
while (!m_stopped.load()) {
Entry entry;
{ std::unique_lock<std::mutex> lock(m_mutex);
m_condOnAdd.wait(lock, [this] {
return m_stopped.load() || (m_entries.size() != 0);
});
if (m_entries.size() != 0) {
entry = std::move(m_entries.front());
m_entries.pop();
}
}
m_condOnTake.notify_one();
if (entry.fence != nullptr) {
entry.fence->wait(std::numeric_limits<uint64_t>::max());
entry.cmdList->reset();
m_device->recycleCommandList(entry.cmdList);
}
}
}
}

52
src/dxvk/dxvk_queue.h Normal file
View File

@ -0,0 +1,52 @@
#pragma once
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include "dxvk_cmdlist.h"
#include "dxvk_sync.h"
namespace dxvk {
class DxvkDevice;
/**
 * \brief Submission queue
 *
 * Holds submitted command lists together with the fence
 * of their submission. A worker thread owned by this
 * object takes entries off the queue one at a time, waits
 * for the fence, resets the command list and returns it
 * to the device via \c DxvkDevice::recycleCommandList.
 */
class DxvkSubmissionQueue {
public:
/**
 * \brief Creates the queue and starts the worker thread
 * \param [in] device Device that command lists are returned to
 */
DxvkSubmissionQueue(DxvkDevice* device);
/**
 * \brief Sets the stop flag, wakes the worker and joins it
 */
~DxvkSubmissionQueue();
/**
 * \brief Adds a submission to the queue
 *
 * Blocks the caller while four entries are already queued.
 * \param [in] fence Fence to wait on
 * \param [in] cmdList Command list to recycle afterwards
 */
void submit(
const Rc<DxvkFence>& fence,
const Rc<DxvkCommandList>& cmdList);
private:
// One queued submission: the fence to wait on and the
// command list to reset and recycle after the wait
struct Entry {
Rc<DxvkFence> fence;
Rc<DxvkCommandList> cmdList;
};
// Raw pointer to the device that recycled command lists are returned to
DxvkDevice* m_device;
// Stop request flag, checked by the worker thread.
// NOTE(review): std::atomic is used here but <atomic> is not
// among this header's direct includes - confirm it is pulled
// in by one of the included project headers.
std::atomic<bool> m_stopped = { false };
// Protects m_entries; used with both condition variables
std::mutex m_mutex;
// Signaled when an entry was added or a stop was requested
std::condition_variable m_condOnAdd;
// Signaled by the worker after each attempt to take an entry
std::condition_variable m_condOnTake;
// Pending submissions in submission order
std::queue<Entry> m_entries;
// Worker thread. Must remain the last data member: members
// are initialized in declaration order, and the thread uses
// all of the members above as soon as it is started.
std::thread m_thread;
// Worker thread entry point
void threadFunc();
};
}

View File

@ -18,6 +18,7 @@ dxvk_src = files([
'dxvk_memory.cpp',
'dxvk_pipelayout.cpp',
'dxvk_pipemanager.cpp',
'dxvk_queue.cpp',
'dxvk_renderpass.cpp',
'dxvk_resource.cpp',
'dxvk_sampler.cpp',