[dxvk] Add high-priority queue for shader compiles

As well as an API to queue shaders as high priority.
This commit is contained in:
Philip Rebohle 2022-08-11 02:37:36 +02:00
parent f09f11aad0
commit c3a53127d7
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
4 changed files with 133 additions and 23 deletions

View File

@ -213,6 +213,12 @@ namespace dxvk {
}
/**
 * Asks for the given shader to be compiled with priority.
 *
 * The device does no work of its own here: the request is passed
 * straight to the pipeline manager obtained from m_objects.
 */
void DxvkDevice::requestCompileShader(
  const Rc<DxvkShader>&           shader) {
  auto&& pipelineManager = m_objects.pipelineManager();
  pipelineManager.requestCompileShader(shader);
}
void DxvkDevice::presentImage(
const Rc<vk::Presenter>& presenter,
DxvkSubmitStatus* status) {

View File

@ -380,6 +380,13 @@ namespace dxvk {
void registerShader(
const Rc<DxvkShader>& shader);
/**
* \brief Prioritizes compilation of a given shader
*
* Forwards the request to the pipeline manager's
* method of the same name.
* \param [in] shader Shader to start compiling
*/
void requestCompileShader(
const Rc<DxvkShader>& shader);
/**
* \brief Presents a swap chain image
*

View File

@ -7,17 +7,9 @@
namespace dxvk {
DxvkPipelineWorkers::DxvkPipelineWorkers(
DxvkDevice* device) {
// Use a reasonably large number of threads for compiling, but
// leave some cores to the application to avoid excessive stutter
uint32_t numCpuCores = dxvk::thread::hardware_concurrency();
m_workerCount = ((std::max(1u, numCpuCores) - 1) * 5) / 7;
DxvkDevice* device)
: m_device(device) {
if (m_workerCount < 1) m_workerCount = 1;
if (m_workerCount > 32) m_workerCount = 32;
if (device->config().numCompilerThreads > 0)
m_workerCount = device->config().numCompilerThreads;
}
@ -27,7 +19,8 @@ namespace dxvk {
void DxvkPipelineWorkers::compilePipelineLibrary(
DxvkShaderPipelineLibrary* library) {
DxvkShaderPipelineLibrary* library,
DxvkPipelinePriority priority) {
std::unique_lock lock(m_queueLock);
this->startWorkers();
@ -36,7 +29,13 @@ namespace dxvk {
PipelineLibraryEntry e = { };
e.pipelineLibrary = library;
m_queuedLibraries.push(e);
if (priority == DxvkPipelinePriority::High) {
m_queuedLibrariesPrioritized.push(e);
m_queueCondPrioritized.notify_one();
} else {
m_queuedLibraries.push(e);
}
m_queueCond.notify_one();
}
@ -100,14 +99,37 @@ namespace dxvk {
void DxvkPipelineWorkers::startWorkers() {
if (!m_workersRunning) {
// Use all available cores by default
uint32_t workerCount = dxvk::thread::hardware_concurrency();
if (workerCount < 1) workerCount = 1;
if (workerCount > 64) workerCount = 64;
// Reduce worker count on 32-bit to save address space
if (env::is32BitHostPlatform())
workerCount = std::min(workerCount, 16u);
if (m_device->config().numCompilerThreads > 0)
workerCount = m_device->config().numCompilerThreads;
// Number of workers that can process pipelines with normal
// priority. Any other workers can only build high-priority pipelines.
uint32_t npWorkerCount = m_device->canUseGraphicsPipelineLibrary()
? std::max(((workerCount - 1) * 5) / 7, 1u)
: workerCount;
uint32_t hpWorkerCount = workerCount - npWorkerCount;
Logger::info(str::format("DXVK: Using ", npWorkerCount, " + ", hpWorkerCount, " compiler threads"));
m_workers.resize(npWorkerCount + hpWorkerCount);
// Set worker flag so that they don't exit immediately
m_workersRunning = true;
Logger::info(str::format("DXVK: Using ", m_workerCount, " compiler threads"));
m_workers.resize(m_workerCount);
for (auto& worker : m_workers) {
worker = dxvk::thread([this] { runWorker(); });
worker.set_priority(ThreadPriority::Lowest);
for (size_t i = 0; i < m_workers.size(); i++) {
m_workers[i] = i >= npWorkerCount
? dxvk::thread([this] { runWorkerPrioritized(); })
: dxvk::thread([this] { runWorker(); });
m_workers[i].set_priority(ThreadPriority::Lowest);
}
}
}
@ -124,6 +146,7 @@ namespace dxvk {
m_queueCond.wait(lock, [this] {
return !m_workersRunning
|| !m_queuedLibrariesPrioritized.empty()
|| !m_queuedLibraries.empty()
|| !m_queuedPipelines.empty();
});
@ -132,6 +155,9 @@ namespace dxvk {
// Skip pending work, exiting early is
// more important in this case.
break;
} else if (!m_queuedLibrariesPrioritized.empty()) {
l = m_queuedLibrariesPrioritized.front();
m_queuedLibrariesPrioritized.pop();
} else if (!m_queuedLibraries.empty()) {
l = m_queuedLibraries.front();
m_queuedLibraries.pop();
@ -162,6 +188,34 @@ namespace dxvk {
}
void DxvkPipelineWorkers::runWorkerPrioritized() {
env::setThreadName("dxvk-shader-p");
while (true) {
PipelineLibraryEntry l = { };
{ std::unique_lock lock(m_queueLock);
m_queueCondPrioritized.wait(lock, [this] {
return !m_workersRunning
|| !m_queuedLibrariesPrioritized.empty();
});
if (!m_workersRunning)
break;
l = m_queuedLibrariesPrioritized.front();
m_queuedLibrariesPrioritized.pop();
}
if (l.pipelineLibrary)
l.pipelineLibrary->compilePipeline();
m_pendingTasks -= 1;
}
}
DxvkPipelineManager::DxvkPipelineManager(
DxvkDevice* device)
: m_device (device),
@ -285,13 +339,30 @@ namespace dxvk {
const Rc<DxvkShader>& shader) {
if (canPrecompileShader(shader)) {
auto library = createPipelineLibrary(shader);
m_workers.compilePipelineLibrary(library);
m_workers.compilePipelineLibrary(library, DxvkPipelinePriority::Normal);
}
m_stateCache.registerShader(shader);
}
void DxvkPipelineManager::requestCompileShader(
const Rc<DxvkShader>& shader) {
if (!shader->needsLibraryCompile())
return;
// Dispatch high-priority compile job
auto library = findPipelineLibrary(shader);
if (library)
m_workers.compilePipelineLibrary(library, DxvkPipelinePriority::High);
// Notify immediately so that this only gets called
// once, even if compilation does ot start immediately
shader->notifyLibraryCompile();
}
DxvkPipelineCount DxvkPipelineManager::getPipelineCount() const {
DxvkPipelineCount result;
result.numGraphicsPipelines = m_stats.numGraphicsPipelines.load();

View File

@ -34,6 +34,14 @@ namespace dxvk {
std::atomic<uint32_t> numComputePipelines = { 0u };
};
/**
* \brief Pipeline priority
*
* Passed to \c DxvkPipelineWorkers::compilePipelineLibrary
* to select the queue that a pipeline library is added to.
*/
enum class DxvkPipelinePriority : uint32_t {
// Library is pushed to the regular library queue
Normal = 0,
// Library is pushed to the prioritized library queue
High = 1,
};
/**
* \brief Pipeline manager worker threads
*
@ -56,9 +64,11 @@ namespace dxvk {
* the pipeline with default compile arguments.
* Note that pipeline libraries are high priority.
* \param [in] library The pipeline library
* \param [in] priority Pipeline priority
*/
void compilePipelineLibrary(
DxvkShaderPipelineLibrary* library);
DxvkShaderPipelineLibrary* library,
DxvkPipelinePriority priority);
/**
* \brief Compiles an optimized compute pipeline
@ -107,15 +117,18 @@ namespace dxvk {
DxvkShaderPipelineLibrary* pipelineLibrary;
};
DxvkDevice* m_device;
std::atomic<uint64_t> m_pendingTasks = { 0ull };
dxvk::mutex m_queueLock;
dxvk::condition_variable m_queueCond;
dxvk::condition_variable m_queueCondPrioritized;
std::queue<PipelineLibraryEntry> m_queuedLibrariesPrioritized;
std::queue<PipelineLibraryEntry> m_queuedLibraries;
std::queue<PipelineEntry> m_queuedPipelines;
uint32_t m_workerCount = 0;
bool m_workersRunning = false;
std::vector<dxvk::thread> m_workers;
@ -123,6 +136,8 @@ namespace dxvk {
void runWorker();
void runWorkerPrioritized();
};
@ -188,7 +203,7 @@ namespace dxvk {
DxvkGraphicsPipelineFragmentOutputLibrary* createFragmentOutputLibrary(
const DxvkGraphicsPipelineFragmentOutputState& state);
/*
/**
* \brief Registers a shader
*
* Starts compiling pipelines asynchronously
@ -198,7 +213,18 @@ namespace dxvk {
*/
void registerShader(
const Rc<DxvkShader>& shader);
/**
* \brief Prioritizes compilation of a given shader
*
* Adds the pipeline library for the given shader
* to the high-priority queue of the background
* workers to make sure it gets compiled quickly.
* Does nothing if the shader reports that it does
* not need its library to be compiled.
* \param [in] shader Shader to prioritize
*/
void requestCompileShader(
const Rc<DxvkShader>& shader);
/**
* \brief Retrieves total pipeline count
* \returns Number of compute/graphics pipelines