[dxvk] Add high-priority queue for shader compiles

As well as an API to queue shaders as high priority.
2022-08-11 02:37:36 +02:00 · 2022-08-11 02:37:36 +02:00 · c3a53127d7
parent f09f11aad0
commit c3a53127d7
4 changed files with 133 additions and 23 deletions
--- a/src/dxvk/dxvk_device.cpp
+++ b/src/dxvk/dxvk_device.cpp
@ -213,6 +213,12 @@ namespace dxvk {
  }
  
  
+  void DxvkDevice::requestCompileShader(
+    const Rc<DxvkShader>&           shader) {
+    m_objects.pipelineManager().requestCompileShader(shader);
+  }
+
+
  void DxvkDevice::presentImage(
    const Rc<vk::Presenter>&        presenter,
          DxvkSubmitStatus*         status) {
--- a/src/dxvk/dxvk_device.h
+++ b/src/dxvk/dxvk_device.h
@ -380,6 +380,13 @@ namespace dxvk {
    void registerShader(
      const Rc<DxvkShader>&         shader);
    
+    /**
+     * \brief Prioritizes compilation of a given shader
+     * \param [in] shader Shader to start compiling
+     */
+    void requestCompileShader(
+      const Rc<DxvkShader>&         shader);
+
    /**
     * \brief Presents a swap chain image
     * 
--- a/src/dxvk/dxvk_pipemanager.cpp
+++ b/src/dxvk/dxvk_pipemanager.cpp
@ -7,17 +7,9 @@
 namespace dxvk {
  
  DxvkPipelineWorkers::DxvkPipelineWorkers(
-          DxvkDevice*                     device) {
-    // Use a reasonably large number of threads for compiling, but
-    // leave some cores to the application to avoid excessive stutter
-    uint32_t numCpuCores = dxvk::thread::hardware_concurrency();
-    m_workerCount = ((std::max(1u, numCpuCores) - 1) * 5) / 7;
+          DxvkDevice*                     device)
+  : m_device(device) {

-    if (m_workerCount <  1) m_workerCount =  1;
-    if (m_workerCount > 32) m_workerCount = 32;
-
-    if (device->config().numCompilerThreads > 0)
-      m_workerCount = device->config().numCompilerThreads;
  }


@ -27,7 +19,8 @@ namespace dxvk {


  void DxvkPipelineWorkers::compilePipelineLibrary(
-          DxvkShaderPipelineLibrary*      library) {
+          DxvkShaderPipelineLibrary*      library,
+          DxvkPipelinePriority            priority) {
    std::unique_lock lock(m_queueLock);
    this->startWorkers();

@ -36,7 +29,13 @@ namespace dxvk {
    PipelineLibraryEntry e = { };
    e.pipelineLibrary = library;

-    m_queuedLibraries.push(e);
+    if (priority == DxvkPipelinePriority::High) {
+      m_queuedLibrariesPrioritized.push(e);
+      m_queueCondPrioritized.notify_one();
+    } else {
+      m_queuedLibraries.push(e);
+    }
+
    m_queueCond.notify_one();
  }

@ -100,14 +99,37 @@ namespace dxvk {

  void DxvkPipelineWorkers::startWorkers() {
    if (!m_workersRunning) {
+      // Use all available cores by default
+      uint32_t workerCount = dxvk::thread::hardware_concurrency();
+
+      if (workerCount <  1) workerCount =  1;
+      if (workerCount > 64) workerCount = 64;
+
+      // Reduce worker count on 32-bit to save address space
+      if (env::is32BitHostPlatform())
+        workerCount = std::min(workerCount, 16u);
+
+      if (m_device->config().numCompilerThreads > 0)
+        workerCount = m_device->config().numCompilerThreads;
+
+      // Number of workers that can process pipelines with normal
+      // priority. Any other workers can only build high-priority pipelines.
+      uint32_t npWorkerCount = m_device->canUseGraphicsPipelineLibrary()
+        ? std::max(((workerCount - 1) * 5) / 7, 1u)
+        : workerCount;
+      uint32_t hpWorkerCount = workerCount - npWorkerCount;
+
+      Logger::info(str::format("DXVK: Using ", npWorkerCount, " + ", hpWorkerCount, " compiler threads"));
+      m_workers.resize(npWorkerCount + hpWorkerCount);
+
+      // Set worker flag so that they don't exit immediately
      m_workersRunning = true;

-      Logger::info(str::format("DXVK: Using ", m_workerCount, " compiler threads"));
-      m_workers.resize(m_workerCount);
-
-      for (auto& worker : m_workers) {
-        worker = dxvk::thread([this] { runWorker(); });
-        worker.set_priority(ThreadPriority::Lowest);
+      for (size_t i = 0; i < m_workers.size(); i++) {
+        m_workers[i] = i >= npWorkerCount
+          ? dxvk::thread([this] { runWorkerPrioritized(); })
+          : dxvk::thread([this] { runWorker(); });
+        m_workers[i].set_priority(ThreadPriority::Lowest);
      }
    }
  }
@ -124,6 +146,7 @@ namespace dxvk {

        m_queueCond.wait(lock, [this] {
          return !m_workersRunning
+              || !m_queuedLibrariesPrioritized.empty()
              || !m_queuedLibraries.empty()
              || !m_queuedPipelines.empty();
        });
@ -132,6 +155,9 @@ namespace dxvk {
          // Skip pending work, exiting early is
          // more important in this case.
          break;
+        } else if (!m_queuedLibrariesPrioritized.empty()) {
+          l = m_queuedLibrariesPrioritized.front();
+          m_queuedLibrariesPrioritized.pop();
        } else if (!m_queuedLibraries.empty()) {
          l = m_queuedLibraries.front();
          m_queuedLibraries.pop();
@ -162,6 +188,34 @@ namespace dxvk {
  }


+  void DxvkPipelineWorkers::runWorkerPrioritized() {
+    env::setThreadName("dxvk-shader-p");
+
+    while (true) {
+      PipelineLibraryEntry l = { };
+
+      { std::unique_lock lock(m_queueLock);
+
+        m_queueCondPrioritized.wait(lock, [this] {
+          return !m_workersRunning
+              || !m_queuedLibrariesPrioritized.empty();
+        });
+
+        if (!m_workersRunning)
+          break;
+
+        l = m_queuedLibrariesPrioritized.front();
+        m_queuedLibrariesPrioritized.pop();
+      }
+
+      if (l.pipelineLibrary)
+        l.pipelineLibrary->compilePipeline();
+
+      m_pendingTasks -= 1;
+    }
+  }
+
+
  DxvkPipelineManager::DxvkPipelineManager(
          DxvkDevice*         device)
  : m_device    (device),
@ -285,13 +339,30 @@ namespace dxvk {
    const Rc<DxvkShader>&         shader) {
    if (canPrecompileShader(shader)) {
      auto library = createPipelineLibrary(shader);
-      m_workers.compilePipelineLibrary(library);
+      m_workers.compilePipelineLibrary(library, DxvkPipelinePriority::Normal);
    }

    m_stateCache.registerShader(shader);
  }


+  void DxvkPipelineManager::requestCompileShader(
+    const Rc<DxvkShader>&         shader) {
+    if (!shader->needsLibraryCompile())
+      return;
+
+    // Dispatch high-priority compile job
+    auto library = findPipelineLibrary(shader);
+
+    if (library)
+      m_workers.compilePipelineLibrary(library, DxvkPipelinePriority::High);
+
+    // Notify immediately so that this only gets called
+    // once, even if compilation does not start immediately
+    shader->notifyLibraryCompile();
+  }
+
+
  DxvkPipelineCount DxvkPipelineManager::getPipelineCount() const {
    DxvkPipelineCount result;
    result.numGraphicsPipelines = m_stats.numGraphicsPipelines.load();
--- a/src/dxvk/dxvk_pipemanager.h
+++ b/src/dxvk/dxvk_pipemanager.h
@ -34,6 +34,14 @@ namespace dxvk {
    std::atomic<uint32_t> numComputePipelines   = { 0u };
  };

+  /**
+   * \brief Pipeline priority
+   */
+  enum class DxvkPipelinePriority : uint32_t {
+    Normal  = 0,
+    High    = 1,
+  };
+
  /**
   * \brief Pipeline manager worker threads
   *
@ -56,9 +64,11 @@ namespace dxvk {
     * the pipeline with default compile arguments.
     * Note that pipeline libraries are high priority.
     * \param [in] library The pipeline library
+     * \param [in] priority Pipeline priority
     */
    void compilePipelineLibrary(
-            DxvkShaderPipelineLibrary*      library);
+            DxvkShaderPipelineLibrary*      library,
+            DxvkPipelinePriority            priority);

    /**
     * \brief Compiles an optimized compute pipeline
@ -107,15 +117,18 @@ namespace dxvk {
      DxvkShaderPipelineLibrary*    pipelineLibrary;
    };

+    DxvkDevice*                       m_device;
+
    std::atomic<uint64_t>             m_pendingTasks = { 0ull };

    dxvk::mutex                       m_queueLock;
    dxvk::condition_variable          m_queueCond;
+    dxvk::condition_variable          m_queueCondPrioritized;

+    std::queue<PipelineLibraryEntry>  m_queuedLibrariesPrioritized;
    std::queue<PipelineLibraryEntry>  m_queuedLibraries;
    std::queue<PipelineEntry>         m_queuedPipelines;

-    uint32_t                          m_workerCount = 0;
    bool                              m_workersRunning = false;
    std::vector<dxvk::thread>         m_workers;

@ -123,6 +136,8 @@ namespace dxvk {

    void runWorker();

+    void runWorkerPrioritized();
+
  };

  
@ -188,7 +203,7 @@ namespace dxvk {
    DxvkGraphicsPipelineFragmentOutputLibrary* createFragmentOutputLibrary(
      const DxvkGraphicsPipelineFragmentOutputState& state);

-    /*
+    /**
     * \brief Registers a shader
     * 
     * Starts compiling pipelines asynchronously
@ -198,7 +213,18 @@ namespace dxvk {
     */
    void registerShader(
      const Rc<DxvkShader>&         shader);
-    
+
+    /**
+     * \brief Prioritizes compilation of a given shader
+     *
+     * Adds the pipeline library for the given shader
+     * to the high-priority queue of the background
+     * workers to make sure it gets compiled quickly.
+     * \param [in] shader Shader to start compiling
+     */
+    void requestCompileShader(
+      const Rc<DxvkShader>&         shader);
+
    /**
     * \brief Retrieves total pipeline count
     * \returns Number of compute/graphics pipelines