[dxvk] Implement better priority system for background shader compiles

Reduces the number of workers that perform background optimization,
which may reduce the performance impact when encountering a large
number of new pipelines at once.
Philip Rebohle 2023-01-13 14:20:27 +01:00
parent cc9266edaa
commit c978e62ec8
4 changed files with 103 additions and 118 deletions
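The diff below replaces the two separate work queues (normal and prioritized) with three priority buckets, High, Normal and Low, and creates each worker thread with the least urgent bucket it is still allowed to service; a worker always drains the most urgent non-empty bucket first. Background optimization of already-usable pipelines is queued at Low priority, so only a small subset of workers ever picks it up. The following is a minimal, self-contained sketch of that bucket pattern, not DXVK code: the Priority enum, the PriorityWorkQueue class and the std::function jobs are illustrative stand-ins for the actual DxvkPipelineWorkers types.

#include <array>
#include <condition_variable>
#include <cstdint>
#include <functional>
#include <mutex>
#include <queue>

// Illustrative priority levels; numerically smaller means more urgent,
// mirroring the reordered DxvkPipelinePriority enum in this commit.
enum class Priority : uint32_t { High = 0, Normal = 1, Low = 2 };

class PriorityWorkQueue {
public:
  // Queue a job at the given priority and wake one idle worker that is
  // allowed to service that bucket (or a less urgent one).
  void submit(Priority priority, std::function<void()> job) {
    std::unique_lock lock(m_lock);
    m_buckets[uint32_t(priority)].queue.push(std::move(job));

    for (uint32_t i = uint32_t(priority); i < m_buckets.size(); i++) {
      if (m_buckets[i].idleWorkers) {
        m_buckets[i].cond.notify_one();
        break;
      }
    }
  }

  // Worker loop: maxPriority is the least urgent bucket this worker may
  // touch; it always drains the most urgent non-empty bucket first.
  void runWorker(Priority maxPriority) {
    uint32_t maxIndex = uint32_t(maxPriority);

    while (true) {
      std::function<void()> job;

      { std::unique_lock lock(m_lock);
        auto& bucket = m_buckets[maxIndex];
        bucket.idleWorkers += 1;

        bucket.cond.wait(lock, [this, maxIndex, &job] {
          for (uint32_t i = 0; i <= maxIndex; i++) {
            if (!m_buckets[i].queue.empty()) {
              job = std::move(m_buckets[i].queue.front());
              m_buckets[i].queue.pop();
              return true;
            }
          }
          return m_stopped;
        });

        bucket.idleWorkers -= 1;

        // Mirror the shutdown behaviour: exit right away and
        // discard any pending work once stop() has been called.
        if (m_stopped)
          return;
      }

      job();
    }
  }

  // Ask all workers to exit; pending jobs are dropped.
  void stop() {
    std::unique_lock lock(m_lock);
    m_stopped = true;

    for (auto& bucket : m_buckets)
      bucket.cond.notify_all();
  }

private:
  struct Bucket {
    std::condition_variable cond;
    std::queue<std::function<void()>> queue;
    uint32_t idleWorkers = 0;
  };

  std::mutex m_lock;
  std::array<Bucket, 3> m_buckets;
  bool m_stopped = false;
};

A caller would submit background work at Priority::Low and urgent requests at Priority::High, which is essentially what the compileGraphicsPipeline and compilePipelineLibrary changes below do with DxvkPipelinePriority.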

View File

@@ -938,7 +938,7 @@ namespace dxvk {
 
     // If necessary, compile an optimized pipeline variant
     if (!instance->fastHandle.load())
-      m_workers->compileGraphicsPipeline(this, state);
+      m_workers->compileGraphicsPipeline(this, state, DxvkPipelinePriority::Low);
 
     // Only store pipelines in the state cache that cannot benefit
     // from pipeline libraries, or if that feature is disabled.
View File

@@ -21,40 +21,28 @@ namespace dxvk {
   void DxvkPipelineWorkers::compilePipelineLibrary(
           DxvkShaderPipelineLibrary* library,
           DxvkPipelinePriority priority) {
-    std::unique_lock lock(m_queueLock);
+    std::unique_lock lock(m_lock);
     this->startWorkers();
 
     m_pendingTasks += 1;
 
-    PipelineLibraryEntry e = { };
-    e.pipelineLibrary = library;
-
-    if (priority == DxvkPipelinePriority::High) {
-      m_queuedLibrariesPrioritized.push(e);
-      m_queueCondPrioritized.notify_one();
-    } else {
-      m_queuedLibraries.push(e);
-    }
-
-    m_queueCond.notify_one();
+    m_buckets[uint32_t(priority)].queue.emplace(library);
+    notifyWorkers(priority);
   }
 
   void DxvkPipelineWorkers::compileGraphicsPipeline(
           DxvkGraphicsPipeline* pipeline,
-    const DxvkGraphicsPipelineStateInfo& state) {
-    std::unique_lock lock(m_queueLock);
+    const DxvkGraphicsPipelineStateInfo& state,
+          DxvkPipelinePriority priority) {
+    std::unique_lock lock(m_lock);
     this->startWorkers();
 
     pipeline->acquirePipeline();
     m_pendingTasks += 1;
 
-    PipelineEntry e = { };
-    e.graphicsPipeline = pipeline;
-    e.graphicsState = state;
-
-    m_queuedPipelines.push(e);
-    m_queueCond.notify_one();
+    m_buckets[uint32_t(priority)].queue.emplace(pipeline, state);
+    notifyWorkers(priority);
   }
@@ -64,14 +52,15 @@ namespace dxvk {
 
   void DxvkPipelineWorkers::stopWorkers() {
-    { std::unique_lock lock(m_queueLock);
+    { std::unique_lock lock(m_lock);
 
       if (!m_workersRunning)
         return;
 
       m_workersRunning = false;
-      m_queueCond.notify_all();
-      m_queueCondPrioritized.notify_all();
+
+      for (uint32_t i = 0; i < m_buckets.size(); i++)
+        m_buckets[i].cond.notify_all();
     }
 
     for (auto& worker : m_workers)
@@ -81,8 +70,23 @@ namespace dxvk {
   }
 
+  void DxvkPipelineWorkers::notifyWorkers(DxvkPipelinePriority priority) {
+    uint32_t index = uint32_t(priority);
+
+    // If any workers are idle in a suitable set, notify the corresponding
+    // condition variable. If all workers are busy anyway, we know that the
+    // job is going to be picked up at some point anyway.
+    for (uint32_t i = index; i < m_buckets.size(); i++) {
+      if (m_buckets[i].idleWorkers) {
+        m_buckets[i].cond.notify_one();
+        break;
+      }
+    }
+  }
+
   void DxvkPipelineWorkers::startWorkers() {
-    if (!m_workersRunning) {
+    if (!std::exchange(m_workersRunning, true)) {
       // Use all available cores by default
       uint32_t workerCount = dxvk::thread::hardware_concurrency();
@@ -98,102 +102,74 @@ namespace dxvk {
 
       // Number of workers that can process pipelines with normal
       // priority. Any other workers can only build high-priority pipelines.
-      uint32_t npWorkerCount = m_device->canUseGraphicsPipelineLibrary()
-        ? std::max(((workerCount - 1) * 5) / 7, 1u)
-        : workerCount;
-
-      uint32_t hpWorkerCount = workerCount - npWorkerCount;
-
-      Logger::info(str::format("DXVK: Using ", npWorkerCount, " + ", hpWorkerCount, " compiler threads"));
-      m_workers.resize(npWorkerCount + hpWorkerCount);
-
-      // Set worker flag so that they don't exit immediately
-      m_workersRunning = true;
-
-      for (size_t i = 0; i < m_workers.size(); i++) {
-        m_workers[i] = i >= npWorkerCount
-          ? dxvk::thread([this] { runWorkerPrioritized(); })
-          : dxvk::thread([this] { runWorker(); });
-        m_workers[i].set_priority(ThreadPriority::Lowest);
+      uint32_t npWorkerCount = std::max(((workerCount - 1) * 5) / 7, 1u);
+      uint32_t lpWorkerCount = std::max(((workerCount - 1) * 2) / 7, 1u);
+
+      m_workers.reserve(workerCount);
+
+      for (size_t i = 0; i < workerCount; i++) {
+        DxvkPipelinePriority priority = DxvkPipelinePriority::Normal;
+
+        if (m_device->canUseGraphicsPipelineLibrary()) {
+          if (i >= npWorkerCount)
+            priority = DxvkPipelinePriority::High;
+          else if (i < lpWorkerCount)
+            priority = DxvkPipelinePriority::Low;
+        }
+
+        m_workers.emplace_back([this, priority] {
+          runWorker(priority);
+        });
       }
+
+      Logger::info(str::format("DXVK: Using ", workerCount, " compiler threads"));
     }
   }
 
-  void DxvkPipelineWorkers::runWorker() {
-    env::setThreadName("dxvk-shader");
+  void DxvkPipelineWorkers::runWorker(DxvkPipelinePriority maxPriority) {
+    static const std::array<char, 3> suffixes = { 'h', 'n', 'l' };
+
+    const uint32_t maxPriorityIndex = uint32_t(maxPriority);
+    env::setThreadName(str::format("dxvk-shader-", suffixes.at(maxPriorityIndex)));
 
     while (true) {
-      std::optional<PipelineEntry> p;
-      std::optional<PipelineLibraryEntry> l;
+      PipelineEntry entry;
 
-      { std::unique_lock lock(m_queueLock);
+      { std::unique_lock lock(m_lock);
+        auto& bucket = m_buckets[maxPriorityIndex];
 
-        m_queueCond.wait(lock, [this] {
-          return !m_workersRunning
-              || !m_queuedLibrariesPrioritized.empty()
-              || !m_queuedLibraries.empty()
-              || !m_queuedPipelines.empty();
+        bucket.idleWorkers += 1;
+
+        bucket.cond.wait(lock, [this, maxPriorityIndex, &entry] {
+          // Attempt to fetch a work item from the
+          // highest-priority queue that is not empty
+          for (uint32_t i = 0; i <= maxPriorityIndex; i++) {
+            if (!m_buckets[i].queue.empty()) {
+              entry = m_buckets[i].queue.front();
+              m_buckets[i].queue.pop();
+              return true;
+            }
+          }
+
+          return !m_workersRunning;
         });
 
-        if (!m_workersRunning) {
-          // Skip pending work, exiting early is
-          // more important in this case.
-          break;
-        } else if (!m_queuedLibrariesPrioritized.empty()) {
-          l = m_queuedLibrariesPrioritized.front();
-          m_queuedLibrariesPrioritized.pop();
-        } else if (!m_queuedLibraries.empty()) {
-          l = m_queuedLibraries.front();
-          m_queuedLibraries.pop();
-        } else if (!m_queuedPipelines.empty()) {
-          p = m_queuedPipelines.front();
-          m_queuedPipelines.pop();
-        }
+        bucket.idleWorkers -= 1;
+
+        // Skip pending work, exiting early is
+        // more important in this case.
+        if (!m_workersRunning)
+          break;
       }
 
-      if (l) {
-        if (l->pipelineLibrary)
-          l->pipelineLibrary->compilePipeline();
-
-        m_pendingTasks -= 1;
-      }
-
-      if (p) {
-        if (p->graphicsPipeline) {
-          p->graphicsPipeline->compilePipeline(p->graphicsState);
-          p->graphicsPipeline->releasePipeline();
-        }
-
-        m_pendingTasks -= 1;
-      }
+      if (entry.pipelineLibrary) {
+        entry.pipelineLibrary->compilePipeline();
+        m_pendingTasks -= 1;
+      } else if (entry.graphicsPipeline) {
+        entry.graphicsPipeline->compilePipeline(entry.graphicsState);
+        entry.graphicsPipeline->releasePipeline();
+        m_pendingTasks -= 1;
+      }
     }
   }
-
-  void DxvkPipelineWorkers::runWorkerPrioritized() {
-    env::setThreadName("dxvk-shader-p");
-
-    while (true) {
-      PipelineLibraryEntry l = { };
-
-      { std::unique_lock lock(m_queueLock);
-
-        m_queueCondPrioritized.wait(lock, [this] {
-          return !m_workersRunning
-              || !m_queuedLibrariesPrioritized.empty();
-        });
-
-        if (!m_workersRunning)
-          break;
-
-        l = m_queuedLibrariesPrioritized.front();
-        m_queuedLibrariesPrioritized.pop();
-      }
-
-      if (l.pipelineLibrary)
-        l.pipelineLibrary->compilePipeline();
-
-      m_pendingTasks -= 1;
-    }
-  }
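For illustration (assuming a machine that reports 16 hardware threads and supports graphics pipeline libraries): npWorkerCount = max((15 * 5) / 7, 1) = 10 and lpWorkerCount = max((15 * 2) / 7, 1) = 4, so workers 0 through 3 may service Low, Normal and High work, workers 4 through 9 Normal and High, and workers 10 through 15 High only. Without pipeline library support every worker keeps the Normal cap, so no thread ever exclusively waits on high-priority work.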

View File

@@ -38,8 +38,9 @@ namespace dxvk {
    * \brief Pipeline priority
    */
   enum class DxvkPipelinePriority : uint32_t {
-    Normal = 0,
-    High = 1,
+    High = 0,
+    Normal = 1,
+    Low = 2,
   };
 
   /**
@@ -78,7 +79,8 @@ namespace dxvk {
      */
    void compileGraphicsPipeline(
            DxvkGraphicsPipeline* pipeline,
-     const DxvkGraphicsPipelineStateInfo& state);
+     const DxvkGraphicsPipelineStateInfo& state,
+           DxvkPipelinePriority priority);
 
    /**
     * \brief Checks whether workers are busy
@@ -97,34 +99,41 @@ namespace dxvk {
  private:
 
    struct PipelineEntry {
+      PipelineEntry()
+      : pipelineLibrary(nullptr), graphicsPipeline(nullptr) { }
+
+      PipelineEntry(DxvkShaderPipelineLibrary* l)
+      : pipelineLibrary(l), graphicsPipeline(nullptr) { }
+
+      PipelineEntry(DxvkGraphicsPipeline* p, const DxvkGraphicsPipelineStateInfo& s)
+      : pipelineLibrary(nullptr), graphicsPipeline(p), graphicsState(s) { }
+
+      DxvkShaderPipelineLibrary* pipelineLibrary;
      DxvkGraphicsPipeline* graphicsPipeline;
      DxvkGraphicsPipelineStateInfo graphicsState;
    };
 
-    struct PipelineLibraryEntry {
-      DxvkShaderPipelineLibrary* pipelineLibrary;
+    struct PipelineBucket {
+      dxvk::condition_variable cond;
+      std::queue<PipelineEntry> queue;
+      uint32_t idleWorkers = 0;
    };
 
    DxvkDevice* m_device;
 
    std::atomic<uint64_t> m_pendingTasks = { 0ull };
 
-    dxvk::mutex m_queueLock;
-    dxvk::condition_variable m_queueCond;
-    dxvk::condition_variable m_queueCondPrioritized;
-
-    std::queue<PipelineLibraryEntry> m_queuedLibrariesPrioritized;
-    std::queue<PipelineLibraryEntry> m_queuedLibraries;
-    std::queue<PipelineEntry> m_queuedPipelines;
+    dxvk::mutex m_lock;
+    std::array<PipelineBucket, 3> m_buckets;
 
    bool m_workersRunning = false;
    std::vector<dxvk::thread> m_workers;
 
+    void notifyWorkers(DxvkPipelinePriority priority);
+
    void startWorkers();
 
-    void runWorker();
-    void runWorkerPrioritized();
+    void runWorker(DxvkPipelinePriority maxPriority);
  };
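Reordering DxvkPipelinePriority so that High is 0 lets the priority value double as an index into m_buckets: a worker created with maximum priority P waits on bucket P but polls buckets 0 through P in order, so more urgent requests are always drained first. Within this commit, the background optimization path (first hunk) submits at Low and the state cache replay path (last hunk) at Normal, while pipeline library compiles keep whatever priority the caller passes in.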

View File

@@ -453,7 +453,7 @@ namespace dxvk {
        if (!pipeline)
          pipeline = m_pipeManager->createGraphicsPipeline(item.gp);
 
-        m_pipeWorkers->compileGraphicsPipeline(pipeline, entry.gpState);
+        m_pipeWorkers->compileGraphicsPipeline(pipeline, entry.gpState, DxvkPipelinePriority::Normal);
      } break;
 
      case DxvkStateCacheEntryType::PipelineLibrary: {