Merge branch 'disable-opt-bit'

2018-05-13 16:19:31 +02:00 · 2018-05-13 16:19:31 +02:00 · 516d7f091e
parent c17f4e2fc0 f42f7cc743
commit 516d7f091e
8 changed files with 350 additions and 77 deletions
--- a/README.md
+++ b/README.md
@ -82,6 +82,7 @@ The following environment variables can be used for **debugging** purposes.
 - `DXVK_CUSTOM_DEVICE_ID=<ID>` Specifies a custom PCI device ID
 - `DXVK_LOG_LEVEL=none|error|warn|info|debug` Controls message logging
 - `DXVK_FAKE_DX10_SUPPORT=1` Advertizes support for D3D10 interfaces
+- `DXVK_USE_PIPECOMPILER=1` Enables asynchronous pipeline compilation. This currently only has an effect on RADV in mesa-git.

 ## Troubleshooting
 DXVK requires threading support from your mingw-w64 build environment. If you
--- a/src/dxvk/dxvk_graphics.cpp
+++ b/src/dxvk/dxvk_graphics.cpp
@ -34,16 +34,37 @@ namespace dxvk {
  }
  
  
+  DxvkGraphicsPipelineInstance::DxvkGraphicsPipelineInstance(
+    const Rc<vk::DeviceFn>&               vkd,
+    const DxvkGraphicsPipelineStateInfo&  stateVector,
+          VkRenderPass                    renderPass,
+          VkPipeline                      basePipeline)
+  : m_vkd         (vkd),
+    m_stateVector (stateVector),
+    m_renderPass  (renderPass),
+    m_basePipeline(basePipeline),
+    m_fastPipeline(VK_NULL_HANDLE) {
+    
+  }
+  
+  
+  DxvkGraphicsPipelineInstance::~DxvkGraphicsPipelineInstance() {
+    m_vkd->vkDestroyPipeline(m_vkd->device(), m_basePipeline, nullptr);
+    m_vkd->vkDestroyPipeline(m_vkd->device(), m_fastPipeline, nullptr);
+  }
+  
+  
  DxvkGraphicsPipeline::DxvkGraphicsPipeline(
-    const DxvkDevice*             device,
-    const Rc<DxvkPipelineCache>&  cache,
-    const Rc<DxvkShader>&         vs,
-    const Rc<DxvkShader>&         tcs,
-    const Rc<DxvkShader>&         tes,
-    const Rc<DxvkShader>&         gs,
-    const Rc<DxvkShader>&         fs)
+    const DxvkDevice*               device,
+    const Rc<DxvkPipelineCache>&    cache,
+    const Rc<DxvkPipelineCompiler>& compiler,
+    const Rc<DxvkShader>&           vs,
+    const Rc<DxvkShader>&           tcs,
+    const Rc<DxvkShader>&           tes,
+    const Rc<DxvkShader>&           gs,
+    const Rc<DxvkShader>&           fs)
  : m_device(device), m_vkd(device->vkd()),
-    m_cache(cache) {
+    m_cache(cache), m_compiler(compiler) {
    DxvkDescriptorSlotMapping slotMapping;
    if (vs  != nullptr) vs ->defineResourceSlots(slotMapping);
    if (tcs != nullptr) tcs->defineResourceSlots(slotMapping);
@ -71,7 +92,7 @@ namespace dxvk {
  
  
  DxvkGraphicsPipeline::~DxvkGraphicsPipeline() {
-    this->destroyPipelines();
+    
  }
  
  
@ -79,61 +100,96 @@ namespace dxvk {
    const DxvkGraphicsPipelineStateInfo& state,
    const DxvkRenderPass&                renderPass,
          DxvkStatCounters&              stats) {
-    VkPipeline pipeline = VK_NULL_HANDLE;
    VkRenderPass renderPassHandle = renderPass.getDefaultHandle();
    
    { std::lock_guard<sync::Spinlock> lock(m_mutex);
      
-      if (this->findPipeline(state, renderPassHandle, pipeline))
-        return pipeline;
+      DxvkGraphicsPipelineInstance* pipeline =
+        this->findInstance(state, renderPassHandle);
+      
+      if (pipeline != nullptr)
+        return pipeline->getPipeline();
    }
    
-    // If no pipeline exists with the given state vector,
-    // create a new one and add it to the pipeline set.
-    VkPipeline newPipeline = this->validatePipelineState(state)
-      ? this->compilePipeline(state, renderPassHandle, m_basePipeline)
-      : VK_NULL_HANDLE;
+    // If the pipeline state vector is invalid, don't try
+    // to create a new pipeline, it won't work anyway.
+    if (!this->validatePipelineState(state))
+      return VK_NULL_HANDLE;
+    
+    // If no pipeline instance exists with the given state
+    // vector, create a new one and add it to the list.
+    VkPipeline newPipelineBase   = m_basePipelineBase.load();
+    VkPipeline newPipelineHandle = this->compilePipeline(state, renderPassHandle,
+      m_compiler != nullptr ? VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT : 0,
+      newPipelineBase);
+    
+    Rc<DxvkGraphicsPipelineInstance> newPipeline =
+      new DxvkGraphicsPipelineInstance(m_device->vkd(), state,
+        renderPassHandle, newPipelineHandle);
    
    { std::lock_guard<sync::Spinlock> lock(m_mutex);
      
      // Discard the pipeline if another thread
      // was faster compiling the same pipeline
-      if (this->findPipeline(state, renderPassHandle, pipeline)) {
-        m_vkd->vkDestroyPipeline(m_vkd->device(), newPipeline, nullptr);
-        return pipeline;
-      }
+      DxvkGraphicsPipelineInstance* pipeline =
+        this->findInstance(state, renderPassHandle);
+      
+      if (pipeline != nullptr)
+        return pipeline->getPipeline();
      
      // Add new pipeline to the set
-      m_pipelines.push_back({ state, renderPassHandle, newPipeline });
-      
-      if (m_basePipeline == VK_NULL_HANDLE)
-        m_basePipeline = newPipeline;
+      m_pipelines.push_back(newPipeline);
      
      stats.addCtr(DxvkStatCounter::PipeCountGraphics, 1);
-      return newPipeline;
    }
+    
+    // Use the new pipeline as the base pipeline for derivative pipelines
+    if (newPipelineBase == VK_NULL_HANDLE && newPipelineHandle != VK_NULL_HANDLE)
+      m_basePipelineBase.compare_exchange_strong(newPipelineBase, newPipelineHandle);
+    
+    // Compile optimized pipeline asynchronously
+    if (m_compiler != nullptr)
+      m_compiler->queueCompilation(this, newPipeline);
+    
+    return newPipelineHandle;
  }
  
  
-  bool DxvkGraphicsPipeline::findPipeline(
+  void DxvkGraphicsPipeline::compileInstance(
+    const Rc<DxvkGraphicsPipelineInstance>& instance) {
+    // Compile an optimized version of the pipeline
+    VkPipeline newPipelineBase   = m_fastPipelineBase.load();
+    VkPipeline newPipelineHandle = this->compilePipeline(
+      instance->m_stateVector, instance->m_renderPass,
+      0, m_fastPipelineBase);
+    
+    // Use the new pipeline as the base pipeline for derivative pipelines
+    if (newPipelineBase == VK_NULL_HANDLE && newPipelineHandle != VK_NULL_HANDLE)
+      m_fastPipelineBase.compare_exchange_strong(newPipelineBase, newPipelineHandle);
+    
+    // If an optimized version has been compiled
+    // in the meantime, discard the new pipeline
+    if (!instance->setFastPipeline(newPipelineHandle))
+      m_vkd->vkDestroyPipeline(m_vkd->device(), newPipelineHandle, nullptr);
+  }
+  
+  
+  DxvkGraphicsPipelineInstance* DxvkGraphicsPipeline::findInstance(
    const DxvkGraphicsPipelineStateInfo& state,
-          VkRenderPass                   renderPass,
-          VkPipeline&                    pipeline) const {
-    for (const PipelineStruct& pair : m_pipelines) {
-      if (pair.stateVector == state
-       && pair.renderPass  == renderPass) {
-        pipeline = pair.pipeline;
-        return true;
-      }
+          VkRenderPass                   renderPass) const {
+    for (const auto& pipeline : m_pipelines) {
+      if (pipeline->isCompatible(state, renderPass))
+        return pipeline.ptr();
    }
    
-    return false;
+    return nullptr;
  }
  
  
  VkPipeline DxvkGraphicsPipeline::compilePipeline(
    const DxvkGraphicsPipelineStateInfo& state,
          VkRenderPass                   renderPass,
+          VkPipelineCreateFlags          createFlags,
          VkPipeline                     baseHandle) const {
    if (Logger::logLevel() <= LogLevel::Debug) {
      Logger::debug("Compiling graphics pipeline...");
@ -287,9 +343,7 @@ namespace dxvk {
    VkGraphicsPipelineCreateInfo info;
    info.sType                    = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
    info.pNext                    = nullptr;
-    info.flags                    = baseHandle == VK_NULL_HANDLE
-      ? VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT
-      : VK_PIPELINE_CREATE_DERIVATIVE_BIT;
+    info.flags                    = createFlags;
    info.stageCount               = stages.size();
    info.pStages                  = stages.data();
    info.pVertexInputState        = &viInfo;
@ -307,6 +361,10 @@ namespace dxvk {
    info.basePipelineHandle       = baseHandle;
    info.basePipelineIndex        = -1;
    
+    info.flags |= baseHandle == VK_NULL_HANDLE
+      ? VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT
+      : VK_PIPELINE_CREATE_DERIVATIVE_BIT;
+    
    if (tsInfo.patchControlPoints == 0)
      info.pTessellationState = nullptr;
    
@ -328,12 +386,6 @@ namespace dxvk {
  }
  
  
-  void DxvkGraphicsPipeline::destroyPipelines() {
-    for (const PipelineStruct& pair : m_pipelines)
-      m_vkd->vkDestroyPipeline(m_vkd->device(), pair.pipeline, nullptr);
-  }
-  
-  
  bool DxvkGraphicsPipeline::validatePipelineState(
    const DxvkGraphicsPipelineStateInfo& state) const {
    // Validate vertex input - each input slot consumed by the
@ -343,17 +395,13 @@ namespace dxvk {
    for (uint32_t i = 0; i < state.ilAttributeCount; i++)
      providedVertexInputs |= 1u << state.ilAttributes[i].location;
    
-    if ((providedVertexInputs & m_vsIn) != m_vsIn) {
-      Logger::err("DxvkGraphicsPipeline: Input layout mismatches vertex shader input");
+    if ((providedVertexInputs & m_vsIn) != m_vsIn)
      return false;
-    }
    
    // If there are no tessellation shaders, we
    // obviously cannot use tessellation patches.
-    if ((state.iaPatchVertexCount != 0) && (m_tcs == nullptr || m_tes == nullptr)) {
-      Logger::err("DxvkGraphicsPipeline: Cannot use tessellation patches without tessellation shaders");
+    if ((state.iaPatchVertexCount != 0) && (m_tcs == nullptr || m_tes == nullptr))
      return false;
-    }
    
    // No errors
    return true;
--- a/src/dxvk/dxvk_graphics.h
+++ b/src/dxvk/dxvk_graphics.h
@ -5,6 +5,7 @@
 #include "dxvk_binding.h"
 #include "dxvk_constant_state.h"
 #include "dxvk_pipecache.h"
+#include "dxvk_pipecompiler.h"
 #include "dxvk_pipelayout.h"
 #include "dxvk_renderpass.h"
 #include "dxvk_resource.h"
@ -90,6 +91,79 @@ namespace dxvk {
  };
  
  
+  /**
+   * \brief Graphics pipeline instance
+   * 
+   * Stores a state vector and the corresponding
+   * unoptimized and optimized pipeline handles.
+   */
+  class DxvkGraphicsPipelineInstance : public RcObject {
+    friend class DxvkGraphicsPipeline;
+  public:
+    
+    DxvkGraphicsPipelineInstance(
+      const Rc<vk::DeviceFn>&               vkd,
+      const DxvkGraphicsPipelineStateInfo&  stateVector,
+            VkRenderPass                    renderPass,
+            VkPipeline                      basePipeline);
+    
+    ~DxvkGraphicsPipelineInstance();
+    
+    /**
+     * \brief Checks for matching pipeline state
+     * 
+     * \param [in] stateVector Graphics pipeline state
+     * \param [in] renderPass Render pass handle
+     * \returns \c true if the specialization is compatible
+     */
+    bool isCompatible(
+      const DxvkGraphicsPipelineStateInfo&  stateVector,
+            VkRenderPass                    renderPass) const {
+      return m_renderPass  == renderPass
+          && m_stateVector == stateVector;
+    }
+    
+    /**
+     * \brief Sets the optimized pipeline handle
+     * 
+     * If an optimized pipeline handle has already been
+     * set up, this method will fail and the new pipeline
+     * handle should be destroyed.
+     * \param [in] pipeline The optimized pipeline
+     */
+    bool setFastPipeline(VkPipeline pipeline) {
+      VkPipeline expected = VK_NULL_HANDLE;
+      return m_fastPipeline.compare_exchange_strong(expected, pipeline);
+    }
+    
+    /**
+     * \brief Retrieves pipeline
+     * 
+     * Returns the optimized version of the pipeline if
+     * it has been set, or the base pipeline if not.
+     * \returns The pipeline handle
+     */
+    VkPipeline getPipeline() const {
+      VkPipeline basePipeline = m_basePipeline.load();
+      VkPipeline fastPipeline = m_fastPipeline.load();
+      
+      return fastPipeline != VK_NULL_HANDLE
+        ? fastPipeline : basePipeline;
+    }
+    
+  private:
+    
+    const Rc<vk::DeviceFn> m_vkd;
+    
+    DxvkGraphicsPipelineStateInfo m_stateVector;
+    VkRenderPass                  m_renderPass;
+    
+    std::atomic<VkPipeline> m_basePipeline;
+    std::atomic<VkPipeline> m_fastPipeline;
+    
+  };
+  
+  
  /**
   * \brief Graphics pipeline
   * 
@ -102,13 +176,14 @@ namespace dxvk {
  public:
    
    DxvkGraphicsPipeline(
-      const DxvkDevice*             device,
-      const Rc<DxvkPipelineCache>&  cache,
-      const Rc<DxvkShader>&         vs,
-      const Rc<DxvkShader>&         tcs,
-      const Rc<DxvkShader>&         tes,
-      const Rc<DxvkShader>&         gs,
-      const Rc<DxvkShader>&         fs);
+      const DxvkDevice*               device,
+      const Rc<DxvkPipelineCache>&    cache,
+      const Rc<DxvkPipelineCompiler>& compiler,
+      const Rc<DxvkShader>&           vs,
+      const Rc<DxvkShader>&           tcs,
+      const Rc<DxvkShader>&           tes,
+      const Rc<DxvkShader>&           gs,
+      const Rc<DxvkShader>&           fs);
    ~DxvkGraphicsPipeline();
    
    /**
@ -134,9 +209,19 @@ namespace dxvk {
     * \returns Pipeline handle
     */
    VkPipeline getPipelineHandle(
-      const DxvkGraphicsPipelineStateInfo& state,
-      const DxvkRenderPass&                renderPass,
-            DxvkStatCounters&              stats);
+      const DxvkGraphicsPipelineStateInfo&    state,
+      const DxvkRenderPass&                   renderPass,
+            DxvkStatCounters&                 stats);
+    
+    /**
+     * \brief Compiles optimized pipeline
+     * 
+     * Compiles an optimized version of a pipeline
+     * and makes it available to the system.
+     * \param [in] instance The pipeline instance
+     */
+    void compileInstance(
+      const Rc<DxvkGraphicsPipelineInstance>& instance);
    
  private:
    
@ -149,8 +234,9 @@ namespace dxvk {
    const DxvkDevice* const m_device;
    const Rc<vk::DeviceFn>  m_vkd;
    
-    Rc<DxvkPipelineCache>   m_cache;
-    Rc<DxvkPipelineLayout>  m_layout;
+    Rc<DxvkPipelineCache>     m_cache;
+    Rc<DxvkPipelineCompiler>  m_compiler;
+    Rc<DxvkPipelineLayout>    m_layout;
    
    Rc<DxvkShaderModule>  m_vs;
    Rc<DxvkShaderModule>  m_tcs;
@ -163,23 +249,24 @@ namespace dxvk {
    
    DxvkGraphicsCommonPipelineStateInfo m_common;
    
-    sync::Spinlock              m_mutex;
-    std::vector<PipelineStruct> m_pipelines;
+    // List of pipeline instances, shared between threads
+    alignas(CACHE_LINE_SIZE) sync::Spinlock       m_mutex;
+    std::vector<Rc<DxvkGraphicsPipelineInstance>> m_pipelines;
    
-    VkPipeline m_basePipeline = VK_NULL_HANDLE;
+    // Pipeline handles used for derivative pipelines
+    std::atomic<VkPipeline> m_basePipelineBase = { VK_NULL_HANDLE };
+    std::atomic<VkPipeline> m_fastPipelineBase = { VK_NULL_HANDLE };
    
-    bool findPipeline(
+    DxvkGraphicsPipelineInstance* findInstance(
      const DxvkGraphicsPipelineStateInfo& state,
-            VkRenderPass                   renderPass,
-            VkPipeline&                    pipeline) const;
+            VkRenderPass                   renderPass) const;
    
    VkPipeline compilePipeline(
      const DxvkGraphicsPipelineStateInfo& state,
            VkRenderPass                   renderPass,
+            VkPipelineCreateFlags          createFlags,
            VkPipeline                     baseHandle) const;
    
-    void destroyPipelines();
-    
    bool validatePipelineState(
      const DxvkGraphicsPipelineStateInfo& state) const;
    
--- a/src/dxvk/dxvk_pipecompiler.cpp
+++ b/src/dxvk/dxvk_pipecompiler.cpp
@ -0,0 +1,72 @@
+#include "dxvk_graphics.h"
+#include "dxvk_pipecompiler.h"
+
+namespace dxvk {
+  
+  DxvkPipelineCompiler::DxvkPipelineCompiler() {
+    // Use ~half the CPU cores for pipeline compilation
+    const uint32_t threadCount = std::max<uint32_t>(
+      1u, std::thread::hardware_concurrency() / 2);
+    
+    Logger::debug(str::format(
+      "DxvkPipelineCompiler: Using ", threadCount, " workers"));
+    
+    // Start the compiler threads
+    m_compilerThreads.resize(threadCount);
+    
+    for (uint32_t i = 0; i < threadCount; i++) {
+      m_compilerThreads.at(i) = std::thread(
+        [this, i] { this->runCompilerThread(i); });
+    }
+  }
+  
+  
+  DxvkPipelineCompiler::~DxvkPipelineCompiler() {
+    { std::unique_lock<std::mutex> lock(m_compilerLock);
+      m_compilerStop.store(true);
+    }
+    
+    m_compilerCond.notify_all();
+    for (auto& thread : m_compilerThreads)
+      thread.join();
+  }
+  
+  
+  void DxvkPipelineCompiler::queueCompilation(
+    const Rc<DxvkGraphicsPipeline>&         pipeline,
+    const Rc<DxvkGraphicsPipelineInstance>& instance) {
+    std::unique_lock<std::mutex> lock(m_compilerLock);
+    m_compilerQueue.push({ pipeline, instance });
+    m_compilerCond.notify_one();
+  }
+  
+  
+  void DxvkPipelineCompiler::runCompilerThread(uint32_t workerId) {
+    Logger::debug(str::format(
+      "DxvkPipelineCompiler: Worker #", workerId, " started"));
+    
+    while (!m_compilerStop.load()) {
+      PipelineEntry entry;
+      
+      { std::unique_lock<std::mutex> lock(m_compilerLock);
+        
+        m_compilerCond.wait(lock, [this] {
+          return m_compilerStop.load()
+              || m_compilerQueue.size() != 0;
+        });
+        
+        if (m_compilerQueue.size() != 0) {
+          entry = std::move(m_compilerQueue.front());
+          m_compilerQueue.pop();
+        }
+      }
+      
+      if (entry.pipeline != nullptr && entry.instance != nullptr)
+        entry.pipeline->compileInstance(entry.instance);
+    }
+    
+    Logger::debug(str::format(
+      "DxvkPipelineCompiler: Worker #", workerId, " stopped"));
+  }
+  
+}
--- a/src/dxvk/dxvk_pipecompiler.h
+++ b/src/dxvk/dxvk_pipecompiler.h
@ -0,0 +1,58 @@
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <thread>
+
+#include "dxvk_include.h"
+
+namespace dxvk {
+  
+  class DxvkGraphicsPipeline;
+  class DxvkGraphicsPipelineInstance;
+  
+  /**
+   * \brief Pipeline compiler
+   * 
+   * Asynchronous pipeline compiler, which is used
+   * to compile optimized versions of pipelines.
+   */
+  class DxvkPipelineCompiler : public RcObject {
+    
+  public:
+    
+    DxvkPipelineCompiler();
+    ~DxvkPipelineCompiler();
+    
+    /**
+     * \brief Compiles a pipeline asynchronously
+     * 
+     * This should be used to compile optimized
+     * graphics pipeline instances asynchronously.
+     * \param [in] pipeline The pipeline object
+     * \param [in] instance The pipeline instance
+     */
+    void queueCompilation(
+      const Rc<DxvkGraphicsPipeline>&         pipeline,
+      const Rc<DxvkGraphicsPipelineInstance>& instance);
+    
+  private:
+    
+    struct PipelineEntry {
+      Rc<DxvkGraphicsPipeline>         pipeline;
+      Rc<DxvkGraphicsPipelineInstance> instance;
+    };
+    
+    std::atomic<bool>           m_compilerStop = { false };
+    std::mutex                  m_compilerLock;
+    std::condition_variable     m_compilerCond;
+    std::queue<PipelineEntry>   m_compilerQueue;
+    std::vector<std::thread>    m_compilerThreads;
+    
+    void runCompilerThread(uint32_t workerId);
+    
+  };
+  
+}
--- a/src/dxvk/dxvk_pipemanager.cpp
+++ b/src/dxvk/dxvk_pipemanager.cpp
@ -39,8 +39,12 @@ namespace dxvk {
  
  
  DxvkPipelineManager::DxvkPipelineManager(const DxvkDevice* device)
-  : m_device(device), m_cache(new DxvkPipelineCache(device->vkd())) {
-    
+  : m_device  (device),
+    m_cache   (new DxvkPipelineCache(device->vkd())),
+    m_compiler(nullptr) {
+    // Async pipeline compilation is opt-in for now
+    if (env::getEnvVar(L"DXVK_USE_PIPECOMPILER") == "1")
+      m_compiler = new DxvkPipelineCompiler();
  }
  
  
@ -93,8 +97,8 @@ namespace dxvk {
    if (pair != m_graphicsPipelines.end())
      return pair->second;
    
-    const Rc<DxvkGraphicsPipeline> pipeline
-      = new DxvkGraphicsPipeline(m_device, m_cache, vs, tcs, tes, gs, fs);
+    Rc<DxvkGraphicsPipeline> pipeline = new DxvkGraphicsPipeline(
+      m_device, m_cache, m_compiler, vs, tcs, tes, gs, fs);
    
    m_graphicsPipelines.insert(std::make_pair(key, pipeline));
    return pipeline;
--- a/src/dxvk/dxvk_pipemanager.h
+++ b/src/dxvk/dxvk_pipemanager.h
@ -5,6 +5,7 @@

 #include "dxvk_compute.h"
 #include "dxvk_graphics.h"
+#include "dxvk_pipecompiler.h"

 namespace dxvk {
  
@ -96,8 +97,9 @@ namespace dxvk {
    
  private:
    
-    const DxvkDevice*           m_device;
-    const Rc<DxvkPipelineCache> m_cache;
+    const DxvkDevice*         m_device;
+    Rc<DxvkPipelineCache>     m_cache;
+    Rc<DxvkPipelineCompiler>  m_compiler;
    
    std::mutex m_mutex;
    
--- a/src/dxvk/meson.build
+++ b/src/dxvk/meson.build
@ -43,6 +43,7 @@ dxvk_src = files([
  'dxvk_meta_clear.cpp',
  'dxvk_meta_resolve.cpp',
  'dxvk_pipecache.cpp',
+  'dxvk_pipecompiler.cpp',
  'dxvk_pipelayout.cpp',
  'dxvk_pipemanager.cpp',
  'dxvk_query.cpp',