[dxvk] Make use of VK_AMD_rasterization_order

May slightly improve GPU performance in some scenarios.
2018-01-16 15:00:19 +01:00 · 2018-01-16 15:00:19 +01:00 · d3f84688cc
parent 27573e9b25
commit d3f84688cc
8 changed files with 77 additions and 33 deletions
--- a/src/dxvk/dxvk_compute.cpp
+++ b/src/dxvk/dxvk_compute.cpp
@ -1,20 +1,22 @@
 #include "dxvk_compute.h"
+#include "dxvk_device.h"

 namespace dxvk {
  
  DxvkComputePipeline::DxvkComputePipeline(
-    const Rc<vk::DeviceFn>&       vkd,
+    const DxvkDevice*             device,
    const Rc<DxvkPipelineCache>&  cache,
    const Rc<DxvkShader>&         cs)
-  : m_vkd(vkd), m_cache(cache) {
+  : m_device(device), m_vkd(device->vkd()),
+    m_cache(cache) {
    DxvkDescriptorSlotMapping slotMapping;
    cs->defineResourceSlots(slotMapping);
    
-    m_layout = new DxvkBindingLayout(vkd,
+    m_layout = new DxvkBindingLayout(m_vkd,
      slotMapping.bindingCount(),
      slotMapping.bindingInfos());
    
-    m_cs = cs->createShaderModule(vkd, slotMapping);
+    m_cs = cs->createShaderModule(m_vkd, slotMapping);
    
    this->compilePipeline();
  }
--- a/src/dxvk/dxvk_compute.h
+++ b/src/dxvk/dxvk_compute.h
@ -7,6 +7,8 @@

 namespace dxvk {
  
+  class DxvkDevice;
+  
  /**
   * \brief Compute pipeline
   * 
@ -20,7 +22,7 @@ namespace dxvk {
  public:
    
    DxvkComputePipeline(
-      const Rc<vk::DeviceFn>&       vkd,
+      const DxvkDevice*             device,
      const Rc<DxvkPipelineCache>&  cache,
      const Rc<DxvkShader>&         cs);
    ~DxvkComputePipeline();
@ -47,7 +49,9 @@ namespace dxvk {
    
  private:
    
-    Rc<vk::DeviceFn>      m_vkd;
+    const DxvkDevice* const m_device;
+    const Rc<vk::DeviceFn>  m_vkd;
+    
    Rc<DxvkPipelineCache> m_cache;
    Rc<DxvkBindingLayout> m_layout;
    Rc<DxvkShaderModule>  m_cs;
--- a/src/dxvk/dxvk_device.cpp
+++ b/src/dxvk/dxvk_device.cpp
@ -15,7 +15,7 @@ namespace dxvk {
    m_memory          (new DxvkMemoryAllocator(adapter, vkd)),
    m_renderPassPool  (new DxvkRenderPassPool (vkd)),
    m_pipelineCache   (new DxvkPipelineCache  (vkd)),
-    m_pipelineManager (new DxvkPipelineManager(vkd)),
+    m_pipelineManager (new DxvkPipelineManager(this)),
    m_submissionQueue (this) {
    m_options.adjustAppOptions(env::getExeName());
    m_options.adjustDeviceOptions(m_adapter);
--- a/src/dxvk/dxvk_graphics.cpp
+++ b/src/dxvk/dxvk_graphics.cpp
@ -1,5 +1,6 @@
 #include <cstring>

+#include "dxvk_device.h"
 #include "dxvk_graphics.h"

 namespace dxvk {
@ -33,14 +34,15 @@ namespace dxvk {
  
  
  DxvkGraphicsPipeline::DxvkGraphicsPipeline(
-      const Rc<vk::DeviceFn>&       vkd,
-      const Rc<DxvkPipelineCache>&  cache,
-      const Rc<DxvkShader>&         vs,
-      const Rc<DxvkShader>&         tcs,
-      const Rc<DxvkShader>&         tes,
-      const Rc<DxvkShader>&         gs,
-      const Rc<DxvkShader>&         fs)
-  : m_vkd(vkd), m_cache(cache) {
+    const DxvkDevice*             device,
+    const Rc<DxvkPipelineCache>&  cache,
+    const Rc<DxvkShader>&         vs,
+    const Rc<DxvkShader>&         tcs,
+    const Rc<DxvkShader>&         tes,
+    const Rc<DxvkShader>&         gs,
+    const Rc<DxvkShader>&         fs)
+  : m_device(device), m_vkd(device->vkd()),
+    m_cache(cache) {
    DxvkDescriptorSlotMapping slotMapping;
    if (vs  != nullptr) vs ->defineResourceSlots(slotMapping);
    if (tcs != nullptr) tcs->defineResourceSlots(slotMapping);
@ -48,15 +50,15 @@ namespace dxvk {
    if (gs  != nullptr) gs ->defineResourceSlots(slotMapping);
    if (fs  != nullptr) fs ->defineResourceSlots(slotMapping);
    
-    m_layout = new DxvkBindingLayout(vkd,
+    m_layout = new DxvkBindingLayout(m_vkd,
      slotMapping.bindingCount(),
      slotMapping.bindingInfos());
    
-    if (vs  != nullptr) m_vs  = vs ->createShaderModule(vkd, slotMapping);
-    if (tcs != nullptr) m_tcs = tcs->createShaderModule(vkd, slotMapping);
-    if (tes != nullptr) m_tes = tes->createShaderModule(vkd, slotMapping);
-    if (gs  != nullptr) m_gs  = gs ->createShaderModule(vkd, slotMapping);
-    if (fs  != nullptr) m_fs  = fs ->createShaderModule(vkd, slotMapping);
+    if (vs  != nullptr) m_vs  = vs ->createShaderModule(m_vkd, slotMapping);
+    if (tcs != nullptr) m_tcs = tcs->createShaderModule(m_vkd, slotMapping);
+    if (tes != nullptr) m_tes = tes->createShaderModule(m_vkd, slotMapping);
+    if (gs  != nullptr) m_gs  = gs ->createShaderModule(m_vkd, slotMapping);
+    if (fs  != nullptr) m_fs  = fs ->createShaderModule(m_vkd, slotMapping);
    
    m_vsIn  = vs != nullptr ? vs->interfaceSlots().inputSlots  : 0;
    m_fsOut = fs != nullptr ? fs->interfaceSlots().outputSlots : 0;
@ -146,9 +148,14 @@ namespace dxvk {
    vpInfo.scissorCount           = state.rsViewportCount;
    vpInfo.pScissors              = nullptr;
    
+    VkPipelineRasterizationStateRasterizationOrderAMD rsOrder;
+    rsOrder.sType                 = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD;
+    rsOrder.pNext                 = nullptr;
+    rsOrder.rasterizationOrder    = this->pickRasterizationOrder(state);
+    
    VkPipelineRasterizationStateCreateInfo rsInfo;
    rsInfo.sType                  = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
-    rsInfo.pNext                  = nullptr;
+    rsInfo.pNext                  = m_device->extensions().amdRasterizationOrder.enabled() ? &rsOrder : rsOrder.pNext;
    rsInfo.flags                  = 0;
    rsInfo.depthClampEnable       = state.rsEnableDepthClamp;
    rsInfo.rasterizerDiscardEnable= state.rsEnableDiscard;
@ -260,4 +267,29 @@ namespace dxvk {
    return true;
  }
  
+  
+  // Picks the rasterization order that is handed to the driver via
+  // VkPipelineRasterizationStateRasterizationOrderAMD for the given
+  // pipeline state. Relaxed order is only returned when none of the
+  // checks below finds a reason for the output to depend on the
+  // order in which primitives are rasterized; in every other case
+  // strict order is returned.
+  VkRasterizationOrderAMD DxvkGraphicsPipeline::pickRasterizationOrder(
+    const DxvkGraphicsPipelineStateInfo& state) const {
+    // Both blending and framebuffer logic ops combine the fragment
+    // color with the value already stored in the render target, so
+    // either of them makes the result order-dependent. Blend state
+    // is only considered for render targets that are part of the
+    // fragment shader's output slot mask.
+    bool readsDestination = state.omEnableLogicOp != VK_FALSE;
+    
+    for (uint32_t i = 0; i < MaxNumRenderTargets; i++) {
+      if (m_fsOut & (1u << i))
+        readsDestination |= state.omBlendAttachments[i].blendEnable;
+    }
+    
+    if (readsDestination)
+      return VK_RASTERIZATION_ORDER_STRICT_AMD;
+    
+    // Applications flagged with AssumeNoZfight get relaxed order
+    // regardless of the depth-compare mode.
+    if (m_device->hasOption(DxvkOption::AssumeNoZfight))
+      return VK_RASTERIZATION_ORDER_RELAXED_AMD;
+    
+    // Only these depth-compare modes are treated as safe for
+    // out-of-order rasterization.
+    // NOTE(review): this does not check whether depth testing and
+    // depth writes are actually enabled; if they are not, the compare
+    // op is presumably irrelevant and strict order may be required.
+    // Confirm against the depth-stencil fields of the state struct.
+    if (state.dsDepthCompareOp == VK_COMPARE_OP_NEVER
+     || state.dsDepthCompareOp == VK_COMPARE_OP_LESS
+     || state.dsDepthCompareOp == VK_COMPARE_OP_GREATER)
+      return VK_RASTERIZATION_ORDER_RELAXED_AMD;
+    
+    return VK_RASTERIZATION_ORDER_STRICT_AMD;
+  }
+  
 }
--- a/src/dxvk/dxvk_graphics.h
+++ b/src/dxvk/dxvk_graphics.h
@ -12,6 +12,8 @@

 namespace dxvk {
  
+  class DxvkDevice;
+  
  /**
   * \brief Graphics pipeline state info
   * 
@ -72,7 +74,7 @@ namespace dxvk {
    VkBool32                            omEnableLogicOp;
    VkLogicOp                           omLogicOp;
    VkRenderPass                        omRenderPass;
-    VkPipelineColorBlendAttachmentState omBlendAttachments[DxvkLimits::MaxNumRenderTargets];
+    VkPipelineColorBlendAttachmentState omBlendAttachments[MaxNumRenderTargets];
  };
  
  
@ -88,7 +90,7 @@ namespace dxvk {
  public:
    
    DxvkGraphicsPipeline(
-      const Rc<vk::DeviceFn>&       vkd,
+      const DxvkDevice*             device,
      const Rc<DxvkPipelineCache>&  cache,
      const Rc<DxvkShader>&         vs,
      const Rc<DxvkShader>&         tcs,
@ -127,7 +129,9 @@ namespace dxvk {
      VkPipeline                    pipeline;
    };
    
-    Rc<vk::DeviceFn>      m_vkd;
+    const DxvkDevice* const m_device;
+    const Rc<vk::DeviceFn>  m_vkd;
+    
    Rc<DxvkPipelineCache> m_cache;
    Rc<DxvkBindingLayout> m_layout;
    
@ -154,6 +158,9 @@ namespace dxvk {
    bool validatePipelineState(
      const DxvkGraphicsPipelineStateInfo& state) const;
    
+    VkRasterizationOrderAMD pickRasterizationOrder(
+      const DxvkGraphicsPipelineStateInfo& state) const;
+    
  };
  
 }
--- a/src/dxvk/dxvk_options.cpp
+++ b/src/dxvk/dxvk_options.cpp
@ -5,7 +5,7 @@
 namespace dxvk {
  
  const static std::unordered_map<std::string, DxvkOptionSet> g_appOptions = {{
-    
+    { "NieRAutomata.exe", DxvkOptionSet(DxvkOption::AssumeNoZfight) },
  }};
  
  
--- a/src/dxvk/dxvk_pipemanager.cpp
+++ b/src/dxvk/dxvk_pipemanager.cpp
@ -35,8 +35,8 @@ namespace dxvk {
  }
  
  
-  DxvkPipelineManager::DxvkPipelineManager(const Rc<vk::DeviceFn>& vkd)
-  : m_vkd(vkd) {
+  DxvkPipelineManager::DxvkPipelineManager(const DxvkDevice* device)
+  : m_device(device) {
    
  }
  
@ -62,7 +62,7 @@ namespace dxvk {
      return pair->second;
    
    const Rc<DxvkComputePipeline> pipeline
-      = new DxvkComputePipeline(m_vkd, cache, cs);
+      = new DxvkComputePipeline(m_device, cache, cs);
    m_computePipelines.insert(std::make_pair(key, pipeline));
    return pipeline;
  }
@ -92,7 +92,7 @@ namespace dxvk {
      return pair->second;
    
    const Rc<DxvkGraphicsPipeline> pipeline
-      = new DxvkGraphicsPipeline(m_vkd, cache, vs, tcs, tes, gs, fs);
+      = new DxvkGraphicsPipeline(m_device, cache, vs, tcs, tes, gs, fs);
    m_graphicsPipelines.insert(std::make_pair(key, pipeline));
    return pipeline;
  }
--- a/src/dxvk/dxvk_pipemanager.h
+++ b/src/dxvk/dxvk_pipemanager.h
@ -59,8 +59,7 @@ namespace dxvk {
    
  public:
    
-    DxvkPipelineManager(
-      const Rc<vk::DeviceFn>&   vkd);
+    DxvkPipelineManager(const DxvkDevice* device);
    ~DxvkPipelineManager();
    
    /**
@ -99,7 +98,7 @@ namespace dxvk {
    
  private:
    
-    const Rc<vk::DeviceFn> m_vkd;
+    const DxvkDevice* m_device;
    
    std::mutex m_mutex;