[dxbc] Implemented vendor-specific workarounds in an attempt to fix Nvidia

2018-01-07 20:05:27 +01:00 · 2018-01-07 20:05:27 +01:00 · f4cd90d6fa
parent 4e06f498dd
commit f4cd90d6fa
16 changed files with 184 additions and 23 deletions
--- a/src/d3d11/d3d11_device.cpp
+++ b/src/d3d11/d3d11_device.cpp
@ -23,7 +23,8 @@ namespace dxvk {
    m_featureLevel  (featureLevel),
    m_featureFlags  (featureFlags),
    m_dxvkDevice    (m_dxgiDevice->GetDXVKDevice()),
-    m_dxvkAdapter   (m_dxvkDevice->adapter()) {
+    m_dxvkAdapter   (m_dxvkDevice->adapter()),
+    m_dxbcOptions   (m_dxvkDevice) {
    Com<IDXGIAdapter> adapter;
    
    if (FAILED(m_dxgiDevice->GetAdapter(&adapter))
@ -1356,7 +1357,7 @@ namespace dxvk {
    
    try {
      *pShaderModule = D3D11ShaderModule(
-        this, pShaderBytecode, BytecodeLength);
+        &m_dxbcOptions, this, pShaderBytecode, BytecodeLength);
      return S_OK;
    } catch (const DxvkError& e) {
      Logger::err(e.message());
--- a/src/d3d11/d3d11_device.h
+++ b/src/d3d11/d3d11_device.h
@ -1,13 +1,15 @@
 #pragma once

+#include "../dxbc/dxbc_options.h"
+
 #include "../dxgi/dxgi_object.h"

+#include "../util/com/com_private_data.h"
+
 #include "d3d11_interfaces.h"
 #include "d3d11_state.h"
 #include "d3d11_util.h"

-#include "../util/com/com_private_data.h"
-
 namespace dxvk {
  class DxgiAdapter;
  
@ -255,6 +257,8 @@ namespace dxvk {
    const Rc<DxvkDevice>            m_dxvkDevice;
    const Rc<DxvkAdapter>           m_dxvkAdapter;
    
+    const DxbcOptions               m_dxbcOptions;
+    
    D3D11DeviceContext*             m_context = nullptr;
    
    std::mutex                      m_resourceInitMutex;
--- a/src/d3d11/d3d11_shader.cpp
+++ b/src/d3d11/d3d11_shader.cpp
@ -8,6 +8,7 @@ namespace dxvk {
  
  
  D3D11ShaderModule::D3D11ShaderModule(
+    const DxbcOptions*  pDxbcOptions,
          D3D11Device*  pDevice,
    const void*         pShaderBytecode,
          size_t        BytecodeLength) {
@ -33,7 +34,7 @@ namespace dxvk {
    }
    
    
-    m_shader = module.compile();
+    m_shader = module.compile(*pDxbcOptions);
      
    if (dumpPath.size() != 0) {
      const std::string baseName = str::format(dumpPath, "/",
--- a/src/d3d11/d3d11_shader.h
+++ b/src/d3d11/d3d11_shader.h
@ -25,6 +25,7 @@ namespace dxvk {
    
    D3D11ShaderModule();
    D3D11ShaderModule(
+      const DxbcOptions*  pDxbcOptions,
            D3D11Device*  pDevice,
      const void*         pShaderBytecode,
            size_t        BytecodeLength);
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@ -7,10 +7,12 @@ namespace dxvk {
  constexpr uint32_t PerVertex_ClipDist  = 2;
  
  DxbcCompiler::DxbcCompiler(
+    const DxbcOptions&        options,
    const DxbcProgramVersion& version,
    const Rc<DxbcIsgn>&       isgn,
    const Rc<DxbcIsgn>&       osgn)
-  : m_version (version),
+  : m_options (options),
+    m_version (version),
    m_isgn    (isgn),
    m_osgn    (osgn) {
    // Declare an entry point ID. We'll need it during the
@ -1040,13 +1042,15 @@ namespace dxvk {
        break;
      
      case DxbcOpcode::Max:
-        dst.id = m_module.opNMax(typeId,
-          src.at(0).id, src.at(1).id);
+        dst.id = m_options.useSimpleMinMaxClamp
+          ? m_module.opFMax(typeId, src.at(0).id, src.at(1).id)
+          : m_module.opNMax(typeId, src.at(0).id, src.at(1).id);
        break;
      
      case DxbcOpcode::Min:
-        dst.id = m_module.opNMin(typeId,
-          src.at(0).id, src.at(1).id);
+        dst.id = m_options.useSimpleMinMaxClamp
+          ? m_module.opFMin(typeId, src.at(0).id, src.at(1).id)
+          : m_module.opNMin(typeId, src.at(0).id, src.at(1).id);
        break;
      
      case DxbcOpcode::Mul:
@ -2211,7 +2215,7 @@ namespace dxvk {
    
    // Load the texture coordinates. SPIR-V allows these
    // to be float4 even if not all components are used.
-    const DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask);
+    DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask);
    
    // Load reference value for depth-compare operations
    const bool isDepthCompare = ins.op == DxbcOpcode::SampleC
@ -2221,6 +2225,17 @@ namespace dxvk {
      ? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false))
      : DxbcRegisterValue();
    
+    if (isDepthCompare && m_options.packDrefValueIntoCoordinates) {
+      const std::array<uint32_t, 2> packedCoordIds
+        = {{ coord.id, referenceValue.id }};
+      
+      coord.type.ccount += 1;
+      coord.id = m_module.opCompositeConstruct(
+        getVectorTypeId(coord.type),
+        packedCoordIds.size(),
+        packedCoordIds.data());
+    }
+    
    // Load explicit gradients for sample operations that require them
    const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD;
    
@ -3034,10 +3049,9 @@ namespace dxvk {
    if (value.type.ctype == DxbcScalarType::Float32) {
      // Saturating only makes sense on floats
      if (modifiers.saturate) {
-        value.id = m_module.opNClamp(
-          typeId, value.id,
-          m_module.constf32(0.0f),
-          m_module.constf32(1.0f));
+        value.id = m_options.useSimpleMinMaxClamp
+          ? m_module.opFClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f))
+          : m_module.opNClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f));
      }
    }
    
--- a/src/dxbc/dxbc_compiler.h
+++ b/src/dxbc/dxbc_compiler.h
@ -9,6 +9,7 @@
 #include "dxbc_decoder.h"
 #include "dxbc_defs.h"
 #include "dxbc_names.h"
+#include "dxbc_options.h"
 #include "dxbc_util.h"

 namespace dxvk {
@ -213,6 +214,7 @@ namespace dxvk {
  public:
    
    DxbcCompiler(
+      const DxbcOptions&        options,
      const DxbcProgramVersion& version,
      const Rc<DxbcIsgn>&       isgn,
      const Rc<DxbcIsgn>&       osgn);
@ -233,6 +235,7 @@ namespace dxvk {
    
  private:
    
+    DxbcOptions         m_options;
    DxbcProgramVersion  m_version;
    SpirvModule         m_module;
    
--- a/src/dxbc/dxbc_module.cpp
+++ b/src/dxbc/dxbc_module.cpp
@ -40,13 +40,13 @@ namespace dxvk {
  }
  
  
-  Rc<DxvkShader> DxbcModule::compile() const {
+  Rc<DxvkShader> DxbcModule::compile(const DxbcOptions& options) const {
    if (m_shexChunk == nullptr)
      throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk");
    
    DxbcCodeSlice slice = m_shexChunk->slice();
    
-    DxbcCompiler compiler(
+    DxbcCompiler compiler(options,
      m_shexChunk->version(),
      m_isgnChunk, m_osgnChunk);
    
--- a/src/dxbc/dxbc_module.h
+++ b/src/dxbc/dxbc_module.h
@ -5,6 +5,7 @@
 #include "dxbc_chunk_isgn.h"
 #include "dxbc_chunk_shex.h"
 #include "dxbc_header.h"
+#include "dxbc_options.h"
 #include "dxbc_reader.h"

 // References used for figuring out DXBC:
@ -46,9 +47,11 @@ namespace dxvk {
    
    /**
     * \brief Compiles DXBC shader to SPIR-V module
+     * 
+     * \param [in] options DXBC compiler options
     * \returns The compiled shader object
     */
-    Rc<DxvkShader> compile() const;
+    Rc<DxvkShader> compile(const DxbcOptions& options) const;
    
  private:
    
--- a/src/dxbc/dxbc_options.cpp
+++ b/src/dxbc/dxbc_options.cpp
@ -0,0 +1,29 @@
+#include "dxbc_options.h"
+
+namespace dxvk {
+  
+  DxbcOptions::DxbcOptions(const Rc<DxvkDevice>& device) {
+    const VkPhysicalDeviceProperties deviceProps
+      = device->adapter()->deviceProperties();
+    
+    // Compare the raw 32-bit vendor ID: casting it to DxvkGpuVendor
+    // (a 16-bit enum) first would discard its upper 16 bits.
+    if (deviceProps.vendorID
+     == static_cast<uint32_t>(DxvkGpuVendor::Nvidia)) {
+      // From vkd3d: NMin/NMax/NClamp crash the driver.
+      this->useSimpleMinMaxClamp = true;
+      
+      // From vkd3d: Nvidia expects the depth reference
+      // value to be packed into the coordinate vector.
+      this->packDrefValueIntoCoordinates = true;
+    }
+    
+    // Inform the user about which workarounds are enabled
+    if (this->useSimpleMinMaxClamp)
+      Logger::warn("DxbcOptions: Using FMin/FMax/FClamp instead of NMin/NMax/NClamp");
+    
+    if (this->packDrefValueIntoCoordinates)
+      Logger::warn("DxbcOptions: Packing depth reference value into coordinate vector");
+  }
+  
+}
--- a/src/dxbc/dxbc_options.h
+++ b/src/dxbc/dxbc_options.h
@ -0,0 +1,26 @@
+#pragma once
+
+#include "../dxvk/dxvk_device.h"
+
+namespace dxvk {
+  
+  /**
+   * \brief DXBC compiler options
+   * 
+   * Defines driver- or device-specific options,
+   * which are mostly workarounds for driver bugs.
+   */
+  struct DxbcOptions {
+    DxbcOptions() { }
+    DxbcOptions(
+      const Rc<DxvkDevice>& device);
+      
+    /// Use FMin/FMax/FClamp instead of NMin/NMax/NClamp.
+    bool useSimpleMinMaxClamp = false;
+    
+    /// Pack the depth reference value into the
+    /// coordinate vector for depth-compare ops.
+    bool packDrefValueIntoCoordinates = false;
+  };
+  
+}
--- a/src/dxbc/meson.build
+++ b/src/dxbc/meson.build
@ -8,6 +8,7 @@ dxbc_src = files([
  'dxbc_header.cpp',
  'dxbc_module.cpp',
  'dxbc_names.cpp',
+  'dxbc_options.cpp',
  'dxbc_reader.cpp',
  'dxbc_util.cpp',
 ])
--- a/src/dxvk/dxvk_adapter.h
+++ b/src/dxvk/dxvk_adapter.h
@ -10,6 +10,16 @@ namespace dxvk {
  class DxvkInstance;
  class DxvkSurface;
  
+  /**
+   * \brief GPU vendors
+   * PCI vendor IDs; compared against VkPhysicalDeviceProperties::vendorID.
+   */
+  enum class DxvkGpuVendor : uint16_t {
+    Amd    = 0x1002,
+    Nvidia = 0x10de,
+    Intel  = 0x8086,
+  };
+  
  /**
   * \brief DXVK adapter
   * 
--- a/src/dxvk/dxvk_instance.cpp
+++ b/src/dxvk/dxvk_instance.cpp
@ -70,7 +70,6 @@ namespace dxvk {
    
    if (env::getEnvVar(L"DXVK_DEBUG_LAYERS") == "1")
      layers.push_back("VK_LAYER_LUNARG_standard_validation");
-
    
    const vk::NameSet layersAvailable
      = vk::NameSet::enumerateInstanceLayers(*m_vkl);
--- a/src/spirv/spirv_module.cpp
+++ b/src/spirv/spirv_module.cpp
@ -1507,7 +1507,7 @@ namespace dxvk {
  }
    
  
-  uint32_t SpirvModule::opNMax(
+  uint32_t SpirvModule::opFMax(
          uint32_t                resultType,
          uint32_t                a,
          uint32_t                b) {
@ -1524,7 +1524,7 @@ namespace dxvk {
  }
  
  
-  uint32_t SpirvModule::opNMin(
+  uint32_t SpirvModule::opFMin(
          uint32_t                resultType,
          uint32_t                a,
          uint32_t                b) {
@ -1539,6 +1539,40 @@ namespace dxvk {
    m_code.putWord(b);
    return resultId;
  }
+    
+  
+  uint32_t SpirvModule::opNMax(
+          uint32_t                resultType,
+          uint32_t                a,
+          uint32_t                b) {
+    uint32_t resultId = this->allocateId();
+    // Emit GLSLstd450NMax via OpExtInst; 7 is presumably the total word count (1 + 6 below).
+    m_code.putIns (spv::OpExtInst, 7);
+    m_code.putWord(resultType);
+    m_code.putWord(resultId);
+    m_code.putWord(m_instExtGlsl450);
+    m_code.putWord(spv::GLSLstd450NMax);
+    m_code.putWord(a);
+    m_code.putWord(b);
+    return resultId;
+  }
+  
+  
+  uint32_t SpirvModule::opNMin(
+          uint32_t                resultType,
+          uint32_t                a,
+          uint32_t                b) {
+    uint32_t resultId = this->allocateId();
+    // Emit GLSLstd450NMin via OpExtInst; 7 is presumably the total word count (1 + 6 below).
+    m_code.putIns (spv::OpExtInst, 7);
+    m_code.putWord(resultType);
+    m_code.putWord(resultId);
+    m_code.putWord(m_instExtGlsl450);
+    m_code.putWord(spv::GLSLstd450NMin);
+    m_code.putWord(a);
+    m_code.putWord(b);
+    return resultId;
+  }
  
  
  uint32_t SpirvModule::opSMax(
@ -1609,7 +1643,7 @@ namespace dxvk {
  }
  
  
-  uint32_t SpirvModule::opNClamp(
+  uint32_t SpirvModule::opFClamp(
          uint32_t                resultType,
          uint32_t                x,
          uint32_t                minVal,
@ -1628,6 +1662,25 @@ namespace dxvk {
  }
  
  
+  uint32_t SpirvModule::opNClamp(
+          uint32_t                resultType,
+          uint32_t                x,
+          uint32_t                minVal,
+          uint32_t                maxVal) {
+    uint32_t resultId = this->allocateId();
+    // Emit GLSLstd450NClamp via OpExtInst; 8 is presumably the total word count (1 + 7 below).
+    m_code.putIns (spv::OpExtInst, 8);
+    m_code.putWord(resultType);
+    m_code.putWord(resultId);
+    m_code.putWord(m_instExtGlsl450);
+    m_code.putWord(spv::GLSLstd450NClamp);
+    m_code.putWord(x);
+    m_code.putWord(minVal);
+    m_code.putWord(maxVal);
+    return resultId;
+  }
+  
+  
  uint32_t SpirvModule::opIEqual(
          uint32_t                resultType,
          uint32_t                vector1,
--- a/src/spirv/spirv_module.h
+++ b/src/spirv/spirv_module.h
@ -565,6 +565,16 @@ namespace dxvk {
            uint32_t                b,
            uint32_t                c);
    
+    uint32_t opFMax(
+            uint32_t                resultType,
+            uint32_t                a,
+            uint32_t                b);
+    
+    uint32_t opFMin(
+            uint32_t                resultType,
+            uint32_t                a,
+            uint32_t                b);
+    
    uint32_t opNMax(
            uint32_t                resultType,
            uint32_t                a,
@ -595,6 +605,12 @@ namespace dxvk {
            uint32_t                a,
            uint32_t                b);
    
+    uint32_t opFClamp(
+            uint32_t                resultType,
+            uint32_t                x,
+            uint32_t                minVal,
+            uint32_t                maxVal);
+    
    uint32_t opNClamp(
            uint32_t                resultType,
            uint32_t                x,
--- a/tests/dxbc/test_dxbc_compiler.cpp
+++ b/tests/dxbc/test_dxbc_compiler.cpp
@ -40,7 +40,7 @@ int WINAPI WinMain(HINSTANCE hInstance,
    DxbcReader reader(dxbcCode.data(), dxbcCode.size());
    DxbcModule module(reader);
    
-    Rc<DxvkShader> shader = module.compile();
+    Rc<DxvkShader> shader = module.compile(DxbcOptions());
    shader->dump(std::ofstream(
      str::fromws(argv[2]), std::ios::binary));
    return 0;