[dxbc] Implemented vendor-specific workarounds in an attempt to fix Nvidia

This commit is contained in:
Philip Rebohle 2018-01-07 20:05:27 +01:00
parent 4e06f498dd
commit f4cd90d6fa
16 changed files with 184 additions and 23 deletions

View File

@ -23,7 +23,8 @@ namespace dxvk {
m_featureLevel (featureLevel), m_featureLevel (featureLevel),
m_featureFlags (featureFlags), m_featureFlags (featureFlags),
m_dxvkDevice (m_dxgiDevice->GetDXVKDevice()), m_dxvkDevice (m_dxgiDevice->GetDXVKDevice()),
m_dxvkAdapter (m_dxvkDevice->adapter()) { m_dxvkAdapter (m_dxvkDevice->adapter()),
m_dxbcOptions (m_dxvkDevice) {
Com<IDXGIAdapter> adapter; Com<IDXGIAdapter> adapter;
if (FAILED(m_dxgiDevice->GetAdapter(&adapter)) if (FAILED(m_dxgiDevice->GetAdapter(&adapter))
@ -1356,7 +1357,7 @@ namespace dxvk {
try { try {
*pShaderModule = D3D11ShaderModule( *pShaderModule = D3D11ShaderModule(
this, pShaderBytecode, BytecodeLength); &m_dxbcOptions, this, pShaderBytecode, BytecodeLength);
return S_OK; return S_OK;
} catch (const DxvkError& e) { } catch (const DxvkError& e) {
Logger::err(e.message()); Logger::err(e.message());

View File

@ -1,13 +1,15 @@
#pragma once #pragma once
#include "../dxbc/dxbc_options.h"
#include "../dxgi/dxgi_object.h" #include "../dxgi/dxgi_object.h"
#include "../util/com/com_private_data.h"
#include "d3d11_interfaces.h" #include "d3d11_interfaces.h"
#include "d3d11_state.h" #include "d3d11_state.h"
#include "d3d11_util.h" #include "d3d11_util.h"
#include "../util/com/com_private_data.h"
namespace dxvk { namespace dxvk {
class DxgiAdapter; class DxgiAdapter;
@ -255,6 +257,8 @@ namespace dxvk {
const Rc<DxvkDevice> m_dxvkDevice; const Rc<DxvkDevice> m_dxvkDevice;
const Rc<DxvkAdapter> m_dxvkAdapter; const Rc<DxvkAdapter> m_dxvkAdapter;
const DxbcOptions m_dxbcOptions;
D3D11DeviceContext* m_context = nullptr; D3D11DeviceContext* m_context = nullptr;
std::mutex m_resourceInitMutex; std::mutex m_resourceInitMutex;

View File

@ -8,6 +8,7 @@ namespace dxvk {
D3D11ShaderModule::D3D11ShaderModule( D3D11ShaderModule::D3D11ShaderModule(
const DxbcOptions* pDxbcOptions,
D3D11Device* pDevice, D3D11Device* pDevice,
const void* pShaderBytecode, const void* pShaderBytecode,
size_t BytecodeLength) { size_t BytecodeLength) {
@ -33,7 +34,7 @@ namespace dxvk {
} }
m_shader = module.compile(); m_shader = module.compile(*pDxbcOptions);
if (dumpPath.size() != 0) { if (dumpPath.size() != 0) {
const std::string baseName = str::format(dumpPath, "/", const std::string baseName = str::format(dumpPath, "/",

View File

@ -25,6 +25,7 @@ namespace dxvk {
D3D11ShaderModule(); D3D11ShaderModule();
D3D11ShaderModule( D3D11ShaderModule(
const DxbcOptions* pDxbcOptions,
D3D11Device* pDevice, D3D11Device* pDevice,
const void* pShaderBytecode, const void* pShaderBytecode,
size_t BytecodeLength); size_t BytecodeLength);

View File

@ -7,10 +7,12 @@ namespace dxvk {
constexpr uint32_t PerVertex_ClipDist = 2; constexpr uint32_t PerVertex_ClipDist = 2;
DxbcCompiler::DxbcCompiler( DxbcCompiler::DxbcCompiler(
const DxbcOptions& options,
const DxbcProgramVersion& version, const DxbcProgramVersion& version,
const Rc<DxbcIsgn>& isgn, const Rc<DxbcIsgn>& isgn,
const Rc<DxbcIsgn>& osgn) const Rc<DxbcIsgn>& osgn)
: m_version (version), : m_options (options),
m_version (version),
m_isgn (isgn), m_isgn (isgn),
m_osgn (osgn) { m_osgn (osgn) {
// Declare an entry point ID. We'll need it during the // Declare an entry point ID. We'll need it during the
@ -1040,13 +1042,15 @@ namespace dxvk {
break; break;
case DxbcOpcode::Max: case DxbcOpcode::Max:
dst.id = m_module.opNMax(typeId, dst.id = m_options.useSimpleMinMaxClamp
src.at(0).id, src.at(1).id); ? m_module.opFMax(typeId, src.at(0).id, src.at(1).id)
: m_module.opNMax(typeId, src.at(0).id, src.at(1).id);
break; break;
case DxbcOpcode::Min: case DxbcOpcode::Min:
dst.id = m_module.opNMin(typeId, dst.id = m_options.useSimpleMinMaxClamp
src.at(0).id, src.at(1).id); ? m_module.opFMin(typeId, src.at(0).id, src.at(1).id)
: m_module.opNMin(typeId, src.at(0).id, src.at(1).id);
break; break;
case DxbcOpcode::Mul: case DxbcOpcode::Mul:
@ -2211,7 +2215,7 @@ namespace dxvk {
// Load the texture coordinates. SPIR-V allows these // Load the texture coordinates. SPIR-V allows these
// to be float4 even if not all components are used. // to be float4 even if not all components are used.
const DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask); DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask);
// Load reference value for depth-compare operations // Load reference value for depth-compare operations
const bool isDepthCompare = ins.op == DxbcOpcode::SampleC const bool isDepthCompare = ins.op == DxbcOpcode::SampleC
@ -2221,6 +2225,17 @@ namespace dxvk {
? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false)) ? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false))
: DxbcRegisterValue(); : DxbcRegisterValue();
if (isDepthCompare && m_options.packDrefValueIntoCoordinates) {
const std::array<uint32_t, 2> packedCoordIds
= {{ coord.id, referenceValue.id }};
coord.type.ccount += 1;
coord.id = m_module.opCompositeConstruct(
getVectorTypeId(coord.type),
packedCoordIds.size(),
packedCoordIds.data());
}
// Load explicit gradients for sample operations that require them // Load explicit gradients for sample operations that require them
const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD; const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD;
@ -3034,10 +3049,9 @@ namespace dxvk {
if (value.type.ctype == DxbcScalarType::Float32) { if (value.type.ctype == DxbcScalarType::Float32) {
// Saturating only makes sense on floats // Saturating only makes sense on floats
if (modifiers.saturate) { if (modifiers.saturate) {
value.id = m_module.opNClamp( value.id = m_options.useSimpleMinMaxClamp
typeId, value.id, ? m_module.opFClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f))
m_module.constf32(0.0f), : m_module.opNClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f));
m_module.constf32(1.0f));
} }
} }

View File

@ -9,6 +9,7 @@
#include "dxbc_decoder.h" #include "dxbc_decoder.h"
#include "dxbc_defs.h" #include "dxbc_defs.h"
#include "dxbc_names.h" #include "dxbc_names.h"
#include "dxbc_options.h"
#include "dxbc_util.h" #include "dxbc_util.h"
namespace dxvk { namespace dxvk {
@ -213,6 +214,7 @@ namespace dxvk {
public: public:
DxbcCompiler( DxbcCompiler(
const DxbcOptions& options,
const DxbcProgramVersion& version, const DxbcProgramVersion& version,
const Rc<DxbcIsgn>& isgn, const Rc<DxbcIsgn>& isgn,
const Rc<DxbcIsgn>& osgn); const Rc<DxbcIsgn>& osgn);
@ -233,6 +235,7 @@ namespace dxvk {
private: private:
DxbcOptions m_options;
DxbcProgramVersion m_version; DxbcProgramVersion m_version;
SpirvModule m_module; SpirvModule m_module;

View File

@ -40,13 +40,13 @@ namespace dxvk {
} }
Rc<DxvkShader> DxbcModule::compile() const { Rc<DxvkShader> DxbcModule::compile(const DxbcOptions& options) const {
if (m_shexChunk == nullptr) if (m_shexChunk == nullptr)
throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk"); throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk");
DxbcCodeSlice slice = m_shexChunk->slice(); DxbcCodeSlice slice = m_shexChunk->slice();
DxbcCompiler compiler( DxbcCompiler compiler(options,
m_shexChunk->version(), m_shexChunk->version(),
m_isgnChunk, m_osgnChunk); m_isgnChunk, m_osgnChunk);

View File

@ -5,6 +5,7 @@
#include "dxbc_chunk_isgn.h" #include "dxbc_chunk_isgn.h"
#include "dxbc_chunk_shex.h" #include "dxbc_chunk_shex.h"
#include "dxbc_header.h" #include "dxbc_header.h"
#include "dxbc_options.h"
#include "dxbc_reader.h" #include "dxbc_reader.h"
// References used for figuring out DXBC: // References used for figuring out DXBC:
@ -46,9 +47,11 @@ namespace dxvk {
/** /**
* \brief Compiles DXBC shader to SPIR-V module * \brief Compiles DXBC shader to SPIR-V module
*
* \param [in] options DXBC compiler options
* \returns The compiled shader object * \returns The compiled shader object
*/ */
Rc<DxvkShader> compile() const; Rc<DxvkShader> compile(const DxbcOptions& options) const;
private: private:

29
src/dxbc/dxbc_options.cpp Normal file
View File

@ -0,0 +1,29 @@
#include "dxbc_options.h"
namespace dxvk {
/**
 * \brief Selects compiler workarounds for a device
 *
 * Queries the physical device properties of the device's
 * adapter and enables the workarounds required for the
 * reported vendor. Each enabled workaround is then
 * reported through the logger.
 * \param [in] device Device whose adapter is inspected
 */
DxbcOptions::DxbcOptions(const Rc<DxvkDevice>& device) {
  const VkPhysicalDeviceProperties deviceProps
    = device->adapter()->deviceProperties();

  // Compare at the full width of vendorID, which is a 32-bit
  // value in the Vulkan API. Casting the ID down to the 16-bit
  // DxvkGpuVendor type would discard the upper bits, so that
  // an unrelated vendor ID could alias a PCI vendor ID.
  const bool isNvidia = deviceProps.vendorID
    == static_cast<uint32_t>(DxvkGpuVendor::Nvidia);

  if (isNvidia) {
    // From vkd3d: NMin/NMax/NClamp crash the driver.
    this->useSimpleMinMaxClamp = true;

    // From vkd3d: Nvidia expects the depth reference
    // value to be packed into the coordinate vector.
    this->packDrefValueIntoCoordinates = true;
  }

  // Inform the user about which workarounds are enabled
  if (this->useSimpleMinMaxClamp)
    Logger::warn("DxbcOptions: Using FMin/FMax/FClamp instead of NMin/NMax/NClamp");

  if (this->packDrefValueIntoCoordinates)
    Logger::warn("DxbcOptions: Packing depth reference value into coordinate vector");
}
}

26
src/dxbc/dxbc_options.h Normal file
View File

@ -0,0 +1,26 @@
#pragma once
#include "../dxvk/dxvk_device.h"
namespace dxvk {
/**
 * \brief DXBC compiler options
 *
 * Collects driver- and device-specific switches for the
 * shader compiler, most of which work around driver bugs.
 * A default-constructed object has every switch disabled.
 */
struct DxbcOptions {
  /// Creates an option set with all workarounds turned off.
  DxbcOptions() { }

  /// Creates the option set that applies to the given device.
  DxbcOptions(const Rc<DxvkDevice>& device);

  /// Emit FMin/FMax/FClamp in place of NMin/NMax/NClamp.
  bool useSimpleMinMaxClamp { false };

  /// For depth-compare operations, append the depth
  /// reference value to the coordinate vector.
  bool packDrefValueIntoCoordinates { false };
};
}

View File

@ -8,6 +8,7 @@ dxbc_src = files([
'dxbc_header.cpp', 'dxbc_header.cpp',
'dxbc_module.cpp', 'dxbc_module.cpp',
'dxbc_names.cpp', 'dxbc_names.cpp',
'dxbc_options.cpp',
'dxbc_reader.cpp', 'dxbc_reader.cpp',
'dxbc_util.cpp', 'dxbc_util.cpp',
]) ])

View File

@ -10,6 +10,16 @@ namespace dxvk {
class DxvkInstance; class DxvkInstance;
class DxvkSurface; class DxvkSurface;
/**
 * \brief GPU vendors
 *
 * Each enumerator carries the PCIe
 * vendor ID of the respective vendor.
 */
enum class DxvkGpuVendor : uint16_t {
  Amd    = 0x1002,
  Nvidia = 0x10DE,
  Intel  = 0x8086,
};
/** /**
* \brief DXVK adapter * \brief DXVK adapter
* *

View File

@ -70,7 +70,6 @@ namespace dxvk {
if (env::getEnvVar(L"DXVK_DEBUG_LAYERS") == "1") if (env::getEnvVar(L"DXVK_DEBUG_LAYERS") == "1")
layers.push_back("VK_LAYER_LUNARG_standard_validation"); layers.push_back("VK_LAYER_LUNARG_standard_validation");
const vk::NameSet layersAvailable const vk::NameSet layersAvailable
= vk::NameSet::enumerateInstanceLayers(*m_vkl); = vk::NameSet::enumerateInstanceLayers(*m_vkl);

View File

@ -1507,7 +1507,7 @@ namespace dxvk {
} }
uint32_t SpirvModule::opNMax( uint32_t SpirvModule::opFMax(
uint32_t resultType, uint32_t resultType,
uint32_t a, uint32_t a,
uint32_t b) { uint32_t b) {
@ -1524,7 +1524,7 @@ namespace dxvk {
} }
uint32_t SpirvModule::opNMin( uint32_t SpirvModule::opFMin(
uint32_t resultType, uint32_t resultType,
uint32_t a, uint32_t a,
uint32_t b) { uint32_t b) {
@ -1539,6 +1539,40 @@ namespace dxvk {
m_code.putWord(b); m_code.putWord(b);
return resultId; return resultId;
} }
// Emits the GLSL.std.450 NMax extended instruction for the
// operands a and b. Returns the newly allocated result ID.
uint32_t SpirvModule::opNMax(
        uint32_t resultType,
        uint32_t a,
        uint32_t b) {
  const uint32_t id = this->allocateId();

  // Instruction header; the length of 7 covers the opcode
  // word plus the six words written below.
  m_code.putIns (spv::OpExtInst, 7);
  m_code.putWord(resultType);
  m_code.putWord(id);

  // Extended instruction set and the instruction within it
  m_code.putWord(m_instExtGlsl450);
  m_code.putWord(spv::GLSLstd450NMax);

  // Operands
  m_code.putWord(a);
  m_code.putWord(b);
  return id;
}
// Emits the GLSL.std.450 NMin extended instruction for the
// operands a and b. Returns the newly allocated result ID.
uint32_t SpirvModule::opNMin(
        uint32_t resultType,
        uint32_t a,
        uint32_t b) {
  const uint32_t id = this->allocateId();

  // Instruction header; the length of 7 covers the opcode
  // word plus the six words written below.
  m_code.putIns (spv::OpExtInst, 7);
  m_code.putWord(resultType);
  m_code.putWord(id);

  // Extended instruction set and the instruction within it
  m_code.putWord(m_instExtGlsl450);
  m_code.putWord(spv::GLSLstd450NMin);

  // Operands
  m_code.putWord(a);
  m_code.putWord(b);
  return id;
}
uint32_t SpirvModule::opSMax( uint32_t SpirvModule::opSMax(
@ -1609,7 +1643,7 @@ namespace dxvk {
} }
uint32_t SpirvModule::opNClamp( uint32_t SpirvModule::opFClamp(
uint32_t resultType, uint32_t resultType,
uint32_t x, uint32_t x,
uint32_t minVal, uint32_t minVal,
@ -1628,6 +1662,25 @@ namespace dxvk {
} }
// Emits the GLSL.std.450 NClamp extended instruction for the
// value x and the bounds minVal and maxVal. Returns the newly
// allocated result ID.
uint32_t SpirvModule::opNClamp(
        uint32_t resultType,
        uint32_t x,
        uint32_t minVal,
        uint32_t maxVal) {
  const uint32_t id = this->allocateId();

  // Instruction header; the length of 8 covers the opcode
  // word plus the seven words written below.
  m_code.putIns (spv::OpExtInst, 8);
  m_code.putWord(resultType);
  m_code.putWord(id);

  // Extended instruction set and the instruction within it
  m_code.putWord(m_instExtGlsl450);
  m_code.putWord(spv::GLSLstd450NClamp);

  // Operands
  m_code.putWord(x);
  m_code.putWord(minVal);
  m_code.putWord(maxVal);
  return id;
}
uint32_t SpirvModule::opIEqual( uint32_t SpirvModule::opIEqual(
uint32_t resultType, uint32_t resultType,
uint32_t vector1, uint32_t vector1,

View File

@ -565,6 +565,16 @@ namespace dxvk {
uint32_t b, uint32_t b,
uint32_t c); uint32_t c);
uint32_t opFMax(
uint32_t resultType,
uint32_t a,
uint32_t b);
uint32_t opFMin(
uint32_t resultType,
uint32_t a,
uint32_t b);
uint32_t opNMax( uint32_t opNMax(
uint32_t resultType, uint32_t resultType,
uint32_t a, uint32_t a,
@ -595,6 +605,12 @@ namespace dxvk {
uint32_t a, uint32_t a,
uint32_t b); uint32_t b);
uint32_t opFClamp(
uint32_t resultType,
uint32_t x,
uint32_t minVal,
uint32_t maxVal);
uint32_t opNClamp( uint32_t opNClamp(
uint32_t resultType, uint32_t resultType,
uint32_t x, uint32_t x,

View File

@ -40,7 +40,7 @@ int WINAPI WinMain(HINSTANCE hInstance,
DxbcReader reader(dxbcCode.data(), dxbcCode.size()); DxbcReader reader(dxbcCode.data(), dxbcCode.size());
DxbcModule module(reader); DxbcModule module(reader);
Rc<DxvkShader> shader = module.compile(); Rc<DxvkShader> shader = module.compile(DxbcOptions());
shader->dump(std::ofstream( shader->dump(std::ofstream(
str::fromws(argv[2]), std::ios::binary)); str::fromws(argv[2]), std::ios::binary));
return 0; return 0;