[dxbc] Implemented vendor-specific workarounds in an attempt to fix Nvidia

This commit is contained in:
Philip Rebohle 2018-01-07 20:05:27 +01:00
parent 4e06f498dd
commit f4cd90d6fa
16 changed files with 184 additions and 23 deletions

View File

@ -23,7 +23,8 @@ namespace dxvk {
m_featureLevel (featureLevel),
m_featureFlags (featureFlags),
m_dxvkDevice (m_dxgiDevice->GetDXVKDevice()),
m_dxvkAdapter (m_dxvkDevice->adapter()) {
m_dxvkAdapter (m_dxvkDevice->adapter()),
m_dxbcOptions (m_dxvkDevice) {
Com<IDXGIAdapter> adapter;
if (FAILED(m_dxgiDevice->GetAdapter(&adapter))
@ -1356,7 +1357,7 @@ namespace dxvk {
try {
*pShaderModule = D3D11ShaderModule(
this, pShaderBytecode, BytecodeLength);
&m_dxbcOptions, this, pShaderBytecode, BytecodeLength);
return S_OK;
} catch (const DxvkError& e) {
Logger::err(e.message());

View File

@ -1,13 +1,15 @@
#pragma once
#include "../dxbc/dxbc_options.h"
#include "../dxgi/dxgi_object.h"
#include "../util/com/com_private_data.h"
#include "d3d11_interfaces.h"
#include "d3d11_state.h"
#include "d3d11_util.h"
#include "../util/com/com_private_data.h"
namespace dxvk {
class DxgiAdapter;
@ -255,6 +257,8 @@ namespace dxvk {
const Rc<DxvkDevice> m_dxvkDevice;
const Rc<DxvkAdapter> m_dxvkAdapter;
const DxbcOptions m_dxbcOptions;
D3D11DeviceContext* m_context = nullptr;
std::mutex m_resourceInitMutex;

View File

@ -8,6 +8,7 @@ namespace dxvk {
D3D11ShaderModule::D3D11ShaderModule(
const DxbcOptions* pDxbcOptions,
D3D11Device* pDevice,
const void* pShaderBytecode,
size_t BytecodeLength) {
@ -33,7 +34,7 @@ namespace dxvk {
}
m_shader = module.compile();
m_shader = module.compile(*pDxbcOptions);
if (dumpPath.size() != 0) {
const std::string baseName = str::format(dumpPath, "/",

View File

@ -25,6 +25,7 @@ namespace dxvk {
D3D11ShaderModule();
D3D11ShaderModule(
const DxbcOptions* pDxbcOptions,
D3D11Device* pDevice,
const void* pShaderBytecode,
size_t BytecodeLength);

View File

@ -7,10 +7,12 @@ namespace dxvk {
constexpr uint32_t PerVertex_ClipDist = 2;
DxbcCompiler::DxbcCompiler(
const DxbcOptions& options,
const DxbcProgramVersion& version,
const Rc<DxbcIsgn>& isgn,
const Rc<DxbcIsgn>& osgn)
: m_version (version),
: m_options (options),
m_version (version),
m_isgn (isgn),
m_osgn (osgn) {
// Declare an entry point ID. We'll need it during the
@ -1040,13 +1042,15 @@ namespace dxvk {
break;
case DxbcOpcode::Max:
dst.id = m_module.opNMax(typeId,
src.at(0).id, src.at(1).id);
dst.id = m_options.useSimpleMinMaxClamp
? m_module.opFMax(typeId, src.at(0).id, src.at(1).id)
: m_module.opNMax(typeId, src.at(0).id, src.at(1).id);
break;
case DxbcOpcode::Min:
dst.id = m_module.opNMin(typeId,
src.at(0).id, src.at(1).id);
dst.id = m_options.useSimpleMinMaxClamp
? m_module.opFMin(typeId, src.at(0).id, src.at(1).id)
: m_module.opNMin(typeId, src.at(0).id, src.at(1).id);
break;
case DxbcOpcode::Mul:
@ -2211,7 +2215,7 @@ namespace dxvk {
// Load the texture coordinates. SPIR-V allows these
// to be float4 even if not all components are used.
const DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask);
DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask);
// Load reference value for depth-compare operations
const bool isDepthCompare = ins.op == DxbcOpcode::SampleC
@ -2221,6 +2225,17 @@ namespace dxvk {
? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false))
: DxbcRegisterValue();
if (isDepthCompare && m_options.packDrefValueIntoCoordinates) {
const std::array<uint32_t, 2> packedCoordIds
= {{ coord.id, referenceValue.id }};
coord.type.ccount += 1;
coord.id = m_module.opCompositeConstruct(
getVectorTypeId(coord.type),
packedCoordIds.size(),
packedCoordIds.data());
}
// Load explicit gradients for sample operations that require them
const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD;
@ -3034,10 +3049,9 @@ namespace dxvk {
if (value.type.ctype == DxbcScalarType::Float32) {
// Saturating only makes sense on floats
if (modifiers.saturate) {
value.id = m_module.opNClamp(
typeId, value.id,
m_module.constf32(0.0f),
m_module.constf32(1.0f));
value.id = m_options.useSimpleMinMaxClamp
? m_module.opFClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f))
: m_module.opNClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f));
}
}

View File

@ -9,6 +9,7 @@
#include "dxbc_decoder.h"
#include "dxbc_defs.h"
#include "dxbc_names.h"
#include "dxbc_options.h"
#include "dxbc_util.h"
namespace dxvk {
@ -213,6 +214,7 @@ namespace dxvk {
public:
DxbcCompiler(
const DxbcOptions& options,
const DxbcProgramVersion& version,
const Rc<DxbcIsgn>& isgn,
const Rc<DxbcIsgn>& osgn);
@ -233,6 +235,7 @@ namespace dxvk {
private:
DxbcOptions m_options;
DxbcProgramVersion m_version;
SpirvModule m_module;

View File

@ -40,13 +40,13 @@ namespace dxvk {
}
Rc<DxvkShader> DxbcModule::compile() const {
Rc<DxvkShader> DxbcModule::compile(const DxbcOptions& options) const {
if (m_shexChunk == nullptr)
throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk");
DxbcCodeSlice slice = m_shexChunk->slice();
DxbcCompiler compiler(
DxbcCompiler compiler(options,
m_shexChunk->version(),
m_isgnChunk, m_osgnChunk);

View File

@ -5,6 +5,7 @@
#include "dxbc_chunk_isgn.h"
#include "dxbc_chunk_shex.h"
#include "dxbc_header.h"
#include "dxbc_options.h"
#include "dxbc_reader.h"
// References used for figuring out DXBC:
@ -46,9 +47,11 @@ namespace dxvk {
/**
* \brief Compiles DXBC shader to SPIR-V module
*
* \param [in] options DXBC compiler options
* \returns The compiled shader object
*/
Rc<DxvkShader> compile() const;
Rc<DxvkShader> compile(const DxbcOptions& options) const;
private:

29
src/dxbc/dxbc_options.cpp Normal file
View File

@ -0,0 +1,29 @@
#include "dxbc_options.h"
namespace dxvk {
/**
 * \brief Selects compiler workarounds for a device
 *
 * Reads the Vulkan physical device properties of the adapter
 * that backs \c device and enables the workarounds needed for
 * that adapter's vendor. Each enabled workaround is logged.
 * \param [in] device Device to pick the options for
 */
DxbcOptions::DxbcOptions(const Rc<DxvkDevice>& device) {
  const VkPhysicalDeviceProperties deviceProps
    = device->adapter()->deviceProperties();

  // vendorID is a 32-bit value, whereas DxvkGpuVendor is only
  // 16 bits wide. Widen the enumerator for the comparison rather
  // than narrowing the ID, so that the upper bits of the ID are
  // not discarded and an unrelated ID cannot alias a known vendor.
  const bool isNvidia = deviceProps.vendorID
    == static_cast<uint32_t>(DxvkGpuVendor::Nvidia);

  if (isNvidia) {
    // From vkd3d: NMin/NMax/NClamp crash the driver.
    this->useSimpleMinMaxClamp = true;

    // From vkd3d: Nvidia expects the depth reference
    // value to be packed into the coordinate vector.
    this->packDrefValueIntoCoordinates = true;
  }

  // Inform the user about which workarounds are enabled
  if (this->useSimpleMinMaxClamp)
    Logger::warn("DxbcOptions: Using FMin/FMax/FClamp instead of NMin/NMax/NClamp");

  if (this->packDrefValueIntoCoordinates)
    Logger::warn("DxbcOptions: Packing depth reference value into coordinate vector");
}
}

26
src/dxbc/dxbc_options.h Normal file
View File

@ -0,0 +1,26 @@
#pragma once
#include "../dxvk/dxvk_device.h"
namespace dxvk {
/**
 * \brief DXBC compiler options
 *
 * Collects switches that change how DXBC shaders are
 * translated depending on the driver or device in use.
 * Most of them exist to work around driver bugs.
 */
struct DxbcOptions {
  /// Creates an option set with all workarounds disabled.
  DxbcOptions() = default;

  /// Creates an option set for the given device.
  DxbcOptions(const Rc<DxvkDevice>& device);

  /// Emit FMin/FMax/FClamp in place of NMin/NMax/NClamp.
  bool useSimpleMinMaxClamp = false;

  /// Append the depth reference value to the coordinate
  /// vector when emitting depth-compare sample operations.
  bool packDrefValueIntoCoordinates = false;
};
}

View File

@ -8,6 +8,7 @@ dxbc_src = files([
'dxbc_header.cpp',
'dxbc_module.cpp',
'dxbc_names.cpp',
'dxbc_options.cpp',
'dxbc_reader.cpp',
'dxbc_util.cpp',
])

View File

@ -10,6 +10,16 @@ namespace dxvk {
class DxvkInstance;
class DxvkSurface;
/**
 * \brief GPU vendors
 *
 * The enumerator values are based on PCIe vendor IDs.
 */
enum class DxvkGpuVendor : uint16_t {
  Amd    = 0x1002,
  Nvidia = 0x10DE,
  Intel  = 0x8086,
};
/**
* \brief DXVK adapter
*

View File

@ -70,7 +70,6 @@ namespace dxvk {
if (env::getEnvVar(L"DXVK_DEBUG_LAYERS") == "1")
layers.push_back("VK_LAYER_LUNARG_standard_validation");
const vk::NameSet layersAvailable
= vk::NameSet::enumerateInstanceLayers(*m_vkl);

View File

@ -1507,7 +1507,7 @@ namespace dxvk {
}
uint32_t SpirvModule::opNMax(
uint32_t SpirvModule::opFMax(
uint32_t resultType,
uint32_t a,
uint32_t b) {
@ -1524,7 +1524,7 @@ namespace dxvk {
}
uint32_t SpirvModule::opNMin(
uint32_t SpirvModule::opFMin(
uint32_t resultType,
uint32_t a,
uint32_t b) {
@ -1539,6 +1539,40 @@ namespace dxvk {
m_code.putWord(b);
return resultId;
}
uint32_t SpirvModule::opNMax(
uint32_t resultType,
uint32_t a,
uint32_t b) {
uint32_t resultId = this->allocateId();
m_code.putIns (spv::OpExtInst, 7);
m_code.putWord(resultType);
m_code.putWord(resultId);
m_code.putWord(m_instExtGlsl450);
m_code.putWord(spv::GLSLstd450NMax);
m_code.putWord(a);
m_code.putWord(b);
return resultId;
}
uint32_t SpirvModule::opNMin(
uint32_t resultType,
uint32_t a,
uint32_t b) {
uint32_t resultId = this->allocateId();
m_code.putIns (spv::OpExtInst, 7);
m_code.putWord(resultType);
m_code.putWord(resultId);
m_code.putWord(m_instExtGlsl450);
m_code.putWord(spv::GLSLstd450NMin);
m_code.putWord(a);
m_code.putWord(b);
return resultId;
}
uint32_t SpirvModule::opSMax(
@ -1609,7 +1643,7 @@ namespace dxvk {
}
uint32_t SpirvModule::opNClamp(
uint32_t SpirvModule::opFClamp(
uint32_t resultType,
uint32_t x,
uint32_t minVal,
@ -1628,6 +1662,25 @@ namespace dxvk {
}
uint32_t SpirvModule::opNClamp(
uint32_t resultType,
uint32_t x,
uint32_t minVal,
uint32_t maxVal) {
uint32_t resultId = this->allocateId();
m_code.putIns (spv::OpExtInst, 8);
m_code.putWord(resultType);
m_code.putWord(resultId);
m_code.putWord(m_instExtGlsl450);
m_code.putWord(spv::GLSLstd450NClamp);
m_code.putWord(x);
m_code.putWord(minVal);
m_code.putWord(maxVal);
return resultId;
}
uint32_t SpirvModule::opIEqual(
uint32_t resultType,
uint32_t vector1,

View File

@ -565,6 +565,16 @@ namespace dxvk {
uint32_t b,
uint32_t c);
uint32_t opFMax(
uint32_t resultType,
uint32_t a,
uint32_t b);
uint32_t opFMin(
uint32_t resultType,
uint32_t a,
uint32_t b);
uint32_t opNMax(
uint32_t resultType,
uint32_t a,
@ -595,6 +605,12 @@ namespace dxvk {
uint32_t a,
uint32_t b);
uint32_t opFClamp(
uint32_t resultType,
uint32_t x,
uint32_t minVal,
uint32_t maxVal);
uint32_t opNClamp(
uint32_t resultType,
uint32_t x,

View File

@ -40,7 +40,7 @@ int WINAPI WinMain(HINSTANCE hInstance,
DxbcReader reader(dxbcCode.data(), dxbcCode.size());
DxbcModule module(reader);
Rc<DxvkShader> shader = module.compile();
Rc<DxvkShader> shader = module.compile(DxbcOptions());
shader->dump(std::ofstream(
str::fromws(argv[2]), std::ios::binary));
return 0;