// Source listing: dxvk/src/d3d9/d3d9_swvp_emu.cpp
// (listing metadata: 358 lines, 14 KiB, C++)

#include "d3d9_swvp_emu.h"
#include "d3d9_device.h"
#include "d3d9_vertex_declaration.h"
#include "../spirv/spirv_module.h"
namespace dxvk {
// Hash functor for compact vertex declarations. Only the fields that
// SWVP emulation makes use of are mixed in: Stream, Offset, Type,
// Method, Usage and UsageIndex of every element, in element order.
size_t D3D9VertexDeclHash::operator () (const D3D9CompactVertexElements& key) const {
  std::hash<WORD> hashWord;
  std::hash<BYTE> hashByte;

  DxvkHashState state;

  const size_t elementCount = key.size();

  for (size_t idx = 0; idx != elementCount; ++idx) {
    const auto& current = key[idx];

    // 16-bit fields
    state.add(hashWord(current.Stream));
    state.add(hashWord(current.Offset));

    // 8-bit fields
    state.add(hashByte(current.Type));
    state.add(hashByte(current.Method));
    state.add(hashByte(current.Usage));
    state.add(hashByte(current.UsageIndex));
  }

  return state;
}
// Equality functor for compact vertex declarations: two declarations
// match when they hold the same number of elements and every pair of
// elements at the same index is bytewise identical.
bool D3D9VertexDeclEq::operator () (const D3D9CompactVertexElements& a, const D3D9CompactVertexElements& b) const {
  const size_t elementCount = a.size();

  if (elementCount != b.size())
    return false;

  for (size_t idx = 0; idx < elementCount; ++idx) {
    // Raw memory comparison of one element against its counterpart.
    if (std::memcmp(&a[idx], &b[idx], sizeof(a[0])) != 0)
      return false;
  }

  return true;
}
// Storage class of a vertex declaration type, i.e. the kind of
// scalar each component is stored as.
enum class DecltypeClass {
  Float,  // 32-bit float components
  Byte,   // 8-bit integer components
  Short,  // 16-bit integer components
  Dec,    // UDEC3 / DEC3N packed formats
  Half    // 16-bit float components
};
// Bit flags describing how a vertex declaration type is stored.
// Kept as an unscoped enum so the values can be OR-ed together
// and stored in a plain integer.
enum DecltypeFlags {
  Signed     = 1 << 0,  // components are signed
  Normalize  = 1 << 1,  // components are stored as normalized integers
  ReverseRGB = 1 << 2   // first and third component are swapped in storage
};
// Classification of a single D3DDECLTYPE value.
struct Decltype {
  // Scalar storage class of each component.
  DecltypeClass Class;

  // Number of components in the element.
  uint32_t      VectorCount;

  // Bitwise OR of DecltypeFlags values.
  uint32_t      Flags;
};
// Maps a D3D9 vertex declaration type to its storage class, component
// count and flags. Any type not listed explicitly is treated like
// FLOAT4 (four signed 32-bit floats).
Decltype ClassifyDecltype(D3DDECLTYPE Type) {
  // Named flag combinations used by the table below.
  const uint32_t kNone           = 0;
  const uint32_t kSigned         = DecltypeFlags::Signed;
  const uint32_t kNormalized     = DecltypeFlags::Normalize;
  const uint32_t kSignedNorm     = DecltypeFlags::Signed | DecltypeFlags::Normalize;
  const uint32_t kNormReverseRGB = DecltypeFlags::Normalize | DecltypeFlags::ReverseRGB;

  switch (Type) {
    // 32-bit float types
    case D3DDECLTYPE_FLOAT1:    return Decltype{ DecltypeClass::Float, 1, kSigned };
    case D3DDECLTYPE_FLOAT2:    return Decltype{ DecltypeClass::Float, 2, kSigned };
    case D3DDECLTYPE_FLOAT3:    return Decltype{ DecltypeClass::Float, 3, kSigned };
    case D3DDECLTYPE_FLOAT4:    return Decltype{ DecltypeClass::Float, 4, kSigned };

    // 8-bit integer types
    case D3DDECLTYPE_D3DCOLOR:  return Decltype{ DecltypeClass::Byte,  4, kNormReverseRGB };
    case D3DDECLTYPE_UBYTE4:    return Decltype{ DecltypeClass::Byte,  4, kNone };
    case D3DDECLTYPE_UBYTE4N:   return Decltype{ DecltypeClass::Byte,  4, kNormalized };

    // 16-bit integer types
    case D3DDECLTYPE_SHORT2:    return Decltype{ DecltypeClass::Short, 2, kSigned };
    case D3DDECLTYPE_SHORT4:    return Decltype{ DecltypeClass::Short, 4, kSigned };
    case D3DDECLTYPE_SHORT2N:   return Decltype{ DecltypeClass::Short, 2, kSignedNorm };
    case D3DDECLTYPE_SHORT4N:   return Decltype{ DecltypeClass::Short, 4, kSignedNorm };
    case D3DDECLTYPE_USHORT2N:  return Decltype{ DecltypeClass::Short, 2, kNormalized };
    case D3DDECLTYPE_USHORT4N:  return Decltype{ DecltypeClass::Short, 4, kNormalized };

    // Packed 10-bit types
    case D3DDECLTYPE_UDEC3:     return Decltype{ DecltypeClass::Dec,   3, kNone };
    case D3DDECLTYPE_DEC3N:     return Decltype{ DecltypeClass::Dec,   3, kSignedNorm };

    // 16-bit float types
    case D3DDECLTYPE_FLOAT16_2: return Decltype{ DecltypeClass::Half,  2, kSigned };
    case D3DDECLTYPE_FLOAT16_4: return Decltype{ DecltypeClass::Half,  4, kSigned };

    // Everything else falls back to FLOAT4.
    default:                    return Decltype{ DecltypeClass::Float, 4, kSigned };
  }
}
class D3D9SWVPEmulatorGenerator {
public:
D3D9SWVPEmulatorGenerator(const std::string& name)
: m_module(spvVersion(1, 3)) {
m_entryPointId = m_module.allocateId();
m_module.setDebugSource(
spv::SourceLanguageUnknown, 0,
m_module.addDebugString(name.c_str()),
nullptr);
m_module.setMemoryModel(
spv::AddressingModelLogical,
spv::MemoryModelGLSL450);
m_module.enableCapability(spv::CapabilityGeometry);
m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeInputPoints);
m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeOutputPoints);
// This has to be > 0 for some reason even though
// we will never emit a vertex
m_module.setOutputVertices(m_entryPointId, 1);
m_module.setInvocations(m_entryPointId, 1);
m_module.functionBegin(m_module.defVoidType(), m_entryPointId, m_module.defFunctionType(
m_module.defVoidType(), 0, nullptr), spv::FunctionControlMaskNone);
m_module.opLabel(m_module.allocateId());
}
void compile(const D3D9CompactVertexElements& elements) {
uint32_t uint_t = m_module.defIntType(32, false);
uint32_t float_t = m_module.defFloatType(32);
uint32_t vec4_t = m_module.defVectorType(float_t, 4);
uint32_t vec4_singular_array_t = m_module.defArrayType(vec4_t, m_module.constu32(1));
// Setup the buffer
uint32_t bufferSlot = getSWVPBufferSlot();
uint32_t arrayType = m_module.defRuntimeArrayTypeUnique(uint_t);
m_module.decorateArrayStride(arrayType, sizeof(uint32_t));
uint32_t buffer_t = m_module.defStructTypeUnique(1, &arrayType);
m_module.memberDecorateOffset(buffer_t, 0, 0);
m_module.decorate(buffer_t, spv::DecorationBufferBlock);
uint32_t buffer = m_module.newVar(m_module.defPointerType(buffer_t, spv::StorageClassUniform), spv::StorageClassUniform);
m_module.decorateDescriptorSet(buffer, 0);
m_module.decorateBinding(buffer, bufferSlot);
m_bufferBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
m_bufferBinding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
m_bufferBinding.resourceBinding = bufferSlot;
m_bufferBinding.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
m_bufferBinding.access = VK_ACCESS_SHADER_WRITE_BIT;
m_bufferBinding.uboSet = VK_TRUE;
// Load our builtins
uint32_t primitiveIdPtr = m_module.newVar(m_module.defPointerType(uint_t, spv::StorageClassInput), spv::StorageClassInput);
m_module.decorateBuiltIn(primitiveIdPtr, spv::BuiltInPrimitiveId);
uint32_t primitiveId = m_module.opLoad(uint_t, primitiveIdPtr);
// The size of any given vertex
uint32_t size = 0;
for (uint32_t i = 0; i < elements.size(); i++) {
const auto& element = elements[i];
if (element.Stream == 0 && element.Type != D3DDECLTYPE_UNUSED) {
size = std::max(size, element.Offset + GetDecltypeSize(D3DDECLTYPE(element.Type)));
}
}
uint32_t vertexSize = m_module.constu32(size / sizeof(uint32_t));
//The offset of this vertex from the beginning of the buffer
uint32_t thisVertexOffset = m_module.opIMul(uint_t, vertexSize, primitiveId);
for (uint32_t i = 0; i < elements.size(); i++) {
const auto& element = elements[i];
// Load the slot associated with this element
DxsoSemantic semantic = { DxsoUsage(element.Usage), element.UsageIndex };
uint32_t elementPtr;
uint32_t elementVar;
elementPtr = m_module.newVar(m_module.defPointerType(vec4_singular_array_t, spv::StorageClassInput), spv::StorageClassInput);
if ((semantic.usage == DxsoUsage::Position || semantic.usage == DxsoUsage::PositionT) && element.UsageIndex == 0) {
// Load from builtin
m_module.decorateBuiltIn(elementPtr, spv::BuiltInPosition);
}
else {
// Load from slot
uint32_t slotIdx = RegisterLinkerSlot(semantic);
m_module.decorateLocation(elementPtr, slotIdx);
m_inputMask |= 1u << slotIdx;
}
uint32_t zero = m_module.constu32(0);
elementVar = m_module.opAccessChain(m_module.defPointerType(vec4_t, spv::StorageClassInput), elementPtr, 1, &zero);
elementVar = m_module.opLoad(vec4_t, elementVar);
// The offset of this element from the beginning of any given vertex
uint32_t perVertexElementOffset = m_module.constu32(element.Offset / sizeof(uint32_t));
// The offset of this element from the beginning of the buffer for **THIS** vertex
uint32_t elementOffset = m_module.opIAdd(uint_t, thisVertexOffset, perVertexElementOffset);
// Write to the buffer at the element offset for each part of the vector.
Decltype elementInfo = ClassifyDecltype(D3DDECLTYPE(element.Type));
if (elementInfo.Class == DecltypeClass::Dec) {
// TODO!
Logger::warn("Encountered DEC3/UDEC3N class, ignoring...");
continue;
}
uint32_t vecn_t = m_module.defVectorType(float_t, elementInfo.VectorCount);
uint32_t componentSet;
// Modifiers...
if (elementInfo.Flags & DecltypeFlags::ReverseRGB) {
std::array<uint32_t, 4> indices = { 2, 1, 0, 3 };
componentSet = m_module.opVectorShuffle(vecn_t, elementVar, elementVar, elementInfo.VectorCount, indices.data());
}
else {
std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };
componentSet = m_module.opVectorShuffle(vecn_t, elementVar, elementVar, elementInfo.VectorCount, indices.data());
}
if (elementInfo.Flags & DecltypeFlags::Normalize)
componentSet = m_module.opVectorTimesScalar(vecn_t, componentSet, m_module.constf32(255.0f));
bool isSigned = elementInfo.Flags & DecltypeFlags::Signed;
// Convert the component to the correct type/value.
switch (elementInfo.Class) {
case DecltypeClass::Float: break; // Do nothing!
case DecltypeClass::Byte: {
m_module.enableCapability(spv::CapabilityInt8);
uint32_t type = m_module.defIntType(8, isSigned);
type = m_module.defVectorType(type, elementInfo.VectorCount);
componentSet = isSigned
? m_module.opConvertFtoS(type, componentSet)
: m_module.opConvertFtoU(type, componentSet);
break;
}
case DecltypeClass::Short: {
m_module.enableCapability(spv::CapabilityInt16);
uint32_t type = m_module.defIntType(16, isSigned);
type = m_module.defVectorType(type, elementInfo.VectorCount);
componentSet = isSigned
? m_module.opConvertFtoS(type, componentSet)
: m_module.opConvertFtoU(type, componentSet);
break;
}
case DecltypeClass::Half: {
m_module.enableCapability(spv::CapabilityFloat16);
uint32_t type = m_module.defFloatType(16);
type = m_module.defVectorType(type, elementInfo.VectorCount);
componentSet = m_module.opFConvert(type, componentSet);
break;
}
case DecltypeClass::Dec: {
// TODO!
break;
}
}
// Bitcast to dwords before we write.
uint32_t dwordCount = GetDecltypeSize(D3DDECLTYPE(element.Type)) / sizeof(uint32_t);
uint32_t dwordVector = m_module.opBitcast(
m_module.defVectorType(uint_t, dwordCount),
componentSet);
// Finally write each dword to the buffer!
for (uint32_t i = 0; i < dwordCount; i++) {
std::array<uint32_t, 2> bufferIndices = { m_module.constu32(0), elementOffset };
uint32_t writeDest = m_module.opAccessChain(m_module.defPointerType(uint_t, spv::StorageClassUniform), buffer, bufferIndices.size(), bufferIndices.data());
uint32_t currentDword = m_module.opCompositeExtract(uint_t, dwordVector, 1, &i);
m_module.opStore(writeDest, currentDword);
elementOffset = m_module.opIAdd(uint_t, elementOffset, m_module.constu32(1));
}
}
}
Rc<DxvkShader> finalize() {
m_module.opReturn();
m_module.functionEnd();
m_module.addEntryPoint(m_entryPointId,
spv::ExecutionModelGeometry, "main");
m_module.setDebugName(m_entryPointId, "main");
DxvkShaderCreateInfo info;
info.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
info.bindingCount = 1;
info.bindings = &m_bufferBinding;
info.inputMask = m_inputMask;
return new DxvkShader(info, m_module.compile());
}
private:
SpirvModule m_module;
uint32_t m_entryPointId = 0;
uint32_t m_inputMask = 0u;
DxvkBindingInfo m_bufferBinding;
};
// Returns the SWVP emulation shader for a vertex declaration, building
// and caching it on first use. The declaration itself is the cache key;
// the SHA-1 of its raw element data is used for the shader key and name.
Rc<DxvkShader> D3D9SWVPEmulator::GetShaderModule(D3D9DeviceEx* pDevice, D3D9CompactVertexElements&& elements) {
  // Cached already? Then hand out the existing shader.
  {
    std::unique_lock<dxvk::mutex> lock(m_mutex);

    auto cached = m_modules.find(elements);

    if (cached != m_modules.end())
      return cached->second;
  }

  // Derive the shader key and a readable name from the element data.
  DxvkShaderKey shaderKey = {
    VK_SHADER_STAGE_GEOMETRY_BIT,
    Sha1Hash::compute(elements.data(), elements.size() * sizeof(elements[0])) };

  std::string shaderName = str::format("SWVP_", shaderKey.toString());

  // Cache miss: generate the module. The mutex is deliberately not
  // held here because compilation takes a while.
  D3D9SWVPEmulatorGenerator generator(shaderName);
  generator.compile(elements);

  Rc<DxvkShader> shader = generator.finalize();
  shader->setShaderKey(shaderKey);

  pDevice->GetDXVKDevice()->registerShader(shader);

  // Optionally dump the binary if a dump path is configured.
  const std::string& dumpPath = pDevice->GetOptions()->shaderDumpPath;

  if (!dumpPath.empty()) {
    std::ofstream dumpStream(
      str::format(dumpPath, "/", shaderName, ".spv"),
      std::ios_base::binary | std::ios_base::trunc);

    shader->dump(dumpStream);
  }

  // Publish the result. Should another thread have inserted a shader
  // for the same declaration in the meantime, the stored entry is
  // returned and the freshly built shader is dropped; otherwise the
  // stored entry is the shader we just inserted.
  std::unique_lock<dxvk::mutex> lock(m_mutex);

  auto inserted = m_modules.emplace(std::move(elements), shader);
  return inserted.first->second;
}
}