Commit b64cd11e authored by Joshua Ashton 🐸

[d3d9] Refactor constant uploading

Cleans up some messy lambda stuff we were doing.
Adds a fast path for when we aren't doing SWVP.

Eliminate std::array from the constant set structures.
parent 6c65f779
Pipeline #947 passed with stage
in 4 minutes and 2 seconds
......@@ -18,16 +18,22 @@ namespace dxvk {
};
// We make an assumption later based on the packing of this struct for copying.
struct D3D9ShaderConstantsVS {
std::array<Vector4, caps::MaxFloatConstantsSoftware> fConsts;
std::array<Vector4i, caps::MaxOtherConstantsSoftware> iConsts;
std::array<uint32_t, caps::MaxOtherConstantsSoftware / 32> bConsts;
struct D3D9ShaderConstantsVSSoftware {
Vector4 fConsts[caps::MaxFloatConstantsSoftware];
Vector4i iConsts[caps::MaxOtherConstantsSoftware];
uint32_t bConsts[caps::MaxOtherConstantsSoftware / 32];
};
// Vertex shader constant layout sized by caps::MaxFloatConstantsVS and
// caps::MaxOtherConstants, as opposed to the *Software caps used by
// D3D9ShaderConstantsVSSoftware. UploadHardwareConstantSet reinterprets the
// mapped constant buffer as this type when copying.
// NOTE(review): bConsts is a single 32-bit word; presumably one bit per
// boolean register — confirm against the shader compiler.
struct D3D9ShaderConstantsVSHardware {
Vector4 fConsts[caps::MaxFloatConstantsVS];
Vector4i iConsts[caps::MaxOtherConstants];
uint32_t bConsts[1];
};
struct D3D9ShaderConstantsPS {
std::array<Vector4, caps::MaxFloatConstantsPS> fConsts;
std::array<Vector4i, caps::MaxOtherConstants> iConsts;
std::array<uint32_t, align(caps::MaxOtherConstants, 32) / 32> bConsts;
Vector4 fConsts[caps::MaxFloatConstantsPS];
Vector4i iConsts[caps::MaxOtherConstants];
uint32_t bConsts[1];
};
struct D3D9ConstantSets {
......
......@@ -4427,48 +4427,78 @@ namespace dxvk {
}
template <DxsoProgramType ShaderStage>
void D3D9DeviceEx::UploadConstants() {
auto UploadHelper = [&](auto& src, auto& layout, const auto& shader) {
D3D9ConstantSets& constSet = m_consts[ShaderStage];
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
inline void D3D9DeviceEx::UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader) {
const D3D9ConstantSets& constSet = m_consts[ShaderStage];
if (!constSet.dirty)
return;
auto* dst = reinterpret_cast<HardwareLayoutType*>(pData);
constSet.dirty = false;
if (constSet.meta->maxConstIndexF)
std::memcpy(dst->fConsts, Src.fConsts, constSet.meta->maxConstIndexF * sizeof(Vector4));
if (constSet.meta->maxConstIndexI)
std::memcpy(dst->iConsts, Src.iConsts, constSet.meta->maxConstIndexI * sizeof(Vector4i));
if (constSet.meta->maxConstIndexB)
dst->bConsts[0] = Src.bConsts[0];
}
DxvkBufferSliceHandle slice = constSet.buffer->allocSlice();
auto dstData = reinterpret_cast<uint8_t*>(slice.mapPtr);
auto srcData = &src;
template <typename SoftwareLayoutType, typename ShaderType>
inline void D3D9DeviceEx::UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
const D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader];
EmitCs([
cBuffer = constSet.buffer,
cSlice = slice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cSlice);
});
auto dst = reinterpret_cast<uint8_t*>(pData);
if (constSet.meta->maxConstIndexF)
std::memcpy(dstData + layout.floatOffset(), srcData->fConsts.data(), constSet.meta->maxConstIndexF * 4 * sizeof(float));
if (constSet.meta->maxConstIndexI)
std::memcpy(dstData + layout.intOffset(), srcData->iConsts.data(), constSet.meta->maxConstIndexI * 4 * sizeof(INT));
if (constSet.meta->maxConstIndexB)
std::memcpy(dstData + layout.bitmaskOffset(), srcData->bConsts.data(), layout.bitmaskSize());
if (constSet.meta->maxConstIndexF)
std::memcpy(dst + Layout.floatOffset(), Src.fConsts, constSet.meta->maxConstIndexF * sizeof(Vector4));
if (constSet.meta->maxConstIndexI)
std::memcpy(dst + Layout.intOffset(), Src.iConsts, constSet.meta->maxConstIndexI * sizeof(Vector4i));
if (constSet.meta->maxConstIndexB)
std::memcpy(dst + Layout.bitmaskOffset(), Src.bConsts, Layout.bitmaskSize());
}
if (constSet.meta->needsConstantCopies) {
Vector4* data = reinterpret_cast<Vector4*>(slice.mapPtr);
auto& shaderConsts = GetCommonShader(shader)->GetConstants();
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
D3D9ConstantSets& constSet = m_consts[ShaderStage];
for (const auto& constant : shaderConsts)
data[constant.uboIdx] = *reinterpret_cast<const Vector4*>(constant.float32);
}
};
if (!constSet.dirty)
return;
constSet.dirty = false;
DxvkBufferSliceHandle slice = constSet.buffer->allocSlice();
EmitCs([
cBuffer = constSet.buffer,
cSlice = slice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cSlice);
});
if constexpr (ShaderStage == DxsoProgramType::PixelShader)
UploadHardwareConstantSet<ShaderStage, HardwareLayoutType>(slice.mapPtr, Src, Shader);
else if (likely(!CanSWVP()))
UploadHardwareConstantSet<ShaderStage, HardwareLayoutType>(slice.mapPtr, Src, Shader);
else
UploadSoftwareConstantSet(slice.mapPtr, Src, Layout, Shader);
if (constSet.meta->needsConstantCopies) {
Vector4* data = reinterpret_cast<Vector4*>(slice.mapPtr);
return ShaderStage == DxsoProgramTypes::VertexShader
? UploadHelper(m_state.vsConsts, m_vsLayout, m_state.vertexShader)
: UploadHelper(m_state.psConsts, m_psLayout, m_state.pixelShader);
auto& shaderConsts = GetCommonShader(Shader)->GetConstants();
for (const auto& constant : shaderConsts)
data[constant.uboIdx] = *reinterpret_cast<const Vector4*>(constant.float32);
}
}
// Uploads the shader constants for ShaderStage by forwarding that stage's
// constant storage, constant layout and bound shader to UploadConstantSet.
template <DxsoProgramType ShaderStage>
void D3D9DeviceEx::UploadConstants() {
// ShaderStage is a template parameter, so the branch that does not match
// is discarded at compile time.
if constexpr (ShaderStage == DxsoProgramTypes::VertexShader)
// Vertex stage: the hardware-path destination layout is D3D9ShaderConstantsVSHardware.
return UploadConstantSet<ShaderStage, D3D9ShaderConstantsVSHardware>(m_state.vsConsts, m_vsLayout, m_state.vertexShader);
else
// Pixel stage: D3D9ShaderConstantsPS is passed as the hardware-path layout.
return UploadConstantSet<ShaderStage, D3D9ShaderConstantsPS> (m_state.psConsts, m_psLayout, m_state.pixelShader);
}
......
......@@ -776,6 +776,15 @@ namespace dxvk {
void BindRasterizerState();
void BindAlphaTestState();
// Copies the float, integer and first boolean word of Src into pData, which
// is reinterpreted as HardwareLayoutType. Each copy is skipped when the
// corresponding maxConstIndex* value in the stage's constant-set meta is zero.
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
inline void UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader);
// Copies the constants of Src into pData at the byte offsets reported by
// Layout (floatOffset / intOffset / bitmaskOffset). Always reads the vertex
// shader constant set.
template <typename SoftwareLayoutType, typename ShaderType>
inline void UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader);
// Does nothing unless the stage's constant set is dirty. Otherwise clears the
// dirty flag, allocates a new buffer slice, emits a buffer invalidation for
// it, fills it through the hardware or software path, and finally writes the
// shader-defined constants when meta->needsConstantCopies is set.
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
inline void UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader);
// Entry point: selects the vertex or pixel shader state for ShaderStage and
// calls UploadConstantSet with it.
template <DxsoProgramType ShaderStage>
void UploadConstants();
......@@ -1096,14 +1105,14 @@ namespace dxvk {
return D3DERR_INVALIDCALL;
if constexpr (ConstantType == D3D9ConstantType::Float) {
auto begin = set.fConsts.begin() + StartRegister;
auto end = begin + Count;
auto begin = &set.fConsts[StartRegister];
auto end = &begin[Count];
std::copy(begin, end, reinterpret_cast<Vector4*>(pConstantData));
}
else if constexpr (ConstantType == D3D9ConstantType::Int) {
auto begin = set.iConsts.begin() + StartRegister;
auto end = begin + Count;
auto begin = &set.iConsts[StartRegister];
auto end = &begin[Count];
std::copy(begin, end, reinterpret_cast<Vector4i*>(pConstantData));
}
......
......@@ -224,7 +224,7 @@ namespace dxvk {
std::array<DWORD, TextureStageStateCount>,
caps::TextureStageCount> textureStages;
D3D9ShaderConstantsVS vsConsts;
D3D9ShaderConstantsVSSoftware vsConsts;
D3D9ShaderConstantsPS psConsts;
std::array<UINT, caps::MaxStreams> streamFreq;
......@@ -258,15 +258,15 @@ namespace dxvk {
auto end = begin + Count;
if (!FloatEmu)
std::copy(begin, end, set.fConsts.begin() + StartRegister);
std::copy(begin, end, &set.fConsts[StartRegister]);
else
std::transform(begin, end, set.fConsts.begin() + StartRegister, replaceNaN);
std::transform(begin, end, &set.fConsts[StartRegister], replaceNaN);
}
else if constexpr (ConstantType == D3D9ConstantType::Int) {
auto begin = reinterpret_cast<const Vector4i*>(pConstantData);
auto end = begin + Count;
std::copy(begin, end, set.iConsts.begin() + StartRegister);
std::copy(begin, end, &set.iConsts[StartRegister]);
}
else {
for (uint32_t i = 0; i < Count; i++) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment