[d3d9] Rework uploading dynamic sysmem buffers at draw time

... and handle mismatching vertex sizes and vertex strides.
This commit is contained in:
Robin Kertels 2024-03-21 21:11:10 +01:00 committed by Philip Rebohle
parent a1ce690c5c
commit 889802887f
1 changed files with 89 additions and 54 deletions

View File

@ -5159,7 +5159,7 @@ namespace dxvk {
return D3D_OK;
}
void D3D9DeviceEx::UploadDynamicSysmemBuffers(
UINT& FirstVertexIndex,
@ -5191,25 +5191,58 @@ namespace dxvk {
// First we calculate the size of that UP buffer slice
// and store all sizes and offsets into it.
uint32_t upBufferSize = 0;
std::array<uint32_t, caps::MaxStreams> vboUPBufferOffsets = {};
std::array<uint32_t, caps::MaxStreams> vboUPBufferSizes = {};
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
vboUPBufferOffsets[i] = upBufferSize;
struct VBOCopy {
uint32_t srcOffset;
uint32_t dstOffset;
uint32_t copyBufferLength;
uint32_t copyElementCount;
uint32_t copyElementSize;
uint32_t copyElementStride;
};
uint32_t totalUpBufferSize = 0;
std::array<VBOCopy, caps::MaxStreams> vboCopies = {};
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
if (likely(vbo == nullptr)) {
vboUPBufferSizes[i] = 0;
continue;
}
const uint32_t vertexStride = m_state.vertexDecl->GetSize(i);
uint32_t offset = (FirstVertexIndex + BaseVertexIndex) * vertexStride;
const uint32_t vertexBufferSize = vbo->Desc()->Size;
if (offset < vertexBufferSize) {
const uint32_t vertexDataSize = std::min(NumVertices * vertexStride, vertexBufferSize - offset);
vboUPBufferSizes[i] = vertexDataSize;
upBufferSize += vertexDataSize;
const uint32_t vertexSize = m_state.vertexDecl->GetSize(i);
const uint32_t vertexStride = m_state.vertexBuffers[i].stride;
const uint32_t srcStride = vertexStride;
const uint32_t dstStride = std::min(vertexStride, vertexSize);
uint32_t elementCount = NumVertices;
if (m_state.streamFreq[i] & D3DSTREAMSOURCE_INSTANCEDATA) {
elementCount = GetInstanceCount();
}
const uint32_t vboOffset = m_state.vertexBuffers[i].offset;
const uint32_t vertexOffset = (FirstVertexIndex + BaseVertexIndex) * srcStride;
const uint32_t vertexBufferSize = vbo->Desc()->Size;
const uint32_t srcOffset = vboOffset + vertexOffset;
if (unlikely(srcOffset > vertexBufferSize)) {
// All vertices are out of bounds
vboCopies[i].copyBufferLength = 0;
} else if (unlikely(srcOffset + elementCount * srcStride > vertexBufferSize)) {
// Some vertices are (partially) out of bounds
uint32_t boundVertexBufferRange = vertexBufferSize - vboOffset;
elementCount = boundVertexBufferRange / srcStride;
// Copy all complete vertices
vboCopies[i].copyBufferLength = elementCount * dstStride;
// Copy the remaining partial vertex
vboCopies[i].copyBufferLength += std::min(dstStride, boundVertexBufferRange % srcStride);
} else {
// No vertices are out of bounds
vboCopies[i].copyBufferLength = elementCount * dstStride;
}
vboCopies[i].copyElementCount = elementCount;
vboCopies[i].copyElementStride = srcStride;
vboCopies[i].copyElementSize = dstStride;
vboCopies[i].srcOffset = srcOffset;
vboCopies[i].dstOffset = totalUpBufferSize;
totalUpBufferSize += vboCopies[i].copyBufferLength;
}
uint32_t iboUPBufferSize = 0;
@ -5222,13 +5255,13 @@ namespace dxvk {
uint32_t indexBufferSize = ibo->Desc()->Size;
if (offset < indexBufferSize) {
iboUPBufferSize = std::min(NumIndices * indexStride, indexBufferSize - offset);
iboUPBufferOffset = upBufferSize;
upBufferSize += iboUPBufferSize;
iboUPBufferOffset = totalUpBufferSize;
totalUpBufferSize += iboUPBufferSize;
}
}
}
if (unlikely(upBufferSize == 0)) {
if (unlikely(totalUpBufferSize == 0)) {
*pDynamicVBOs = false;
if (pDynamicIBO)
*pDynamicIBO = false;
@ -5236,47 +5269,49 @@ namespace dxvk {
return;
}
auto upSlice = AllocUPBuffer(upBufferSize);
auto upSlice = AllocUPBuffer(totalUpBufferSize);
// Now copy the actual data and bind it.
if (dynamicSysmemVBOs) {
for (uint32_t i = 0; i < caps::MaxStreams; i++) {
if (unlikely(vboUPBufferSizes[i] == 0)) {
EmitCs([
cStream = i
](DxvkContext* ctx) {
ctx->bindVertexBuffer(cStream, DxvkBufferSlice(), 0);
});
m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
continue;
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
const VBOCopy& copy = vboCopies[i];
if (likely(copy.copyBufferLength != 0)) {
const auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr) + copy.dstOffset;
const uint8_t* src = reinterpret_cast<uint8_t*>(vbo->GetMappedSlice().mapPtr) + copy.srcOffset;
if (likely(copy.copyElementStride == copy.copyElementSize)) {
std::memcpy(data, src, copy.copyBufferLength);
} else {
for (uint32_t j = 0; j * copy.copyElementCount; j++) {
std::memcpy(data + j * copy.copyElementSize, src + j * copy.copyElementStride, copy.copyElementSize);
}
if (unlikely(copy.copyBufferLength > copy.copyElementCount * copy.copyElementSize)) {
// Partial vertex at the end
std::memcpy(
data + copy.copyElementCount * copy.copyElementSize,
src + copy.copyElementCount * copy.copyElementStride,
copy.copyBufferLength - copy.copyElementCount * copy.copyElementSize);
}
}
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
const uint32_t vertexStride = m_state.vertexDecl->GetSize(i);
uint32_t offset = (BaseVertexIndex + FirstVertexIndex) * vertexStride + m_state.vertexBuffers[i].offset;
uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr) + vboUPBufferOffsets[i];
uint8_t* src = reinterpret_cast<uint8_t*>(vbo->GetMappedSlice().mapPtr) + offset;
std::memcpy(data, src, vboUPBufferSizes[i]);
auto vboSlice = upSlice.slice.subSlice(vboUPBufferOffsets[i], vboUPBufferSizes[i]);
EmitCs([
cStream = i,
cBufferSlice = std::move(vboSlice),
cStride = vertexStride
](DxvkContext* ctx) mutable {
ctx->bindVertexBuffer(cStream, std::move(cBufferSlice), cStride);
});
m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
}
// Change the draw call parameters to reflect the changed vertex buffers
if (NumIndices != 0) {
BaseVertexIndex = -FirstVertexIndex;
} else {
FirstVertexIndex = 0;
}
auto vboSlice = upSlice.slice.subSlice(copy.dstOffset, copy.copyBufferLength);
EmitCs([
cStream = i,
cBufferSlice = std::move(vboSlice),
cStride = copy.copyElementSize
](DxvkContext* ctx) mutable {
ctx->bindVertexBuffer(cStream, std::move(cBufferSlice), cStride);
});
m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
}
// Change the draw call parameters to reflect the changed vertex buffers
if (NumIndices != 0) {
BaseVertexIndex = -FirstVertexIndex;
} else {
FirstVertexIndex = 0;
}
if (dynamicSysmemIBO) {
@ -5293,7 +5328,7 @@ namespace dxvk {
uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr) + iboUPBufferOffset;
uint8_t* src = reinterpret_cast<uint8_t*>(ibo->GetMappedSlice().mapPtr) + offset;
std::memcpy(data, src, iboUPBufferSize);
auto iboSlice = upSlice.slice.subSlice(iboUPBufferOffset, iboUPBufferSize);
EmitCs([
cBufferSlice = std::move(iboSlice),