[d3d9] Defer managed texture uploads until PrepareDraw and when needed

This also caches shader masks used for hazard tracking.
This commit is contained in:
Joshua Ashton 2020-01-11 04:12:59 +00:00 committed by Joshie
parent 0ea510eb9b
commit ae68e3a5bc
5 changed files with 105 additions and 27 deletions

View File

@ -357,6 +357,15 @@ namespace dxvk {
UINT Lod,
VkImageUsageFlags UsageFlags,
bool Srgb);
/**
 * \brief Pending-upload bitmask
 * \returns Mutable reference to the per-subresource set of pending uploads
 */
D3D9SubresourceBitset& GetUploadBitmask() { return m_needsUpload; }
/**
 * \brief Sets or clears the "uploading" flag of one subresource
 */
void SetUploading(UINT Subresource, bool uploading) { m_uploading.set(Subresource, uploading); }
/**
 * \brief Clears the "uploading" flag of every subresource
 */
void ClearUploading() { m_uploading.clearAll(); }
/**
 * \brief Queries the "uploading" flag of one subresource
 */
bool GetUploading(UINT Subresource) const { return m_uploading.get(Subresource); }
/**
 * \brief Sets or clears the "needs upload" flag of one subresource
 */
void SetNeedsUpload(UINT Subresource, bool upload) { m_needsUpload.set(Subresource, upload); }
/**
 * \brief Checks whether any subresource is flagged as needing an upload
 */
bool NeedsAnyUpload() { return m_needsUpload.any(); }
/**
 * \brief Clears the "needs upload" flag of every subresource
 */
void ClearNeedsUpload() { m_needsUpload.clearAll(); }
private:
@ -392,6 +401,9 @@ namespace dxvk {
D3D9SubresourceBitset m_dirty = { };
// One bit per subresource: written through SetUploading(), read through
// GetUploading(), and reset for all subresources by ClearUploading().
D3D9SubresourceBitset m_uploading = { };
// One bit per subresource whose data still has to be uploaded: written
// through SetNeedsUpload(), exposed via GetUploadBitmask() and
// NeedsAnyUpload(), and reset by ClearNeedsUpload().
D3D9SubresourceBitset m_needsUpload = { };
/**
* \brief Mip level
* \returns Size of packed mip level in bytes

View File

@ -2662,7 +2662,11 @@ namespace dxvk {
BindShader<DxsoProgramTypes::VertexShader>(
GetCommonShader(shader),
GetVertexShaderPermutation());
m_vsShaderMasks = newShader->GetShaderMask();
}
else
m_vsShaderMasks = D3D9ShaderMasks();
m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
@ -2987,6 +2991,15 @@ namespace dxvk {
BindShader<DxsoProgramTypes::PixelShader>(
GetCommonShader(shader),
GetPixelShaderPermutation());
m_psShaderMasks = newShader->GetShaderMask();
}
else {
// TODO: What fixed function textures are in use?
// Currently we are marking all 8 of them as in use here.
// The RT output is always 0 for fixed function.
m_psShaderMasks = FixedFunctionMask;
}
UpdateActiveHazards();
@ -3498,9 +3511,7 @@ namespace dxvk {
BindTexture(StateSampler);
// We only care about PS samplers
if (likely(StateSampler <= caps::MaxSamplers))
UpdateActiveRTTextures(StateSampler);
UpdateActiveTextures(StateSampler);
return D3D_OK;
}
@ -3902,14 +3913,17 @@ namespace dxvk {
// calling app promises not to overwrite data that is in use
// or is reading. Remember! This will only trigger for MANAGED resources
// that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting.
const bool uploading = pResource->GetUploading(Subresource);
const bool readOnly = Flags & D3DLOCK_READONLY;
const bool skipWait = (readOnly && managed) || scratch || (readOnly && systemmem && !dirty);
const bool skipWait = (managed && !uploading) || (readOnly && managed) || scratch || (readOnly && systemmem && !dirty);
if (alloced)
std::memset(physSlice.mapPtr, 0, physSlice.length);
else if (!skipWait) {
if (!WaitForResource(mappedBuffer, Flags))
return D3DERR_WASSTILLDRAWING;
pResource->ClearUploading();
}
}
else {
@ -4036,7 +4050,22 @@ namespace dxvk {
// Do we have a pending copy?
if (!pResource->GetReadOnlyLocked(Subresource)) {
// Only flush buffer -> image if we actually have an image
if (pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED)
if (pResource->IsManaged()) {
pResource->SetNeedsUpload(Subresource, true);
for (uint32_t tex = m_activeTextures; tex; tex &= tex - 1) {
// Guaranteed to not be nullptr...
const uint32_t i = bit::tzcnt(tex);
auto texInfo = GetCommonTexture(m_state.textures[i]);
if (texInfo == pResource) {
m_activeTexturesToUpload |= 1 << i;
// We can early out here, no need to add another index for this.
break;
}
}
}
else if (pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED)
this->FlushImage(pResource, Subresource);
}
@ -4075,6 +4104,8 @@ namespace dxvk {
auto convertFormat = pResource->GetFormatMapping().ConversionFormatInfo;
pResource->SetUploading(Subresource, true);
if (likely(convertFormat.FormatType == D3D9ConversionFormat_None)) {
EmitCs([
cSrcBuffer = copyBuffer,
@ -4647,20 +4678,6 @@ namespace dxvk {
}
inline D3D9ShaderMasks D3D9DeviceEx::GetShaderMasks() {
const auto* shader = GetCommonShader(m_state.pixelShader);
if (likely(shader != nullptr))
return shader->GetShaderMask();
// TODO: What fixed function textures are in use?
// Currently we are making all 8 of them as in use here.
// The RT output is always 0 for fixed function.
return D3D9ShaderMasks{ 0b1111111, 0b1 };
}
inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) {
const uint32_t bit = 1 << index;
@ -4675,21 +4692,30 @@ namespace dxvk {
}
inline void D3D9DeviceEx::UpdateActiveRTTextures(uint32_t index) {
inline void D3D9DeviceEx::UpdateActiveTextures(uint32_t index) {
const uint32_t bit = 1 << index;
m_activeRTTextures &= ~bit;
m_activeRTTextures &= ~bit;
m_activeTextures &= ~bit;
m_activeTexturesToUpload &= ~bit;
auto tex = GetCommonTexture(m_state.textures[index]);
if (tex != nullptr && tex->IsRenderTarget())
m_activeRTTextures |= bit;
if (tex != nullptr) {
m_activeTextures |= bit;
if (unlikely(tex->IsRenderTarget()))
m_activeRTTextures |= bit;
if (unlikely(tex->NeedsAnyUpload()))
m_activeTexturesToUpload |= bit;
}
UpdateActiveHazards();
}
inline void D3D9DeviceEx::UpdateActiveHazards() {
auto masks = GetShaderMasks();
auto masks = m_psShaderMasks;
masks.rtMask &= m_activeRTs;
masks.samplerMask &= m_activeRTTextures;
@ -4727,6 +4753,26 @@ namespace dxvk {
}
/**
 * \brief Uploads pending data of bound managed textures
 *
 * For every texture slot selected by \c mask, flushes each subresource
 * that is flagged in the texture's upload bitmask and then clears the
 * texture's pending flags.
 *
 * \param [in] mask Bitmask of texture slots (indices into
 *                  m_state.textures) to process. Every set bit must
 *                  refer to a slot that has a texture bound.
 */
void D3D9DeviceEx::UploadManagedTextures(uint32_t mask) {
  for (uint32_t slots = mask; slots; slots &= slots - 1) {
    // Guaranteed to not be nullptr: upload bits are only ever
    // set for slots that currently have a texture bound.
    auto texInfo = GetCommonTexture(m_state.textures[bit::tzcnt(slots)]);
    auto& pending = texInfo->GetUploadBitmask();

    for (uint32_t i = 0; i < pending.dwordCount(); i++) {
      // Walk the set bits of this dword; each one is a subresource index.
      for (uint32_t bits = pending.dword(i); bits; bits &= bits - 1) {
        const uint32_t subresource = i * 32 + bit::tzcnt(bits);
        this->FlushImage(texInfo, subresource);
      }
    }

    texInfo->ClearNeedsUpload();
  }

  // Only retire the slots that were actually processed. The caller may
  // pass a subset of m_activeTexturesToUpload (filtered by the samplers
  // the current shaders use); slots outside of that subset must stay
  // pending so they are uploaded once a shader samples them.
  m_activeTexturesToUpload &= ~mask;
}
template <bool Points>
void D3D9DeviceEx::UpdatePointMode() {
if constexpr (!Points) {
@ -5382,6 +5428,11 @@ namespace dxvk {
FlushBuffer(vbo);
}
uint32_t texturesToUpload = m_activeTexturesToUpload;
texturesToUpload &= m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask;
if (unlikely(texturesToUpload != 0))
UploadManagedTextures(texturesToUpload);
auto* ibo = GetCommonBuffer(m_state.indices);
if (ibo != nullptr && ibo->NeedsUpload())
FlushBuffer(ibo);

View File

@ -734,16 +734,16 @@ namespace dxvk {
void Flush();
D3D9ShaderMasks GetShaderMasks();
void UpdateActiveRTs(uint32_t index);
void UpdateActiveRTTextures(uint32_t index);
void UpdateActiveTextures(uint32_t index);
void UpdateActiveHazards();
void MarkRenderHazards();
void UploadManagedTextures(uint32_t mask);
template <bool Points>
void UpdatePointMode();
@ -1024,6 +1024,11 @@ namespace dxvk {
uint32_t m_activeRTTextures = 0;
uint32_t m_activeHazards = 0;
uint32_t m_alphaSwizzleRTs = 0;
// Bit i is set while m_state.textures[i] holds a texture
// (maintained by UpdateActiveTextures).
uint32_t m_activeTextures = 0;
// Bit i is set while the texture bound to slot i has subresources
// flagged as needing an upload; consumed by UploadManagedTextures.
uint32_t m_activeTexturesToUpload = 0;
// Cached shader mask of the bound vertex shader; default-constructed
// when no vertex shader is bound.
D3D9ShaderMasks m_vsShaderMasks = D3D9ShaderMasks();
// Cached shader mask of the bound pixel shader; FixedFunctionMask
// when no pixel shader is bound.
D3D9ShaderMasks m_psShaderMasks = FixedFunctionMask;
D3D9ViewportInfo m_viewportInfo;

View File

@ -61,6 +61,13 @@ namespace dxvk {
m_shaders = pModule->compile(*pDxsoModuleInfo, name, AnalysisInfo, constantLayout);
m_isgn = pModule->isgn();
m_usedSamplers = pModule->usedSamplers();
// Shift up these sampler bits so we can just
// do an or per-draw in the device.
// We shift by 17 because 16 ps samplers + 1 dmap (tess)
if (ShaderStage == VK_SHADER_STAGE_VERTEX_BIT)
m_usedSamplers <<= 17;
m_usedRTs = pModule->usedRTs();
m_info = pModule->info();

View File

@ -18,6 +18,9 @@ namespace dxvk {
uint32_t rtMask;
};
// Shader masks used while no pixel shader is bound (fixed function).
// All 8 fixed-function texture stages are marked as in use (sampler
// bits 0-7), and the RT output is always 0, so only RT bit 0 is set.
static constexpr D3D9ShaderMasks FixedFunctionMask =
{ 0b11111111, 0b1 };
struct D3D9MipFilter {
bool MipsEnabled;
VkSamplerMipmapMode MipFilter;