[d3d11] Optimize UAV binding

- UpdateBuffer is faster than ClearBuffer for small updates.
- We shouldn't dispatch *two* CS commands for each UAV, one is enough.
This commit is contained in:
Philip Rebohle 2018-09-27 16:50:34 +02:00
parent 518ab2ebdd
commit 161fb6215a
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
4 changed files with 26 additions and 63 deletions

View File

@ -2181,13 +2181,8 @@ namespace dxvk {
DxbcProgramType::ComputeShader,
m_state.cs.unorderedAccessViews,
StartSlot, NumUAVs,
ppUnorderedAccessViews);
if (pUAVInitialCounts != nullptr) {
InitUnorderedAccessViewCounters(
NumUAVs, ppUnorderedAccessViews,
pUAVInitialCounts);
}
ppUnorderedAccessViews,
pUAVInitialCounts);
}
@ -2289,17 +2284,12 @@ namespace dxvk {
// UAVs are made available to all shader stages in
// the graphics pipeline even though this code may
// suggest that they are limited to the pixel shader.
// This behaviour is only required for FL_11_1.
SetUnorderedAccessViews(
DxbcProgramType::PixelShader,
m_state.ps.unorderedAccessViews,
UAVStartSlot, NumUAVs,
ppUnorderedAccessViews);
if (pUAVInitialCounts != nullptr) {
InitUnorderedAccessViewCounters(NumUAVs,
ppUnorderedAccessViews, pUAVInitialCounts);
}
ppUnorderedAccessViews,
pUAVInitialCounts);
}
BindFramebuffer(spillOnBind);
@ -2882,14 +2872,24 @@ namespace dxvk {
void D3D11DeviceContext::BindUnorderedAccessView(
UINT UavSlot,
UINT CtrSlot,
D3D11UnorderedAccessView* pUav) {
D3D11UnorderedAccessView* pUav,
UINT Counter) {
EmitCs([
cUavSlotId = UavSlot,
cCtrSlotId = CtrSlot,
cImageView = pUav != nullptr ? pUav->GetImageView() : nullptr,
cBufferView = pUav != nullptr ? pUav->GetBufferView() : nullptr,
cCounterSlice = pUav != nullptr ? pUav->GetCounterSlice() : DxvkBufferSlice()
cCounterSlice = pUav != nullptr ? pUav->GetCounterSlice() : DxvkBufferSlice(),
cCounterValue = Counter
] (DxvkContext* ctx) {
if (cCounterSlice.defined() && cCounterValue != ~0u) {
ctx->updateBuffer(
cCounterSlice.buffer(),
cCounterSlice.offset(),
sizeof(uint32_t),
&cCounterValue);
}
ctx->bindResourceView (cUavSlotId, cImageView, cBufferView);
ctx->bindResourceBuffer (cCtrSlotId, cCounterSlice);
});
@ -2989,7 +2989,8 @@ namespace dxvk {
D3D11UnorderedAccessBindings& Bindings,
UINT StartSlot,
UINT NumUAVs,
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews) {
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
const UINT* pUAVInitialCounts) {
const uint32_t uavSlotId = computeResourceSlotId(
ShaderStage, DxbcBindingType::UnorderedAccessView,
StartSlot);
@ -3003,7 +3004,8 @@ namespace dxvk {
if (Bindings[StartSlot + i] != uav) {
Bindings[StartSlot + i] = uav;
BindUnorderedAccessView(uavSlotId + i, ctrSlotId + i, uav);
BindUnorderedAccessView(uavSlotId + i, ctrSlotId + i, uav,
pUAVInitialCounts ? pUAVInitialCounts[i] : ~0u);
}
}
}
@ -3028,31 +3030,6 @@ namespace dxvk {
}
/**
 * \brief Initializes UAV counters
 *
 * Walks the given view array and, for every non-null view
 * whose counter slice is defined, emits a command via EmitCs
 * that clears the whole counter slice to the matching entry
 * of the initial count array. Entries with an initial count
 * of 0xFFFFFFFF are skipped, so no clear is emitted for them.
 *
 * \param [in] NumUAVs Number of entries in both arrays
 * \param [in] ppUnorderedAccessViews Views to process
 * \param [in] pUAVInitialCounts Initial counter values. Read
 *    for every non-null view, so it must not be null here.
 */
void D3D11DeviceContext::InitUnorderedAccessViewCounters(
        UINT                              NumUAVs,
        ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
  const UINT*                             pUAVInitialCounts) {
  for (uint32_t slot = 0; slot != NumUAVs; ++slot) {
    auto view = static_cast<D3D11UnorderedAccessView*>(ppUnorderedAccessViews[slot]);

    // Null entries have no counter to initialize
    if (view == nullptr)
      continue;

    const DxvkBufferSlice ctrSlice = view->GetCounterSlice();
    const D3D11UavCounter ctrInit  = { pUAVInitialCounts[slot] };

    // Skip views without a counter, as well as
    // entries carrying the 0xFFFFFFFF marker value
    if (!ctrSlice.defined() || ctrInit.atomicCtr == 0xFFFFFFFFu)
      continue;

    EmitCs([cSlice = ctrSlice, cInit = ctrInit] (DxvkContext* ctx) {
      ctx->clearBuffer(
        cSlice.buffer(),
        cSlice.offset(),
        cSlice.length(),
        cInit.atomicCtr);
    });
  }
}
void D3D11DeviceContext::GetConstantBuffers(
const D3D11ConstantBufferBindings& Bindings,
UINT StartSlot,
@ -3175,7 +3152,7 @@ namespace dxvk {
for (uint32_t i = 0; i < Bindings.size(); i++) {
BindUnorderedAccessView(
uavSlotId + i, ctrSlotId + i,
Bindings[i].ptr());
Bindings[i].ptr(), ~0u);
}
}

View File

@ -701,7 +701,8 @@ namespace dxvk {
void BindUnorderedAccessView(
UINT UavSlot,
UINT CtrSlot,
D3D11UnorderedAccessView* pUav);
D3D11UnorderedAccessView* pUav,
UINT Counter);
void DiscardBuffer(
D3D11Buffer* pBuffer);
@ -734,18 +735,14 @@ namespace dxvk {
D3D11UnorderedAccessBindings& Bindings,
UINT StartSlot,
UINT NumUAVs,
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews);
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
const UINT* pUAVInitialCounts);
void SetRenderTargets(
UINT NumViews,
ID3D11RenderTargetView* const* ppRenderTargetViews,
ID3D11DepthStencilView* pDepthStencilView);
void InitUnorderedAccessViewCounters(
UINT NumUAVs,
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
const UINT* pUAVInitialCounts);
void GetConstantBuffers(
const D3D11ConstantBufferBindings& Bindings,
UINT StartSlot,

View File

@ -1404,7 +1404,7 @@ namespace dxvk {
const auto& devInfo = m_dxvkAdapter->deviceProperties();
VkDeviceSize uavCounterSliceLength = align<VkDeviceSize>(
sizeof(D3D11UavCounter), devInfo.limits.minStorageBufferOffsetAlignment);
sizeof(uint32_t), devInfo.limits.minStorageBufferOffsetAlignment);
DxvkBufferCreateInfo uavCounterInfo;
uavCounterInfo.size = 4096 * uavCounterSliceLength;

View File

@ -8,17 +8,6 @@ namespace dxvk {
class D3D11Device;
/**
 * \brief UAV counter structure
 *
 * Data structure passed to shaders that use
 * append/consume buffer functionality. Holds
 * a single 32-bit counter value.
 */
struct D3D11UavCounter {
// Counter value. NOTE(review): presumably the append/consume
// counter that shaders modify atomically - confirm against
// the shader compiler.
uint32_t atomicCtr;
};
/**
* \brief Unordered access view
*