From d8206f628659d468c870430daa271d5bec6e860d Mon Sep 17 00:00:00 2001 From: Sil Vilerino Date: Mon, 2 May 2022 10:00:10 -0700 Subject: [PATCH] d3d12: Add video decode implementation of pipe_video_codec Acked-by: Jesse Natalie Part-of: --- src/gallium/drivers/d3d12/d3d12_util.h | 246 +++ ...12_video_array_of_textures_dpb_manager.cpp | 314 ++++ ...3d12_video_array_of_textures_dpb_manager.h | 132 ++ .../drivers/d3d12/d3d12_video_buffer.cpp | 329 ++++ .../drivers/d3d12/d3d12_video_buffer.h | 89 ++ src/gallium/drivers/d3d12/d3d12_video_dec.cpp | 1361 +++++++++++++++++ src/gallium/drivers/d3d12/d3d12_video_dec.h | 244 +++ .../drivers/d3d12/d3d12_video_dec_h264.cpp | 602 ++++++++ .../drivers/d3d12/d3d12_video_dec_h264.h | 253 +++ .../d3d12/d3d12_video_dec_references_mgr.cpp | 449 ++++++ .../d3d12/d3d12_video_dec_references_mgr.h | 220 +++ .../d3d12/d3d12_video_dpb_storage_manager.h | 95 ++ .../d3d12_video_texture_array_dpb_manager.cpp | 308 ++++ .../d3d12_video_texture_array_dpb_manager.h | 126 ++ src/gallium/drivers/d3d12/d3d12_video_types.h | 119 ++ src/gallium/drivers/d3d12/meson.build | 6 + 16 files changed, 4893 insertions(+) create mode 100644 src/gallium/drivers/d3d12/d3d12_util.h create mode 100644 src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.cpp create mode 100644 src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.h create mode 100644 src/gallium/drivers/d3d12/d3d12_video_buffer.cpp create mode 100644 src/gallium/drivers/d3d12/d3d12_video_buffer.h create mode 100644 src/gallium/drivers/d3d12/d3d12_video_dec.cpp create mode 100644 src/gallium/drivers/d3d12/d3d12_video_dec.h create mode 100644 src/gallium/drivers/d3d12/d3d12_video_dec_h264.cpp create mode 100644 src/gallium/drivers/d3d12/d3d12_video_dec_h264.h create mode 100644 src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.cpp create mode 100644 src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.h create mode 100644 
src/gallium/drivers/d3d12/d3d12_video_dpb_storage_manager.h create mode 100644 src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.cpp create mode 100644 src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.h create mode 100644 src/gallium/drivers/d3d12/d3d12_video_types.h diff --git a/src/gallium/drivers/d3d12/d3d12_util.h b/src/gallium/drivers/d3d12/d3d12_util.h new file mode 100644 index 00000000000..550d701f5ab --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_util.h @@ -0,0 +1,246 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_UTIL_H +#define D3D12_UTIL_H + +//------------------------------------------------------------------------------------------------ +template +inline void D3D12DecomposeSubresource( UINT Subresource, UINT MipLevels, UINT ArraySize, _Out_ T& MipSlice, _Out_ U& ArraySlice, _Out_ V& PlaneSlice ) noexcept +{ + MipSlice = static_cast(Subresource % MipLevels); + ArraySlice = static_cast((Subresource / MipLevels) % ArraySize); + PlaneSlice = static_cast(Subresource / (MipLevels * ArraySize)); +} + +//------------------------------------------------------------------------------------------------ +constexpr UINT D3D12CalcSubresource( UINT MipSlice, UINT ArraySlice, UINT PlaneSlice, UINT MipLevels, UINT ArraySize ) noexcept +{ + return MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize; +} + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_BARRIER : public D3D12_RESOURCE_BARRIER +{ + CD3DX12_RESOURCE_BARRIER() = default; + explicit CD3DX12_RESOURCE_BARRIER(const D3D12_RESOURCE_BARRIER &o) noexcept : + D3D12_RESOURCE_BARRIER(o) + {} + static inline CD3DX12_RESOURCE_BARRIER Transition( + _In_ ID3D12Resource* pResource, + D3D12_RESOURCE_STATES stateBefore, + D3D12_RESOURCE_STATES stateAfter, + UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + D3D12_RESOURCE_BARRIER_FLAGS flags = D3D12_RESOURCE_BARRIER_FLAG_NONE) noexcept + { + CD3DX12_RESOURCE_BARRIER result = {}; + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + result.Flags = flags; + barrier.Transition.pResource = pResource; + barrier.Transition.StateBefore = stateBefore; + barrier.Transition.StateAfter = stateAfter; + barrier.Transition.Subresource = subresource; + return result; + } + static inline CD3DX12_RESOURCE_BARRIER Aliasing( + _In_ ID3D12Resource* pResourceBefore, + _In_ ID3D12Resource* pResourceAfter) noexcept + { + 
CD3DX12_RESOURCE_BARRIER result = {}; + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; + barrier.Aliasing.pResourceBefore = pResourceBefore; + barrier.Aliasing.pResourceAfter = pResourceAfter; + return result; + } + static inline CD3DX12_RESOURCE_BARRIER UAV( + _In_ ID3D12Resource* pResource) noexcept + { + CD3DX12_RESOURCE_BARRIER result = {}; + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = pResource; + return result; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_DESC : public D3D12_RESOURCE_DESC +{ + CD3DX12_RESOURCE_DESC() = default; + explicit CD3DX12_RESOURCE_DESC( const D3D12_RESOURCE_DESC& o ) noexcept : + D3D12_RESOURCE_DESC( o ) + {} + CD3DX12_RESOURCE_DESC( + D3D12_RESOURCE_DIMENSION dimension, + UINT64 alignment, + UINT64 width, + UINT height, + UINT16 depthOrArraySize, + UINT16 mipLevels, + DXGI_FORMAT format, + UINT sampleCount, + UINT sampleQuality, + D3D12_TEXTURE_LAYOUT layout, + D3D12_RESOURCE_FLAGS flags ) noexcept + { + Dimension = dimension; + Alignment = alignment; + Width = width; + Height = height; + DepthOrArraySize = depthOrArraySize; + MipLevels = mipLevels; + Format = format; + SampleDesc.Count = sampleCount; + SampleDesc.Quality = sampleQuality; + Layout = layout; + Flags = flags; + } + static inline CD3DX12_RESOURCE_DESC Buffer( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, resAllocInfo.Alignment, resAllocInfo.SizeInBytes, + 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); + } + static inline CD3DX12_RESOURCE_DESC Buffer( + UINT64 width, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC( 
D3D12_RESOURCE_DIMENSION_BUFFER, alignment, width, 1, 1, 1, + DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex1D( + DXGI_FORMAT format, + UINT64 width, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE1D, alignment, width, 1, arraySize, + mipLevels, format, 1, 0, layout, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex2D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + UINT sampleCount = 1, + UINT sampleQuality = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE2D, alignment, width, height, arraySize, + mipLevels, format, sampleCount, sampleQuality, layout, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex3D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 depth, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE3D, alignment, width, height, depth, + mipLevels, format, 1, 0, layout, flags ); + } + inline UINT16 Depth() const noexcept + { return (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1u); } + inline UINT16 ArraySize() const noexcept + { return (Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
DepthOrArraySize : 1u); } + inline UINT CalcSubresource(UINT MipSlice, UINT ArraySlice, UINT PlaneSlice) noexcept + { return D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize()); } +}; +inline bool operator==( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) noexcept +{ + return l.Dimension == r.Dimension && + l.Alignment == r.Alignment && + l.Width == r.Width && + l.Height == r.Height && + l.DepthOrArraySize == r.DepthOrArraySize && + l.MipLevels == r.MipLevels && + l.Format == r.Format && + l.SampleDesc.Count == r.SampleDesc.Count && + l.SampleDesc.Quality == r.SampleDesc.Quality && + l.Layout == r.Layout && + l.Flags == r.Flags; +} +inline bool operator!=( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) noexcept +{ return !( l == r ); } + + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_HEAP_PROPERTIES : public D3D12_HEAP_PROPERTIES +{ + CD3DX12_HEAP_PROPERTIES() = default; + explicit CD3DX12_HEAP_PROPERTIES(const D3D12_HEAP_PROPERTIES &o) noexcept : + D3D12_HEAP_PROPERTIES(o) + {} + CD3DX12_HEAP_PROPERTIES( + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + UINT creationNodeMask = 1, + UINT nodeMask = 1 ) noexcept + { + Type = D3D12_HEAP_TYPE_CUSTOM; + CPUPageProperty = cpuPageProperty; + MemoryPoolPreference = memoryPoolPreference; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + explicit CD3DX12_HEAP_PROPERTIES( + D3D12_HEAP_TYPE type, + UINT creationNodeMask = 1, + UINT nodeMask = 1 ) noexcept + { + Type = type; + CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + bool IsCPUAccessible() const noexcept + { + return Type == D3D12_HEAP_TYPE_UPLOAD || Type == D3D12_HEAP_TYPE_READBACK || (Type == D3D12_HEAP_TYPE_CUSTOM && + (CPUPageProperty == 
D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE || CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK)); + } +}; +inline bool operator==( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) noexcept +{ + return l.Type == r.Type && l.CPUPageProperty == r.CPUPageProperty && + l.MemoryPoolPreference == r.MemoryPoolPreference && + l.CreationNodeMask == r.CreationNodeMask && + l.VisibleNodeMask == r.VisibleNodeMask; +} +inline bool operator!=( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) noexcept +{ return !( l == r ); } + +#endif \ No newline at end of file diff --git a/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.cpp b/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.cpp new file mode 100644 index 00000000000..f230036c536 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.cpp @@ -0,0 +1,314 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_video_array_of_textures_dpb_manager.h" +#include +/// +/// d3d12_array_of_textures_dpb_manager +/// +// Differences with d3d12_texture_array_dpb_manager +// Uses an std::vector with individual D3D resources as backing storage instead of an D3D12 Texture Array +// Supports dynamic pool capacity extension (by pushing back a new D3D12Resource) of the pool + +#ifndef _WIN32 +#include +#endif + +#define D3D12_IGNORE_SDK_LAYERS +#include +#include "d3d12_util.h" + +void +d3d12_array_of_textures_dpb_manager::create_reconstructed_picture_allocations(ID3D12Resource **ppResource) +{ + D3D12_HEAP_PROPERTIES Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, m_nodeMask, m_nodeMask); + + CD3DX12_RESOURCE_DESC reconstructedPictureResourceDesc = CD3DX12_RESOURCE_DESC::Tex2D(m_encodeFormat, + m_encodeResolution.Width, + m_encodeResolution.Height, + 1, + 1, + 1, + 0, + m_resourceAllocFlags); + HRESULT hr = m_pDevice->CreateCommittedResource(&Properties, + D3D12_HEAP_FLAG_NONE, + &reconstructedPictureResourceDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(ppResource)); + if (FAILED(hr)) { + debug_printf("CreateCommittedResource failed with HR %x\n", hr); + assert(false); + } +} + +d3d12_array_of_textures_dpb_manager::d3d12_array_of_textures_dpb_manager( + uint32_t dpbInitialSize, + ID3D12Device * pDevice, + DXGI_FORMAT encodeSessionFormat, + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC encodeSessionResolution, + D3D12_RESOURCE_FLAGS resourceAllocFlags, + bool setNullSubresourcesOnAllZero, + uint32_t nodeMask, + bool allocatePool) + : m_dpbInitialSize(dpbInitialSize), + m_pDevice(pDevice), + m_encodeFormat(encodeSessionFormat), + 
m_encodeResolution(encodeSessionResolution), + m_resourceAllocFlags(resourceAllocFlags), + m_NullSubresourcesOnAllZero(setNullSubresourcesOnAllZero), + m_nodeMask(nodeMask) +{ + // Initialize D3D12 DPB exposed in this class implemented CRUD interface for a DPB + clear_decode_picture_buffer(); + + // Sometimes the client of this class can reuse allocations from an upper layer + // and doesn't need to get fresh/tracked allocations + if(allocatePool) + { + // Implement a reusable pool of D3D12 Resources as an array of textures + m_ResourcesPool.resize(m_dpbInitialSize); + + // Build resource pool with commitedresources with a d3ddevice and the encoding session settings (eg. resolution) and + // the reference_only flag + for (auto &reusableRes : m_ResourcesPool) { + reusableRes.isFree = true; + create_reconstructed_picture_allocations(reusableRes.pResource.GetAddressOf()); + } + } +} + +uint32_t +d3d12_array_of_textures_dpb_manager::clear_decode_picture_buffer() +{ + uint32_t untrackCount = 0; + // Mark resources used in DPB as re-usable in the resources pool + for (auto &dpbResource : m_D3D12DPB.pResources) { + // Don't assert the untracking result here in case the DPB contains resources not adquired using the pool methods + // in this interface + untrackCount += untrack_reconstructed_picture_allocation({ dpbResource, 0 }) ? 
1 : 0; + } + + // Clear DPB + m_D3D12DPB.pResources.clear(); + m_D3D12DPB.pSubresources.clear(); + m_D3D12DPB.pHeaps.clear(); + m_D3D12DPB.pResources.reserve(m_dpbInitialSize); + m_D3D12DPB.pSubresources.reserve(m_dpbInitialSize); + m_D3D12DPB.pHeaps.reserve(m_dpbInitialSize); + + return untrackCount; +} + +// Assigns a reference frame at a given position +void +d3d12_array_of_textures_dpb_manager::assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture, + uint32_t dpbPosition) +{ + assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size()); + assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size()); + + assert (dpbPosition < m_D3D12DPB.pResources.size()); + + m_D3D12DPB.pResources[dpbPosition] = pReconPicture.pReconstructedPicture; + m_D3D12DPB.pSubresources[dpbPosition] = pReconPicture.ReconstructedPictureSubresource; + m_D3D12DPB.pHeaps[dpbPosition] = pReconPicture.pVideoHeap; +} + +// Adds a new reference frame at a given position +void +d3d12_array_of_textures_dpb_manager::insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture, + uint32_t dpbPosition) +{ + assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size()); + assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size()); + + if (dpbPosition > m_D3D12DPB.pResources.size()) { + // extend capacity + m_D3D12DPB.pResources.resize(dpbPosition); + m_D3D12DPB.pSubresources.resize(dpbPosition); + m_D3D12DPB.pHeaps.resize(dpbPosition); + } + + m_D3D12DPB.pResources.insert(m_D3D12DPB.pResources.begin() + dpbPosition, pReconPicture.pReconstructedPicture); + m_D3D12DPB.pSubresources.insert(m_D3D12DPB.pSubresources.begin() + dpbPosition, + pReconPicture.ReconstructedPictureSubresource); + m_D3D12DPB.pHeaps.insert(m_D3D12DPB.pHeaps.begin() + dpbPosition, pReconPicture.pVideoHeap); +} + +// Gets a reference frame at a given position +d3d12_video_reconstructed_picture +d3d12_array_of_textures_dpb_manager::get_reference_frame(uint32_t dpbPosition) +{ + 
assert(dpbPosition < m_D3D12DPB.pResources.size()); + + d3d12_video_reconstructed_picture retVal = { m_D3D12DPB.pResources[dpbPosition], + m_D3D12DPB.pSubresources[dpbPosition], + m_D3D12DPB.pHeaps[dpbPosition] }; + + return retVal; +} + +// Removes a new reference frame at a given position and returns operation success +bool +d3d12_array_of_textures_dpb_manager::remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked) +{ + assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size()); + assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size()); + + assert(dpbPosition < m_D3D12DPB.pResources.size()); + + // If removed resource came from resource pool, mark it as free + // to free it for a new usage + // Don't assert the untracking result here in case the DPB contains resources not adquired using the pool methods in + // this interface + bool resUntracked = untrack_reconstructed_picture_allocation({ m_D3D12DPB.pResources[dpbPosition], 0 }); + + if (pResourceUntracked != nullptr) { + *pResourceUntracked = resUntracked; + } + + // Remove from DPB tables + m_D3D12DPB.pResources.erase(m_D3D12DPB.pResources.begin() + dpbPosition); + m_D3D12DPB.pSubresources.erase(m_D3D12DPB.pSubresources.begin() + dpbPosition); + m_D3D12DPB.pHeaps.erase(m_D3D12DPB.pHeaps.begin() + dpbPosition); + + return true; +} + +// Returns true if the trackedItem was allocated (and is being tracked) by this class +bool +d3d12_array_of_textures_dpb_manager::is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem) +{ + for (auto &reusableRes : m_ResourcesPool) { + if (trackedItem.pReconstructedPicture == reusableRes.pResource.Get() && !reusableRes.isFree) { + return true; + } + } + return false; +} + +// Returns whether it found the tracked resource on this instance pool tracking and was able to free it +bool +d3d12_array_of_textures_dpb_manager::untrack_reconstructed_picture_allocation( + d3d12_video_reconstructed_picture trackedItem) +{ + for (auto 
&reusableRes : m_ResourcesPool) { + if (trackedItem.pReconstructedPicture == reusableRes.pResource.Get()) { + reusableRes.isFree = true; + return true; + } + } + return false; +} + +// Returns a fresh resource for a new reconstructed picture to be written to +// this class implements the dpb allocations as an array of textures +d3d12_video_reconstructed_picture +d3d12_array_of_textures_dpb_manager::get_new_tracked_picture_allocation() +{ + d3d12_video_reconstructed_picture freshAllocation = { // pResource + nullptr, + // subresource + 0 + }; + + // Find first (if any) available resource to (re-)use + bool bAvailableResourceInPool = false; + for (auto &reusableRes : m_ResourcesPool) { + if (reusableRes.isFree) { + bAvailableResourceInPool = true; + freshAllocation.pReconstructedPicture = reusableRes.pResource.Get(); + reusableRes.isFree = false; + break; + } + } + + if (!bAvailableResourceInPool) { + // Expand resources pool by one + debug_printf( + "[d3d12_array_of_textures_dpb_manager] ID3D12Resource Pool capacity (%ld) exceeded - extending capacity " + "and appending new allocation at the end", + m_ResourcesPool.size()); + d3d12_reusable_resource newPoolEntry = {}; + newPoolEntry.isFree = false; + create_reconstructed_picture_allocations(newPoolEntry.pResource.GetAddressOf()); + m_ResourcesPool.push_back(newPoolEntry); + + // Assign it to current ask + freshAllocation.pReconstructedPicture = newPoolEntry.pResource.Get(); + } + + return freshAllocation; +} + +uint32_t +d3d12_array_of_textures_dpb_manager::get_number_of_pics_in_dpb() +{ + assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size()); + assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size()); + + assert(m_D3D12DPB.pResources.size() < UINT32_MAX); + return static_cast(m_D3D12DPB.pResources.size()); +} + +d3d12_video_reference_frames +d3d12_array_of_textures_dpb_manager::get_current_reference_frames() +{ + // If all subresources are 0, the DPB is loaded with an array of individual 
textures, the D3D Encode API expects + // pSubresources to be null in this case The D3D Decode API expects it to be non-null even with all zeroes. + uint32_t *pSubresources = m_D3D12DPB.pSubresources.data(); + if ((std::all_of(m_D3D12DPB.pSubresources.cbegin(), m_D3D12DPB.pSubresources.cend(), [](int i) { return i == 0; })) && + m_NullSubresourcesOnAllZero) { + pSubresources = nullptr; + } + + d3d12_video_reference_frames retVal = { get_number_of_pics_in_dpb(), + m_D3D12DPB.pResources.data(), + pSubresources, + m_D3D12DPB.pHeaps.data() }; + + return retVal; +} + +// number of resources in the pool that are marked as in use +uint32_t +d3d12_array_of_textures_dpb_manager::get_number_of_in_use_allocations() +{ + uint32_t countOfInUseResourcesInPool = 0; + for (auto &reusableRes : m_ResourcesPool) { + if (!reusableRes.isFree) { + countOfInUseResourcesInPool++; + } + } + return countOfInUseResourcesInPool; +} + +// Returns the number of pictures currently stored in the DPB +uint32_t +d3d12_array_of_textures_dpb_manager::get_number_of_tracked_allocations() +{ + assert(m_ResourcesPool.size() < UINT32_MAX); + return static_cast(m_ResourcesPool.size()); +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.h b/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.h new file mode 100644 index 00000000000..437d5c0440b --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_array_of_textures_dpb_manager.h @@ -0,0 +1,132 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The 
above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#ifndef D3D12_VIDEO_ARRAY_OF_TEXTURES_DPB_MANAGER_H +#define D3D12_VIDEO_ARRAY_OF_TEXTURES_DPB_MANAGER_H + +#include "d3d12_video_dpb_storage_manager.h" +#include "d3d12_video_types.h" + +class d3d12_array_of_textures_dpb_manager : public d3d12_video_dpb_storage_manager_interface +{ + // d3d12_video_dpb_storage_manager_interface + public: + // Adds a new reference frame at a given position + void insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition); + + // Assigns a reference frame at a given position + void assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition); + + // Gets a reference frame at a given position + d3d12_video_reconstructed_picture get_reference_frame(uint32_t dpbPosition); + + // Removes a new reference frame at a given position and returns operation success + // pResourceUntracked is an optional output indicating if the removed resource was being tracked by the pool + bool remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked = nullptr); + + // Returns the resource allocation for a NEW picture + d3d12_video_reconstructed_picture get_new_tracked_picture_allocation(); + + // Returns true if the trackedItem was allocated (and is being tracked) by this class + bool 
is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem); + + // Returns whether it found the tracked resource on this instance pool tracking and was able to free it + bool untrack_reconstructed_picture_allocation(d3d12_video_reconstructed_picture trackedItem); + + // Returns the number of pictures currently stored in the DPB + uint32_t get_number_of_pics_in_dpb(); + + // Returns all the current reference frames stored + d3d12_video_reference_frames get_current_reference_frames(); + + // Removes all pictures from DPB + // returns the number of resources marked as reusable + uint32_t clear_decode_picture_buffer(); + + // number of resources in the pool that are marked as in use + uint32_t get_number_of_in_use_allocations(); + + uint32_t get_number_of_tracked_allocations(); + + // d3d12_array_of_textures_dpb_manager + public: + d3d12_array_of_textures_dpb_manager( + uint32_t dpbInitialSize, // Maximum in use resources for a DPB of size x should be x+1 for cases when a P frame + // is using the x references in the L0 list and also using an extra resource to output + // it's own recon pic. 
+ ID3D12Device * pDevice, + DXGI_FORMAT encodeSessionFormat, + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC encodeSessionResolution, + D3D12_RESOURCE_FLAGS resourceAllocFlags = D3D12_RESOURCE_FLAG_NONE, + bool setNullSubresourcesOnAllZero = false, + uint32_t nodeMask = 0, + bool allocatePool = true); + ~d3d12_array_of_textures_dpb_manager() + { } + + // d3d12_array_of_textures_dpb_manager + private: + void create_reconstructed_picture_allocations(ID3D12Resource **ppResource); + + size_t m_dpbInitialSize = 0; + ID3D12Device * m_pDevice; + DXGI_FORMAT m_encodeFormat; + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC m_encodeResolution; + + // DPB with array of resources backing storage + + struct d3d12_video_dpb + { + std::vector pResources; + std::vector pSubresources; + std::vector pHeaps; + }; + + d3d12_video_dpb m_D3D12DPB; + + // Flags used when creating the resource pool + // Usually if reference only is needed for d3d12 video use + // D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE + // D3D12_RESOURCE_FLAG_VIDEO_ENCODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE + D3D12_RESOURCE_FLAGS m_resourceAllocFlags; + + // Pool of resources to be aliased by the DPB without giving memory ownership + // This resources are allocated and released by this implementation + struct d3d12_reusable_resource + { + ComPtr pResource; + // subresource is always 0 on this AoT implementation of the resources pool + bool isFree; + }; + + std::vector m_ResourcesPool; + + // If all subresources are 0, the DPB is loaded with an array of individual textures, the D3D Encode API expects + // pSubresources to be null in this case The D3D Decode API expects it to be non-null even with all zeroes. 
+ bool m_NullSubresourcesOnAllZero = false; + + uint32_t m_nodeMask = 0; +}; + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_buffer.cpp b/src/gallium/drivers/d3d12/d3d12_video_buffer.cpp new file mode 100644 index 00000000000..a6e0be37a64 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_buffer.cpp @@ -0,0 +1,329 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_video_buffer.h" +#include "d3d12_resource.h" +#include "d3d12_video_dec.h" +#include "d3d12_residency.h" + +#include "util/format/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_video.h" +#include "vl/vl_video_buffer.h" +#include "util/u_sampler.h" + +/** + * creates a video buffer + */ +struct pipe_video_buffer * +d3d12_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *tmpl) +{ + assert(pipe); + assert(tmpl); + + /// + /// Initialize d3d12_video_buffer + /// + + + if (!(tmpl->buffer_format == PIPE_FORMAT_NV12)) { + debug_printf("[d3d12_video_buffer] buffer_format is only supported as PIPE_FORMAT_NV12.\n"); + return nullptr; + } + + if (!(pipe_format_to_chroma_format(tmpl->buffer_format) == PIPE_VIDEO_CHROMA_FORMAT_420)) { + debug_printf( + "[d3d12_video_buffer] tmpl->buffer_format only supported as a PIPE_VIDEO_CHROMA_FORMAT_420 format.\n"); + return nullptr; + } + + // Not using new doesn't call ctor and the initializations in the class declaration are lost + struct d3d12_video_buffer *pD3D12VideoBuffer = new d3d12_video_buffer; + + // Fill base template + pD3D12VideoBuffer->base = *tmpl; + pD3D12VideoBuffer->base.buffer_format = tmpl->buffer_format; + pD3D12VideoBuffer->base.context = pipe; + pD3D12VideoBuffer->base.width = tmpl->width; + pD3D12VideoBuffer->base.height = tmpl->height; + pD3D12VideoBuffer->base.interlaced = tmpl->interlaced; + pD3D12VideoBuffer->base.associated_data = nullptr; + pD3D12VideoBuffer->base.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET; + + // Fill vtable + pD3D12VideoBuffer->base.destroy = d3d12_video_buffer_destroy; + pD3D12VideoBuffer->base.get_sampler_view_planes = d3d12_video_buffer_get_sampler_view_planes; + pD3D12VideoBuffer->base.get_sampler_view_components = d3d12_video_buffer_get_sampler_view_components; + pD3D12VideoBuffer->base.get_surfaces = d3d12_video_buffer_get_surfaces; + 
pD3D12VideoBuffer->base.destroy_associated_data = d3d12_video_buffer_destroy_associated_data; + + struct pipe_resource templ; + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.bind = pD3D12VideoBuffer->base.bind; + templ.format = pD3D12VideoBuffer->base.buffer_format; + // YUV 4:2:0 formats in D3D12 need to have multiple of 2 dimensions + templ.width0 = align(pD3D12VideoBuffer->base.width, 2); + templ.height0 = align(pD3D12VideoBuffer->base.height, 2); + templ.depth0 = 1; + templ.array_size = 1; + templ.flags = 0; + + // This calls d3d12_create_resource as the function ptr is set in d3d12_screen.resource_create + pD3D12VideoBuffer->texture = (struct d3d12_resource *) pipe->screen->resource_create(pipe->screen, &templ); + d3d12_promote_to_permanent_residency((struct d3d12_screen*) pipe->screen, pD3D12VideoBuffer->texture); + + if (pD3D12VideoBuffer->texture == nullptr) { + debug_printf("[d3d12_video_buffer] d3d12_video_buffer_create - Call to resource_create() to create " + "d3d12_resource failed\n"); + goto failed; + } + + pD3D12VideoBuffer->num_planes = util_format_get_num_planes(pD3D12VideoBuffer->texture->overall_format); + assert(pD3D12VideoBuffer->num_planes == 2); + return &pD3D12VideoBuffer->base; + +failed: + if (pD3D12VideoBuffer != nullptr) { + d3d12_video_buffer_destroy((struct pipe_video_buffer *) pD3D12VideoBuffer); + } + + return nullptr; +} + +/** + * destroy this video buffer + */ +void +d3d12_video_buffer_destroy(struct pipe_video_buffer *buffer) +{ + struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) buffer; + + // Destroy pD3D12VideoBuffer->texture (if any) + if (pD3D12VideoBuffer->texture) { + pipe_resource *pBaseResource = &pD3D12VideoBuffer->texture->base.b; + pipe_resource_reference(&pBaseResource, NULL); + } + + // Destroy associated data (if any) + if (pD3D12VideoBuffer->base.associated_data != nullptr) { + 
d3d12_video_buffer_destroy_associated_data(pD3D12VideoBuffer->base.associated_data); + // Set to nullptr after cleanup, no dangling pointers + pD3D12VideoBuffer->base.associated_data = nullptr; + } + + // Destroy (if any) codec where the associated data came from + if (pD3D12VideoBuffer->base.codec != nullptr) { + d3d12_video_decoder_destroy(pD3D12VideoBuffer->base.codec); + // Set to nullptr after cleanup, no dangling pointers + pD3D12VideoBuffer->base.codec = nullptr; + } + + for (uint i = 0; i < pD3D12VideoBuffer->surfaces.size(); ++i) { + if (pD3D12VideoBuffer->surfaces[i] != NULL) { + pipe_surface_reference(&pD3D12VideoBuffer->surfaces[i], NULL); + } + } + + for (uint i = 0; i < pD3D12VideoBuffer->sampler_view_planes.size(); ++i) { + if (pD3D12VideoBuffer->sampler_view_planes[i] != NULL) { + pipe_sampler_view_reference(&pD3D12VideoBuffer->sampler_view_planes[i], NULL); + } + } + + for (uint i = 0; i < pD3D12VideoBuffer->sampler_view_components.size(); ++i) { + if (pD3D12VideoBuffer->sampler_view_components[i] != NULL) { + pipe_sampler_view_reference(&pD3D12VideoBuffer->sampler_view_components[i], NULL); + } + } + + delete pD3D12VideoBuffer; +} + +/* + * destroy the associated data + */ +void +d3d12_video_buffer_destroy_associated_data(void *associated_data) +{ } + +/** + * get an individual surfaces for each plane + */ +struct pipe_surface ** +d3d12_video_buffer_get_surfaces(struct pipe_video_buffer *buffer) +{ + assert(buffer); + struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) buffer; + struct pipe_context * pipe = pD3D12VideoBuffer->base.context; + struct pipe_surface surface_template = {}; + + // Some video frameworks iterate over [0..VL_MAX_SURFACES) and ignore the nullptr entries + // So we have to null initialize the other surfaces not used from [num_planes..VL_MAX_SURFACES) + // Like in src/gallium/frontends/va/surface.c + pD3D12VideoBuffer->surfaces.resize(VL_MAX_SURFACES, nullptr); + + // pCurPlaneResource refers to the 
planar resource, not the overall resource. + // in d3d12_resource this is handled by having a linked list of planes with + // d3dRes->base.next ptr to next plane resource + // starting with the plane 0 being the overall resource + struct pipe_resource *pCurPlaneResource = &pD3D12VideoBuffer->texture->base.b; + + for (uint PlaneSlice = 0; PlaneSlice < pD3D12VideoBuffer->num_planes; ++PlaneSlice) { + if (!pD3D12VideoBuffer->surfaces[PlaneSlice]) { + memset(&surface_template, 0, sizeof(surface_template)); + surface_template.format = + util_format_get_plane_format(pD3D12VideoBuffer->texture->overall_format, PlaneSlice); + + pD3D12VideoBuffer->surfaces[PlaneSlice] = + pipe->create_surface(pipe, pCurPlaneResource, &surface_template); + + if (!pD3D12VideoBuffer->surfaces[PlaneSlice]) { + goto error; + } + } + pCurPlaneResource = pCurPlaneResource->next; + } + + return pD3D12VideoBuffer->surfaces.data(); + +error: + for (uint PlaneSlice = 0; PlaneSlice < pD3D12VideoBuffer->num_planes; ++PlaneSlice) { + pipe_surface_reference(&pD3D12VideoBuffer->surfaces[PlaneSlice], NULL); + } + + return nullptr; +} + +/** + * get an individual sampler view for each plane + */ +struct pipe_sampler_view ** +d3d12_video_buffer_get_sampler_view_planes(struct pipe_video_buffer *buffer) +{ + assert(buffer); + struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) buffer; + struct pipe_context * pipe = pD3D12VideoBuffer->base.context; + struct pipe_sampler_view samplerViewTemplate; + + // Some video frameworks iterate over [0..VL_MAX_SURFACES) and ignore the nullptr entries + // So we have to null initialize the other surfaces not used from [num_planes..VL_MAX_SURFACES) + // Like in src/gallium/frontends/vdpau/surface.c + pD3D12VideoBuffer->sampler_view_planes.resize(VL_MAX_SURFACES, nullptr); + + // pCurPlaneResource refers to the planar resource, not the overall resource. 
+ // in d3d12_resource this is handled by having a linked list of planes with
+ // d3dRes->base.next ptr to next plane resource
+ // starting with the plane 0 being the overall resource
+ struct pipe_resource *pCurPlaneResource = &pD3D12VideoBuffer->texture->base.b;
+
+ for (uint i = 0; i < pD3D12VideoBuffer->num_planes; ++i) {
+ if (!pD3D12VideoBuffer->sampler_view_planes[i]) {
+ assert(pCurPlaneResource); // the d3d12_resource has a linked list with the exact same number of elements
+ // as planes
+
+ memset(&samplerViewTemplate, 0, sizeof(samplerViewTemplate));
+ u_sampler_view_default_template(&samplerViewTemplate, pCurPlaneResource, pCurPlaneResource->format);
+
+ pD3D12VideoBuffer->sampler_view_planes[i] =
+ pipe->create_sampler_view(pipe, pCurPlaneResource, &samplerViewTemplate);
+
+ if (!pD3D12VideoBuffer->sampler_view_planes[i]) {
+ goto error;
+ }
+ }
+
+ pCurPlaneResource = pCurPlaneResource->next;
+ }
+
+ return pD3D12VideoBuffer->sampler_view_planes.data();
+
+error:
+ for (uint i = 0; i < pD3D12VideoBuffer->num_planes; ++i) {
+ pipe_sampler_view_reference(&pD3D12VideoBuffer->sampler_view_planes[i], NULL);
+ }
+
+ return nullptr;
+}
+
+/**
+ * get an individual sampler view for each component
+ */
+struct pipe_sampler_view **
+d3d12_video_buffer_get_sampler_view_components(struct pipe_video_buffer *buffer)
+{
+ assert(buffer);
+ struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) buffer;
+ struct pipe_context * pipe = pD3D12VideoBuffer->base.context;
+ struct pipe_sampler_view samplerViewTemplate;
+
+ // pCurPlaneResource refers to the planar resource, not the overall resource.
+ // in d3d12_resource this is handled by having a linked list of planes with + // d3dRes->base.next ptr to next plane resource + // starting with the plane 0 being the overall resource + struct pipe_resource *pCurPlaneResource = &pD3D12VideoBuffer->texture->base.b; + + // At the end of the loop, "component" will have the total number of items valid in sampler_view_components + // since component can end up being <= VL_NUM_COMPONENTS, we assume VL_NUM_COMPONENTS first and then resize/adjust to + // fit the container size pD3D12VideoBuffer->sampler_view_components to the actual components number + pD3D12VideoBuffer->sampler_view_components.resize(VL_NUM_COMPONENTS, nullptr); + uint component = 0; + + for (uint i = 0; i < pD3D12VideoBuffer->num_planes; ++i) { + // For example num_components would be 1 for the Y plane (R8 in NV12), 2 for the UV plane (R8G8 in NV12) + unsigned num_components = util_format_get_nr_components(pCurPlaneResource->format); + + for (uint j = 0; j < num_components; ++j, ++component) { + assert(component < VL_NUM_COMPONENTS); + + if (!pD3D12VideoBuffer->sampler_view_components[component]) { + memset(&samplerViewTemplate, 0, sizeof(samplerViewTemplate)); + u_sampler_view_default_template(&samplerViewTemplate, pCurPlaneResource, pCurPlaneResource->format); + samplerViewTemplate.swizzle_r = samplerViewTemplate.swizzle_g = samplerViewTemplate.swizzle_b = + PIPE_SWIZZLE_X + j; + samplerViewTemplate.swizzle_a = PIPE_SWIZZLE_1; + + pD3D12VideoBuffer->sampler_view_components[component] = + pipe->create_sampler_view(pipe, pCurPlaneResource, &samplerViewTemplate); + if (!pD3D12VideoBuffer->sampler_view_components[component]) { + goto error; + } + } + } + + pCurPlaneResource = pCurPlaneResource->next; + } + + // Adjust size to fit component <= VL_NUM_COMPONENTS + pD3D12VideoBuffer->sampler_view_components.resize(component); + + return pD3D12VideoBuffer->sampler_view_components.data(); + +error: + for (uint i = 0; i < pD3D12VideoBuffer->num_planes; ++i) { 
+ pipe_sampler_view_reference(&pD3D12VideoBuffer->sampler_view_components[i], NULL); + } + + return nullptr; +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_buffer.h b/src/gallium/drivers/d3d12/d3d12_video_buffer.h new file mode 100644 index 00000000000..62f0454a2c3 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_buffer.h @@ -0,0 +1,89 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + + +#ifndef D3D12_VIDEO_BUFFER_H +#define D3D12_VIDEO_BUFFER_H + +#include "pipe/p_context.h" +#include "pipe/p_video_codec.h" +#include + +/// +/// Pipe video buffer interface starts +/// + +/** + * creates a video buffer + */ +struct pipe_video_buffer * +d3d12_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *tmpl); + +/** + * destroy this video buffer + */ +void +d3d12_video_buffer_destroy(struct pipe_video_buffer *buffer); + +/** + * get an individual sampler view for each plane + */ +struct pipe_sampler_view ** +d3d12_video_buffer_get_sampler_view_planes(struct pipe_video_buffer *buffer); + +/** + * get an individual sampler view for each component + */ +struct pipe_sampler_view ** +d3d12_video_buffer_get_sampler_view_components(struct pipe_video_buffer *buffer); + +/** + * get an individual surfaces for each plane + */ +struct pipe_surface ** +d3d12_video_buffer_get_surfaces(struct pipe_video_buffer *buffer); + +/* + * destroy the associated data + */ +void +d3d12_video_buffer_destroy_associated_data(void *associated_data); + +/** + * output for decoding / input for displaying + */ +struct d3d12_video_buffer +{ + pipe_video_buffer base; + struct d3d12_resource * texture; + uint num_planes; + std::vector surfaces; + std::vector sampler_view_planes; + std::vector sampler_view_components; +}; + +/// +/// Pipe video buffer interface ends +/// + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec.cpp b/src/gallium/drivers/d3d12/d3d12_video_dec.cpp new file mode 100644 index 00000000000..73c9eb238c2 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_dec.cpp @@ -0,0 +1,1361 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_context.h" +#include "d3d12_format.h" +#include "d3d12_resource.h" +#include "d3d12_screen.h" +#include "d3d12_surface.h" +#include "d3d12_video_dec.h" +#include "d3d12_video_dec_h264.h" +#include "d3d12_video_buffer.h" +#include "d3d12_residency.h" + +#include "vl/vl_video_buffer.h" +#include "util/format/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_video.h" +#include "util/vl_vlc.h" + +struct pipe_video_codec * +d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec) +{ + /// + /// Initialize d3d12_video_decoder + /// + + + // Not using new doesn't call ctor and the initializations in the class declaration are lost + struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder; + + pD3D12Dec->base = *codec; + pD3D12Dec->m_screen = context->screen; + + pD3D12Dec->base.context = context; + pD3D12Dec->base.width = codec->width; + pD3D12Dec->base.height = codec->height; + // Only fill methods that are supported by the d3d12 decoder, leaving null the rest (ie. 
encode_* / decode_macroblock + // / get_feedback for encode) + pD3D12Dec->base.destroy = d3d12_video_decoder_destroy; + pD3D12Dec->base.begin_frame = d3d12_video_decoder_begin_frame; + pD3D12Dec->base.decode_bitstream = d3d12_video_decoder_decode_bitstream; + pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame; + pD3D12Dec->base.flush = d3d12_video_decoder_flush; + + pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile); + pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile); + pD3D12Dec->m_d3d12DecProfile = d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(codec->profile); + + /// + /// Try initializing D3D12 Video device and check for device caps + /// + + struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context; + pD3D12Dec->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen); + + /// + /// Create decode objects + /// + HRESULT hr = S_OK; + if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface( + IID_PPV_ARGS(pD3D12Dec->m_spD3D12VideoDevice.GetAddressOf())))) { + debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - D3D12 Device has no Video support\n"); + goto failed; + } + + if (!d3d12_video_decoder_check_caps_and_create_decoder(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) { + debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on " + "d3d12_video_decoder_check_caps_and_create_decoder\n"); + goto failed; + } + + if (!d3d12_video_decoder_create_command_objects(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) { + debug_printf( + "[d3d12_video_decoder] d3d12_video_create_decoder - Failure on d3d12_video_decoder_create_command_objects\n"); + goto failed; + } + + if (!d3d12_video_decoder_create_video_state_buffers(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) { + debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on " + "d3d12_video_decoder_create_video_state_buffers\n"); + goto failed; + } + + 
pD3D12Dec->m_decodeFormatInfo = { pD3D12Dec->m_decodeFormat }; + hr = pD3D12Dec->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, + &pD3D12Dec->m_decodeFormatInfo, + sizeof(pD3D12Dec->m_decodeFormatInfo)); + if(FAILED(hr)) { + debug_printf("CheckFeatureSupport failed with HR %x\n", hr); + goto failed; + } + + return &pD3D12Dec->base; + +failed: + if (pD3D12Dec != nullptr) { + d3d12_video_decoder_destroy((struct pipe_video_codec *) pD3D12Dec); + } + + return nullptr; +} + +/** + * Destroys a d3d12_video_decoder + * Call destroy_XX for applicable XX nested member types before deallocating + * Destroy methods should check != nullptr on their input target argument as this method can be called as part of + * cleanup from failure on the creation method + */ +void +d3d12_video_decoder_destroy(struct pipe_video_codec *codec) +{ + if (codec == nullptr) { + return; + } + + d3d12_video_decoder_flush(codec); // Flush pending work before destroying. + + struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; + + // + // Destroys a decoder + // Call destroy_XX for applicable XX nested member types before deallocating + // Destroy methods should check != nullptr on their input target argument as this method can be called as part of + // cleanup from failure on the creation method + // + + // No need for d3d12_destroy_video_objects + // All the objects created here are smart pointer members of d3d12_video_decoder + // No need for d3d12_destroy_video_decoder_and_heap + // All the objects created here are smart pointer members of d3d12_video_decoder + // No need for d3d12_destroy_video_dpbmanagers + // All the objects created here are smart pointer members of d3d12_video_decoder + + // No need for m_pD3D12Screen as it is not managed by d3d12_video_decoder + + // Call dtor to make ComPtr work + delete pD3D12Dec; +} + +/** + * start decoding of a new frame + */ +void +d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec, + struct 
pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + // Do nothing here. Initialize happens on decoder creation, re-config (if any) happens in + // d3d12_video_decoder_decode_bitstream + struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; + assert(pD3D12Dec); + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n", + pD3D12Dec->m_fenceValue); +} + +/** + * decode a bitstream + */ +void +d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *buffers, + const unsigned *sizes) +{ + struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; + assert(pD3D12Dec); + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream started for fenceValue: %d\n", + pD3D12Dec->m_fenceValue); + assert(pD3D12Dec->m_spD3D12VideoDevice); + assert(pD3D12Dec->m_spDecodeCommandQueue); + assert(pD3D12Dec->m_pD3D12Screen); + struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target; + assert(pD3D12VideoBuffer); + + /// + /// Compressed bitstream buffers + /// + + /// Mesa VA frontend Video buffer passing semantics for H264, HEVC, MPEG4, VC1 and PIPE_VIDEO_PROFILE_VC1_ADVANCED + /// are: If num_buffers == 1 -> buf[0] has the compressed bitstream WITH the starting code If num_buffers == 2 -> + /// buf[0] has the NALU starting code and buf[1] has the compressed bitstream WITHOUT any starting code. If + /// num_buffers = 3 -> It's JPEG, not supported in D3D12. num_buffers is at most 3. 
+ /// Mesa VDPAU frontend passes the buffers as they get passed in VdpDecoderRender without fixing any start codes
+ /// except for PIPE_VIDEO_PROFILE_VC1_ADVANCED
+ // In https://http.download.nvidia.com/XFree86/vdpau/doxygen/html/index.html#video_mixer_usage it's mentioned that:
+ // "It is recommended that applications pass solely the slice data to VDPAU; specifically that any header data
+ // structures be excluded from the portion of the bitstream passed to VDPAU. VDPAU implementations must operate
+ // correctly if non-slice data is included, at least for formats employing start codes to delimit slice data. For all
+ // codecs/profiles it's highly recommended (when the codec/profile has such codes...) that the start codes are passed
+ // to VDPAU, even when not included in the bitstream the VDPAU client is parsing." Let's assume we get all the start
+ // codes for VDPAU. The doc also says "VDPAU implementations must operate correctly if non-slice data is included, at
+ // least for formats employing start codes to delimit slice data". If we ever hit an issue with VDPAU start codes, we
+ // should consider adding the code that handles this in the VDPAU layer above the gallium driver, like mesa VA does.
+
+ // For the multi-slice case, end_frame already takes care of this by parsing the start codes from the
+ // combined bitstream of all the decode_bitstream calls.
+
+ // VAAPI seems to send one decode_bitstream command per slice, but we should also support the VDPAU case where the
+ // buffers have multiple buffer array entries per slice {startCode (optional), slice1, slice2, ..., startCode
+ // (optional) , sliceN}
+
+ if (num_buffers > 2) // Assume this means multiple slices at once in a decode_bitstream call
+ {
+ // Based on the VA frontend codebase, this never happens for video (no JPEG)
+ // Based on the VDPAU frontend codebase, this only happens when sending more than one slice at once in decode bitstream
+
+ // To handle the case where VDPAU sends all the slices at once in a single decode_bitstream call, let's pretend it
+ // was a series of different calls
+
+ // Group by start codes and buffers and perform one delegated call per slice
+ debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream multiple slices on same call detected "
+ "for fenceValue: %d, breaking down the calls into one per slice\n",
+ pD3D12Dec->m_fenceValue);
+
+ size_t curBufferIdx = 0;
+
+ // Vars to be used for the delegation calls to decode_bitstream
+ unsigned call_num_buffers = 0;
+ const void *const *call_buffers = nullptr;
+ const unsigned *call_sizes = nullptr;
+
+ while (curBufferIdx < num_buffers) {
+ // Store the current buffer as the base array pointer for the delegated call, later decide if it'll be a
+ // startcode+slicedata or just slicedata call
+ call_buffers = &buffers[curBufferIdx];
+ call_sizes = &sizes[curBufferIdx];
+
+ // Start codes are usually no longer than 4 bytes
+ // If the current buffer is a start code buffer, send it along with the next buffer. Otherwise, just send the
+ // current buffer.
+ call_num_buffers = (sizes[curBufferIdx] <= 4) ?
2 : 1; + + // Delegate call with one or two buffers only + d3d12_video_decoder_decode_bitstream(codec, target, picture, call_num_buffers, call_buffers, call_sizes); + + curBufferIdx += call_num_buffers; // Consume from the loop the buffers sent in the last call + } + } else { + /// + /// Handle single slice buffer path, maybe with an extra start code buffer at buffers[0]. + /// + + // Both the start codes being present at buffers[0] and the rest in buffers [1] or full buffer at [0] cases can be + // handled by flattening all the buffers into a single one and passing that to HW. + + size_t totalReceivedBuffersSize = 0u; // Combined size of all sizes[] + for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) { + totalReceivedBuffersSize += sizes[bufferIdx]; + } + + // Bytes of data pre-staged before this decode_frame call + size_t preStagedDataSize = pD3D12Dec->m_stagingDecodeBitstream.size(); + + // Extend the staging buffer size, as decode_frame can be called several times before end_frame + pD3D12Dec->m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize); + + // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new + // buffers will be appended + uint8_t *newSliceDataPositionDstBase = pD3D12Dec->m_stagingDecodeBitstream.data() + preStagedDataSize; + + // Append new data at the end. 
+ size_t dstOffset = 0u; + for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) { + memcpy(newSliceDataPositionDstBase + dstOffset, buffers[bufferIdx], sizes[bufferIdx]); + dstOffset += sizes[bufferIdx]; + } + + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream finalized for fenceValue: %d\n", + pD3D12Dec->m_fenceValue); + } +} + +void +d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + switch (pD3D12Dec->m_d3d12DecProfileType) { + case d3d12_video_decode_profile_type_h264: + { + pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture; + pD3D12Dec->m_pCurrentDecodeTarget = target; + pD3D12Dec->m_pCurrentReferenceTargets = pPicControlH264->ref; + } break; + + default: + { + unreachable("Unsupported d3d12_video_decode_profile_type"); + } break; + } +} + +/** + * end decoding of the current frame + */ +void +d3d12_video_decoder_end_frame(struct pipe_video_codec *codec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; + assert(pD3D12Dec); + struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen; + assert(pD3D12Screen); + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame started for fenceValue: %d\n", + pD3D12Dec->m_fenceValue); + assert(pD3D12Dec->m_spD3D12VideoDevice); + assert(pD3D12Dec->m_spDecodeCommandQueue); + struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target; + assert(pD3D12VideoBuffer); + + /// + /// Store current decode output target texture and reference textures from upper layer + /// + d3d12_video_decoder_store_upper_layer_references(pD3D12Dec, target, picture); + + /// + /// Codec header picture parameters buffers + /// + + d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, 
picture, pD3D12VideoBuffer);
+ assert(pD3D12Dec->m_picParamsBuffer.size() > 0);
+
+ ///
+ /// Prepare Slice control buffers before clearing staging buffer
+ ///
+ assert(pD3D12Dec->m_stagingDecodeBitstream.size() > 0); // Make sure the staging wasn't cleared yet in end_frame
+ d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture);
+ assert(pD3D12Dec->m_SliceControlBuffer.size() > 0);
+
+ ///
+ /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer
+ ///
+
+ uint64_t sliceDataStagingBufferSize = pD3D12Dec->m_stagingDecodeBitstream.size();
+ uint8_t *sliceDataStagingBufferPtr = pD3D12Dec->m_stagingDecodeBitstream.data();
+
+ // Reallocate if necessary to accommodate the current frame bitstream buffer in GPU memory
+ if (pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
+ if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) {
+ debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
+ "d3d12_video_decoder_create_staging_bitstream_buffer\n");
+ debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
+ pD3D12Dec->m_fenceValue); // NOTE(review): log tag reads "[d3d12_video_encoder]" on a decoder path - confirm intended
+ assert(false);
+ return;
+ }
+ }
+
+ // Upload frame bitstream CPU data to ID3D12Resource buffer
+ pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize =
+ sliceDataStagingBufferSize; // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize.
+ assert(pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize <=
+ pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize);
+
+ /* One-shot transfer operation with data supplied in a user
+ * pointer.
+ */ + pipe_resource *pPipeCompressedBufferObj = + d3d12_resource_from_resource(&pD3D12Screen->base, pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get()); + assert(pPipeCompressedBufferObj); + pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context, // context + pPipeCompressedBufferObj, // dst buffer + PIPE_MAP_WRITE, // usage PIPE_MAP_x + 0, // offset + sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize, // size + sliceDataStagingBufferPtr // data + ); + + // Flush buffer_subdata batch and wait on this CPU thread for GPU work completion + // before deleting the source CPU buffer below + struct pipe_fence_handle *pUploadGPUCompletionFence = NULL; + pD3D12Dec->base.context->flush(pD3D12Dec->base.context, + &pUploadGPUCompletionFence, + PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); + assert(pUploadGPUCompletionFence); + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for " + "buffer_subdata to upload compressed bitstream.\n"); + pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, pUploadGPUCompletionFence, PIPE_TIMEOUT_INFINITE); + pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &pUploadGPUCompletionFence, NULL); + + // [After buffer_subdata GPU work is finished] Clear CPU staging buffer now that end_frame is called and was uploaded + // to GPU for DecodeFrame call. + pD3D12Dec->m_stagingDecodeBitstream.resize(0); + + /// + /// Proceed to record the GPU Decode commands + /// + + // Requested conversions by caller upper layer (none for now) + d3d12_video_decode_output_conversion_arguments requestedConversionArguments = {}; + + /// + /// Record DecodeFrame operation and resource state transitions. 
+ /// + + // Translate input D3D12 structure + D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {}; + + d3d12InputArguments.CompressedBitstream.pBuffer = pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get(); + d3d12InputArguments.CompressedBitstream.Offset = 0u; + constexpr uint64_t d3d12BitstreamOffsetAlignment = + 128u; // specified in + // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier + assert((d3d12InputArguments.CompressedBitstream.Offset == 0) || + ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0)); + d3d12InputArguments.CompressedBitstream.Size = pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize; + + D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { + CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer, + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_DECODE_READ), + }; + pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); + + // Schedule reverse (back to common) transitions before command list closes for current frame + pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( + CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer, + D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, + D3D12_RESOURCE_STATE_COMMON)); + + /// + /// Clear texture (no reference only flags in resource allocation) to use as decode output to send downstream for + /// display/consumption + /// + ID3D12Resource *pOutputD3D12Texture; + uint outputD3D12Subresource = 0; + + /// + /// Ref Only texture (with reference only flags in resource allocation) to use as reconstructed picture decode output + /// and to store as future reference in DPB + /// + ID3D12Resource *pRefOnlyOutputD3D12Texture; + uint refOnlyOutputD3D12Subresource = 0; + + if(!d3d12_video_decoder_prepare_for_decode_frame(pD3D12Dec, + target, + pD3D12VideoBuffer, + &pOutputD3D12Texture, // output + 
&outputD3D12Subresource, // output + &pRefOnlyOutputD3D12Texture, // output + &refOnlyOutputD3D12Subresource, // output + requestedConversionArguments)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on " + "d3d12_video_decoder_prepare_for_decode_frame\n"); + debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n", + pD3D12Dec->m_fenceValue); + assert(false); + return; + } + + /// + /// Set codec picture parameters CPU buffer + /// + + d3d12InputArguments.NumFrameArguments = + 1u; // Only the codec data received from the above layer with picture params + d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { + D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, + static_cast(pD3D12Dec->m_picParamsBuffer.size()), + pD3D12Dec->m_picParamsBuffer.data(), + }; + + if (pD3D12Dec->m_SliceControlBuffer.size() > 0) { + d3d12InputArguments.NumFrameArguments++; + d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { + D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL, + static_cast(pD3D12Dec->m_SliceControlBuffer.size()), + pD3D12Dec->m_SliceControlBuffer.data(), + }; + } + + if (pD3D12Dec->m_InverseQuantMatrixBuffer.size() > 0) { + d3d12InputArguments.NumFrameArguments++; + d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { + D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX, + static_cast(pD3D12Dec->m_InverseQuantMatrixBuffer.size()), + pD3D12Dec->m_InverseQuantMatrixBuffer.data(), + }; + } + + d3d12InputArguments.ReferenceFrames = pD3D12Dec->m_spDPBManager->get_current_reference_frames(); + if (D3D12_DEBUG_VERBOSE & d3d12_debug) { + pD3D12Dec->m_spDPBManager->print_dpb(); + } + + d3d12InputArguments.pHeap = pD3D12Dec->m_spVideoDecoderHeap.Get(); + + // translate output D3D12 structure + D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {}; + d3d12OutputArguments.pOutputTexture2D = 
pOutputD3D12Texture; + d3d12OutputArguments.OutputSubresource = outputD3D12Subresource; + + bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags & + d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0; + if (fReferenceOnly) { + d3d12OutputArguments.ConversionArguments.Enable = TRUE; + + assert(pRefOnlyOutputD3D12Texture); + d3d12OutputArguments.ConversionArguments.pReferenceTexture2D = pRefOnlyOutputD3D12Texture; + d3d12OutputArguments.ConversionArguments.ReferenceSubresource = refOnlyOutputD3D12Subresource; + + const D3D12_RESOURCE_DESC &descReference = + d3d12OutputArguments.ConversionArguments.pReferenceTexture2D->GetDesc(); + d3d12OutputArguments.ConversionArguments.DecodeColorSpace = d3d12_convert_from_legacy_color_space( + !util_format_is_yuv(d3d12_get_pipe_format(descReference.Format)), + util_format_get_blocksize(d3d12_get_pipe_format(descReference.Format)) * 8 /*bytes to bits conversion*/, + /* StudioRGB= */ false, + /* P709= */ true, + /* StudioYUV= */ true); + + const D3D12_RESOURCE_DESC &descOutput = d3d12OutputArguments.pOutputTexture2D->GetDesc(); + d3d12OutputArguments.ConversionArguments.OutputColorSpace = d3d12_convert_from_legacy_color_space( + !util_format_is_yuv(d3d12_get_pipe_format(descOutput.Format)), + util_format_get_blocksize(d3d12_get_pipe_format(descOutput.Format)) * 8 /*bytes to bits conversion*/, + /* StudioRGB= */ false, + /* P709= */ true, + /* StudioYUV= */ true); + + const D3D12_VIDEO_DECODER_HEAP_DESC &HeapDesc = pD3D12Dec->m_spVideoDecoderHeap->GetDesc(); + d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth; + d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight; + } else { + d3d12OutputArguments.ConversionArguments.Enable = FALSE; + } + + CD3DX12_RESOURCE_DESC outputDesc(d3d12OutputArguments.pOutputTexture2D->GetDesc()); + uint32_t MipLevel, PlaneSlice, ArraySlice; + D3D12DecomposeSubresource(d3d12OutputArguments.OutputSubresource, + 
outputDesc.MipLevels, + outputDesc.ArraySize(), + MipLevel, + ArraySlice, + PlaneSlice); + + for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { + uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); + + D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { + CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D, + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, + planeOutputSubresource), + }; + pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); + } + + // Schedule reverse (back to common) transitions before command list closes for current frame + for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { + uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); + pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( + CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D, + D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, + D3D12_RESOURCE_STATE_COMMON, + planeOutputSubresource)); + } + + // Record DecodeFrame + + pD3D12Dec->m_spDecodeCommandList->DecodeFrame1(pD3D12Dec->m_spVideoDecoder.Get(), + &d3d12OutputArguments, + &d3d12InputArguments); + + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n", + pD3D12Dec->m_fenceValue); + + /// + /// Flush work to the GPU and blocking wait until decode finishes + /// + pD3D12Dec->m_needsGPUFlush = true; + d3d12_video_decoder_flush(codec); + + if (!pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) { + /// + /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation() + /// We cannot use the standalone video buffer allocation directly and we must use instead + /// either a ID3D12Resource with DECODE_REFERENCE only flag or a texture array within the same + /// allocation + /// Do 
GPU->GPU texture copy from decode output to pipe target decode texture sampler view planes + /// + + // Get destination resource + struct pipe_sampler_view **pPipeDstViews = target->get_sampler_view_planes(target); + + // Get source pipe_resource + pipe_resource *pPipeSrc = + d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D); + assert(pPipeSrc); + + // Copy all format subresources/texture planes + + for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { + assert(d3d12OutputArguments.OutputSubresource < INT16_MAX); + struct pipe_box box = { 0, + 0, + // src array slice, taken as Z for TEXTURE_2D_ARRAY + static_cast(d3d12OutputArguments.OutputSubresource), + static_cast(pPipeDstViews[PlaneSlice]->texture->width0), + static_cast(pPipeDstViews[PlaneSlice]->texture->height0), + 1 }; + + pD3D12Dec->base.context->resource_copy_region(pD3D12Dec->base.context, + pPipeDstViews[PlaneSlice]->texture, // dst + 0, // dst level + 0, // dstX + 0, // dstY + 0, // dstZ + (PlaneSlice == 0) ? 
pPipeSrc : pPipeSrc->next, // src + 0, // src level + &box); + } + // Flush resource_copy_region batch and wait on this CPU thread for GPU work completion + struct pipe_fence_handle *completion_fence = NULL; + pD3D12Dec->base.context->flush(pD3D12Dec->base.context, + &completion_fence, + PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); + assert(completion_fence); + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for " + "resource_copy_region on decoded frame.\n"); + pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, completion_fence, PIPE_TIMEOUT_INFINITE); + pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &completion_fence, NULL); + } +} + +/** + * flush any outstanding command buffers to the hardware + * should be called before a video_buffer is acessed by the gallium frontend again + */ +void +d3d12_video_decoder_flush(struct pipe_video_codec *codec) +{ + struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; + assert(pD3D12Dec); + assert(pD3D12Dec->m_spD3D12VideoDevice); + assert(pD3D12Dec->m_spDecodeCommandQueue); + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Will flush video queue work and CPU wait on " + "fenceValue: %d\n", + pD3D12Dec->m_fenceValue); + + if (!pD3D12Dec->m_needsGPUFlush) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Nothing to flush, all up to date.\n"); + } else { + HRESULT hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason(); + if (hr != S_OK) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush" + " - D3D12Device was removed BEFORE commandlist " + "execution with HR %x.\n", + hr); + goto flush_fail; + } + + // Close and execute command list and wait for idle on CPU blocking + // this method before resetting list and allocator for next submission. 
+ + if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) { + pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(), + pD3D12Dec->m_transitionsBeforeCloseCmdList.data()); + pD3D12Dec->m_transitionsBeforeCloseCmdList.clear(); + } + + hr = pD3D12Dec->m_spDecodeCommandList->Close(); + if (FAILED(hr)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - Can't close command list with HR %x\n", hr); + goto flush_fail; + } + + ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() }; + pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists); + pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue); + pD3D12Dec->m_spFence->SetEventOnCompletion(pD3D12Dec->m_fenceValue, nullptr); + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - ExecuteCommandLists finished on signal with " + "fenceValue: %d\n", + pD3D12Dec->m_fenceValue); + + hr = pD3D12Dec->m_spCommandAllocator->Reset(); + if (FAILED(hr)) { + debug_printf( + "[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12CommandAllocator failed with HR %x\n", + hr); + goto flush_fail; + } + + hr = pD3D12Dec->m_spDecodeCommandList->Reset(pD3D12Dec->m_spCommandAllocator.Get()); + if (FAILED(hr)) { + debug_printf( + "[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n", + hr); + goto flush_fail; + } + + // Validate device was not removed + hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason(); + if (hr != S_OK) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush" + " - D3D12Device was removed AFTER commandlist " + "execution with HR %x, but wasn't before.\n", + hr); + goto flush_fail; + } + + debug_printf( + "[d3d12_video_decoder] d3d12_video_decoder_flush - GPU signaled execution finalized for fenceValue: %d\n", + pD3D12Dec->m_fenceValue); + + pD3D12Dec->m_fenceValue++; + 
pD3D12Dec->m_needsGPUFlush = false; + } + return; + +flush_fail: + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush failed for fenceValue: %d\n", pD3D12Dec->m_fenceValue); + assert(false); +} + +bool +d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Screen, + struct d3d12_video_decoder *pD3D12Dec) +{ + assert(pD3D12Dec->m_spD3D12VideoDevice); + + D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE }; + HRESULT hr = pD3D12Screen->dev->CreateCommandQueue(&commandQueueDesc, + IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandQueue.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue " + "failed with HR %x\n", + hr); + return false; + } + + hr = pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pD3D12Dec->m_spFence)); + if (FAILED(hr)) { + debug_printf( + "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateFence failed with HR %x\n", + hr); + return false; + } + + hr = pD3D12Screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, + IID_PPV_ARGS(pD3D12Dec->m_spCommandAllocator.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to " + "CreateCommandAllocator failed with HR %x\n", + hr); + return false; + } + + hr = pD3D12Screen->dev->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, + pD3D12Dec->m_spCommandAllocator.Get(), + nullptr, + IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf())); + + if (FAILED(hr)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList " + "failed with HR %x\n", + hr); + return false; + } + + D3D12_COMMAND_QUEUE_DESC copyQueueDesc = { D3D12_COMMAND_LIST_TYPE_COPY }; + hr = pD3D12Screen->dev->CreateCommandQueue(©QueueDesc, 
IID_PPV_ARGS(pD3D12Dec->m_spCopyQueue.GetAddressOf())); + + if (FAILED(hr)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue " + "failed with HR %x\n", + hr); + return false; + } + + return true; +} + +bool +d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen *pD3D12Screen, + struct d3d12_video_decoder *pD3D12Dec) +{ + assert(pD3D12Dec->m_spD3D12VideoDevice); + + pD3D12Dec->m_decoderDesc = {}; + + D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { pD3D12Dec->m_d3d12DecProfile, + D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE, + D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE }; + + D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {}; + decodeSupport.NodeIndex = pD3D12Dec->m_NodeIndex; + decodeSupport.Configuration = decodeConfiguration; + decodeSupport.Width = pD3D12Dec->base.width; + decodeSupport.Height = pD3D12Dec->base.height; + decodeSupport.DecodeFormat = pD3D12Dec->m_decodeFormat; + // no info from above layer on framerate/bitrate + decodeSupport.FrameRate.Numerator = 0; + decodeSupport.FrameRate.Denominator = 0; + decodeSupport.BitRate = 0; + + HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT, + &decodeSupport, + sizeof(decodeSupport)); + if (FAILED(hr)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CheckFeatureSupport " + "failed with HR %x\n", + hr); + return false; + } + + if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - " + "D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED was false when checking caps \n"); + return false; + } + + pD3D12Dec->m_configurationFlags = decodeSupport.ConfigurationFlags; + pD3D12Dec->m_tier = decodeSupport.DecodeTier; + + if (d3d12_video_decoder_supports_aot_dpb(decodeSupport, pD3D12Dec->m_d3d12DecProfileType)) { + 
pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_array_of_textures; + } + + if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) { + pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_alignment_height; + } + + if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) { + pD3D12Dec->m_ConfigDecoderSpecificFlags |= + d3d12_video_decode_config_specific_flag_reference_only_textures_required; + } + + pD3D12Dec->m_decoderDesc.NodeMask = pD3D12Dec->m_NodeMask; + pD3D12Dec->m_decoderDesc.Configuration = decodeConfiguration; + + hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&pD3D12Dec->m_decoderDesc, + IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CreateVideoDecoder " + "failed with HR %x\n", + hr); + return false; + } + + return true; +} + +bool +d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen *pD3D12Screen, + struct d3d12_video_decoder *pD3D12Dec) +{ + assert(pD3D12Dec->m_spD3D12VideoDevice); + if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, + pD3D12Dec, + pD3D12Dec->m_InitialCompBitstreamGPUBufferSize)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_video_state_buffers - Failure on " + "d3d12_video_decoder_create_staging_bitstream_buffer\n"); + return false; + } + + return true; +} + +bool +d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *pD3D12Screen, + struct d3d12_video_decoder *pD3D12Dec, + uint64_t bufSize) +{ + assert(pD3D12Dec->m_spD3D12VideoDevice); + + if (pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get() != nullptr) { + pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Reset(); + } + + auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, 
pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask); + auto descResource = CD3DX12_RESOURCE_DESC::Buffer(bufSize); + HRESULT hr = pD3D12Screen->dev->CreateCommittedResource( + &descHeap, + D3D12_HEAP_FLAG_NONE, + &descResource, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(pD3D12Dec->m_curFrameCompressedBitstreamBuffer.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - " + "CreateCommittedResource failed with HR %x\n", + hr); + return false; + } + + pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize; + return true; +} + +bool +d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec, + struct pipe_video_buffer *pCurrentDecodeTarget, + struct d3d12_video_buffer *pD3D12VideoBuffer, + ID3D12Resource **ppOutTexture2D, + uint32_t *pOutSubresourceIndex, + ID3D12Resource **ppRefOnlyOutTexture2D, + uint32_t *pRefOnlyOutSubresourceIndex, + const d3d12_video_decode_output_conversion_arguments &conversionArgs) +{ + if(!d3d12_video_decoder_reconfigure_dpb(pD3D12Dec, pD3D12VideoBuffer, conversionArgs)) { + debug_printf("d3d12_video_decoder_reconfigure_dpb failed!\n"); + return false; + } + + // Refresh DPB active references for current frame, release memory for unused references. + d3d12_video_decoder_refresh_dpb_active_references(pD3D12Dec); + + // Get the output texture for the current frame to be decoded + pD3D12Dec->m_spDPBManager->get_current_frame_decode_output_texture(pCurrentDecodeTarget, + ppOutTexture2D, + pOutSubresourceIndex); + + auto vidBuffer = (struct d3d12_video_buffer *)(pCurrentDecodeTarget); + // If is_pipe_buffer_underlying_output_decode_allocation is enabled, + // we can just use the underlying allocation in pCurrentDecodeTarget + // and avoid an extra copy after decoding the frame. 
+ // If this is the case, we need to handle the residency of this resource + // (if not we're actually creating the resources with CreateCommitedResource with + // residency by default) + if(pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) { + assert(d3d12_resource_resource(vidBuffer->texture) == *ppOutTexture2D); + // Make it permanently resident for video use + d3d12_promote_to_permanent_residency(pD3D12Dec->m_pD3D12Screen, vidBuffer->texture); + } + + // Get the reference only texture for the current frame to be decoded (if applicable) + bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags & + d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0; + if (fReferenceOnly) { + bool needsTransitionToDecodeWrite = false; + pD3D12Dec->m_spDPBManager->get_reference_only_output(pCurrentDecodeTarget, + ppRefOnlyOutTexture2D, + pRefOnlyOutSubresourceIndex, + needsTransitionToDecodeWrite); + assert(needsTransitionToDecodeWrite); + + CD3DX12_RESOURCE_DESC outputDesc((*ppRefOnlyOutTexture2D)->GetDesc()); + uint32_t MipLevel, PlaneSlice, ArraySlice; + D3D12DecomposeSubresource(*pRefOnlyOutSubresourceIndex, + outputDesc.MipLevels, + outputDesc.ArraySize(), + MipLevel, + ArraySlice, + PlaneSlice); + + for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { + uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); + + D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { + CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D, + D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, + planeOutputSubresource), + }; + pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); + } + + // Schedule reverse (back to common) transitions before command list closes for current frame + for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { + uint 
planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); + pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( + CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D, + D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, + D3D12_RESOURCE_STATE_COMMON, + planeOutputSubresource)); + } + } + + // If decoded needs reference_only entries in the dpb, use the reference_only allocation for current frame + // otherwise, use the standard output resource + ID3D12Resource *pCurrentFrameDPBEntry = fReferenceOnly ? *ppRefOnlyOutTexture2D : *ppOutTexture2D; + uint32_t currentFrameDPBEntrySubresource = fReferenceOnly ? *pRefOnlyOutSubresourceIndex : *pOutSubresourceIndex; + + switch (pD3D12Dec->m_d3d12DecProfileType) { + case d3d12_video_decode_profile_type_h264: + { + d3d12_video_decoder_prepare_current_frame_references_h264(pD3D12Dec, + pCurrentFrameDPBEntry, + currentFrameDPBEntrySubresource); + } break; + + default: + { + unreachable("Unsupported d3d12_video_decode_profile_type"); + } break; + } + + return true; +} + +bool +d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *pD3D12Dec, + struct d3d12_video_buffer *pD3D12VideoBuffer, + const d3d12_video_decode_output_conversion_arguments &conversionArguments) +{ + uint32_t width; + uint32_t height; + uint16_t maxDPB; + bool isInterlaced; + d3d12_video_decoder_get_frame_info(pD3D12Dec, &width, &height, &maxDPB, isInterlaced); + + ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture); + D3D12_RESOURCE_DESC outputResourceDesc = pPipeD3D12DstResource->GetDesc(); + + pD3D12VideoBuffer->base.interlaced = isInterlaced; + D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE interlaceTypeRequested = + isInterlaced ? 
D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_FIELD_BASED : D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE; + if ((pD3D12Dec->m_decodeFormat != outputResourceDesc.Format) || + (pD3D12Dec->m_decoderDesc.Configuration.InterlaceType != interlaceTypeRequested)) { + // Copy current pD3D12Dec->m_decoderDesc, modify decodeprofile and re-create decoder. + D3D12_VIDEO_DECODER_DESC decoderDesc = pD3D12Dec->m_decoderDesc; + decoderDesc.Configuration.InterlaceType = interlaceTypeRequested; + decoderDesc.Configuration.DecodeProfile = + d3d12_video_decoder_resolve_profile(pD3D12Dec->m_d3d12DecProfileType); + pD3D12Dec->m_spVideoDecoder.Reset(); + HRESULT hr = + pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&decoderDesc, + IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf())); + if (FAILED(hr)) { + debug_printf( + "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoder failed with HR %x\n", + hr); + return false; + } + // Update state after CreateVideoDecoder succeeds only. + pD3D12Dec->m_decoderDesc = decoderDesc; + } + + if (!pD3D12Dec->m_spDPBManager || !pD3D12Dec->m_spVideoDecoderHeap || + pD3D12Dec->m_decodeFormat != outputResourceDesc.Format || pD3D12Dec->m_decoderHeapDesc.DecodeWidth != width || + pD3D12Dec->m_decoderHeapDesc.DecodeHeight != height || + pD3D12Dec->m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) { + // Detect the combination of AOT/ReferenceOnly to configure the DPB manager + uint16_t referenceCount = (conversionArguments.Enable) ? (uint16_t) conversionArguments.ReferenceFrameCount + + 1 /*extra slot for current picture*/ : + maxDPB; + d3d12_video_decode_dpb_descriptor dpbDesc = {}; + dpbDesc.Width = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Width : width; + dpbDesc.Height = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Height : height; + dpbDesc.Format = + (conversionArguments.Enable) ? 
conversionArguments.ReferenceInfo.Format.Format : outputResourceDesc.Format; + dpbDesc.fArrayOfTexture = + ((pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_array_of_textures) != 0); + dpbDesc.dpbSize = referenceCount; + dpbDesc.m_NodeMask = pD3D12Dec->m_NodeMask; + dpbDesc.fReferenceOnly = ((pD3D12Dec->m_ConfigDecoderSpecificFlags & + d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0); + + // Create DPB manager + if (pD3D12Dec->m_spDPBManager == nullptr) { + pD3D12Dec->m_spDPBManager.reset(new d3d12_video_decoder_references_manager(pD3D12Dec->m_pD3D12Screen, + pD3D12Dec->m_NodeMask, + pD3D12Dec->m_d3d12DecProfileType, + dpbDesc)); + } + + // + // (Re)-create decoder heap + // + D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = {}; + decoderHeapDesc.NodeMask = pD3D12Dec->m_NodeMask; + decoderHeapDesc.Configuration = pD3D12Dec->m_decoderDesc.Configuration; + decoderHeapDesc.DecodeWidth = dpbDesc.Width; + decoderHeapDesc.DecodeHeight = dpbDesc.Height; + decoderHeapDesc.Format = dpbDesc.Format; + decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB; + pD3D12Dec->m_spVideoDecoderHeap.Reset(); + HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoderHeap( + &decoderHeapDesc, + IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoderHeap.GetAddressOf())); + if (FAILED(hr)) { + debug_printf( + "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoderHeap failed with HR %x\n", + hr); + return false; + } + // Update pD3D12Dec after CreateVideoDecoderHeap succeeds only. 
+ pD3D12Dec->m_decoderHeapDesc = decoderHeapDesc; + } + + pD3D12Dec->m_decodeFormat = outputResourceDesc.Format; + + return true; +} + +void +d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec) +{ + switch (pD3D12Dec->m_d3d12DecProfileType) { + case d3d12_video_decode_profile_type_h264: + { + d3d12_video_decoder_refresh_dpb_active_references_h264(pD3D12Dec); + } break; + + default: + { + unreachable("Unsupported d3d12_video_decode_profile_type"); + } break; + } +} + +void +d3d12_video_decoder_get_frame_info( + struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced) +{ + *pWidth = 0; + *pHeight = 0; + *pMaxDPB = 0; + isInterlaced = false; + + switch (pD3D12Dec->m_d3d12DecProfileType) { + case d3d12_video_decode_profile_type_h264: + { + d3d12_video_decoder_get_frame_info_h264(pD3D12Dec, pWidth, pHeight, pMaxDPB, isInterlaced); + } break; + + default: + { + unreachable("Unsupported d3d12_video_decode_profile_type"); + } break; + } + + if (pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_alignment_height) { + const uint32_t AlignmentMask = 31; + *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask; + } +} + +/// +/// Returns the number of bytes starting from [buf.data() + buffsetOffset] where the _targetCode_ is found +/// Returns -1 if start code not found +/// +int +d3d12_video_decoder_get_next_startcode_offset(std::vector &buf, + unsigned int bufferOffset, + unsigned int targetCode, + unsigned int targetCodeBitSize, + unsigned int numBitsToSearchIntoBuffer) +{ + struct vl_vlc vlc = { 0 }; + + // Shorten the buffer to be [buffetOffset, endOfBuf) + unsigned int bufSize = buf.size() - bufferOffset; + uint8_t *bufPtr = buf.data(); + bufPtr += bufferOffset; + + /* search the first numBitsToSearchIntoBuffer bytes for a startcode */ + vl_vlc_init(&vlc, 1, (const void *const *) &bufPtr, &bufSize); + for (uint i = 0; i < numBitsToSearchIntoBuffer 
&& vl_vlc_bits_left(&vlc) >= targetCodeBitSize; ++i) { + if (vl_vlc_peekbits(&vlc, targetCodeBitSize) == targetCode) + return i; + vl_vlc_eatbits(&vlc, 8); // Stride is 8 bits = 1 byte + vl_vlc_fillbits(&vlc); + } + + return -1; +} + +void +d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input( + struct d3d12_video_decoder *codec, // input argument, current decoder + struct pipe_picture_desc + *picture, // input argument, base structure of pipe_XXX_picture_desc where XXX is the codec name + struct d3d12_video_buffer *pD3D12VideoBuffer // input argument, target video buffer +) +{ + assert(picture); + assert(codec); + struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; + + d3d12_video_decode_profile_type profileType = + d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile); + switch (profileType) { + case d3d12_video_decode_profile_type_h264: + { + size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_H264); + pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture; + ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture); + D3D12_RESOURCE_DESC outputResourceDesc = pPipeD3D12DstResource->GetDesc(); + DXVA_PicParams_H264 dxvaPicParamsH264 = + d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(pD3D12Dec->m_fenceValue, + codec->base.profile, + outputResourceDesc.Width, + outputResourceDesc.Height, + pPicControlH264); + + d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, + &dxvaPicParamsH264, + dxvaPicParamsBufferSize); + + size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_H264); + DXVA_Qmatrix_H264 dxvaQmatrixH264 = {}; + d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture, + dxvaQmatrixH264); + d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize); + } break; + default: + { + unreachable("Unsupported 
d3d12_video_decode_profile_type"); + } break; + } +} + +void +d3d12_video_decoder_prepare_dxva_slices_control( + struct d3d12_video_decoder *pD3D12Dec, // input argument, current decoder + struct pipe_picture_desc *picture +) +{ + d3d12_video_decode_profile_type profileType = + d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(pD3D12Dec->base.profile); + switch (profileType) { + case d3d12_video_decode_profile_type_h264: + { + + std::vector pOutSliceControlBuffers; + struct pipe_h264_picture_desc* picture_h264 = (struct pipe_h264_picture_desc*) picture; + d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, pOutSliceControlBuffers, picture_h264); + + assert(sizeof(pOutSliceControlBuffers.data()[0]) == sizeof(DXVA_Slice_H264_Short)); + uint64_t DXVAStructSize = pOutSliceControlBuffers.size() * sizeof((pOutSliceControlBuffers.data()[0])); + assert((DXVAStructSize % sizeof(DXVA_Slice_H264_Short)) == 0); + d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(pD3D12Dec, + pOutSliceControlBuffers.data(), + DXVAStructSize); + assert(pD3D12Dec->m_SliceControlBuffer.size() == DXVAStructSize); + } break; + default: + { + unreachable("Unsupported d3d12_video_decode_profile_type"); + } break; + } +} + +void +d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(struct d3d12_video_decoder *pD3D12Dec, + void *pDXVAStruct, + uint64_t DXVAStructSize) +{ + if (pD3D12Dec->m_SliceControlBuffer.capacity() < DXVAStructSize) { + pD3D12Dec->m_SliceControlBuffer.reserve(DXVAStructSize); + } + + pD3D12Dec->m_SliceControlBuffer.resize(DXVAStructSize); + memcpy(pD3D12Dec->m_SliceControlBuffer.data(), pDXVAStruct, DXVAStructSize); +} + +void +d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec, + void *pDXVAStruct, + uint64_t DXVAStructSize) +{ + if (pD3D12Dec->m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) { + pD3D12Dec->m_InverseQuantMatrixBuffer.reserve(DXVAStructSize); + } + + 
pD3D12Dec->m_InverseQuantMatrixBuffer.resize(DXVAStructSize); + memcpy(pD3D12Dec->m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize); +} + +void +d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *pD3D12Dec, + void *pDXVAStruct, + uint64_t DXVAStructSize) +{ + if (pD3D12Dec->m_picParamsBuffer.capacity() < DXVAStructSize) { + pD3D12Dec->m_picParamsBuffer.reserve(DXVAStructSize); + } + + pD3D12Dec->m_picParamsBuffer.resize(DXVAStructSize); + memcpy(pD3D12Dec->m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize); +} + +bool +d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport, + d3d12_video_decode_profile_type profileType) +{ + bool supportedProfile = false; + switch (profileType) { + case d3d12_video_decode_profile_type_h264: + supportedProfile = true; + break; + default: + supportedProfile = false; + break; + } + + return (decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) && supportedProfile; +} + +d3d12_video_decode_profile_type +d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile) +{ + switch (profile) { + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10: + return d3d12_video_decode_profile_type_h264; + default: + { + unreachable("Unsupported pipe video profile"); + } break; + } +} + +GUID +d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile) +{ + switch (profile) { + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10: + return 
D3D12_VIDEO_DECODE_PROFILE_H264; + default: + return {}; + } +} + +GUID +d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType) +{ + switch (profileType) { + case d3d12_video_decode_profile_type_h264: + return D3D12_VIDEO_DECODE_PROFILE_H264; + break; + default: + { + unreachable("Unsupported d3d12_video_decode_profile_type"); + } break; + } +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec.h b/src/gallium/drivers/d3d12/d3d12_video_dec.h new file mode 100644 index 00000000000..0b69e6cc716 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_dec.h @@ -0,0 +1,244 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_VIDEO_DEC_H +#define D3D12_VIDEO_DEC_H + +#include "d3d12_video_types.h" +#include "d3d12_video_dec_references_mgr.h" + +/// +/// Pipe video interface starts +/// + +/** + * creates a video decoder + */ +struct pipe_video_codec * +d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *templ); + +/** + * destroy this video decoder + */ +void +d3d12_video_decoder_destroy(struct pipe_video_codec *codec); + +/** + * start decoding of a new frame + */ +void +d3d12_video_decoder_begin_frame(struct pipe_video_codec * codec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); + +/** + * decode a bitstream + */ +void +d3d12_video_decoder_decode_bitstream(struct pipe_video_codec * codec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const * buffers, + const unsigned * sizes); + +/** + * end decoding of the current frame + */ +void +d3d12_video_decoder_end_frame(struct pipe_video_codec * codec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); + +/** + * flush any outstanding command buffers to the hardware + * should be called before a video_buffer is acessed by the gallium frontend again + */ +void +d3d12_video_decoder_flush(struct pipe_video_codec *codec); + +/// +/// Pipe video interface ends +/// + +/// +/// d3d12_video_decoder functions starts +/// + +struct d3d12_video_decoder +{ + struct pipe_video_codec base; + struct pipe_screen * m_screen; + struct d3d12_screen * m_pD3D12Screen; + + /// + /// D3D12 objects and context info + /// + + const uint m_NodeMask = 0u; + const uint m_NodeIndex = 0u; + + ComPtr m_spFence; + uint m_fenceValue = 1u; + + ComPtr m_spD3D12VideoDevice; + ComPtr m_spVideoDecoder; + ComPtr m_spVideoDecoderHeap; + ComPtr m_spDecodeCommandQueue; + ComPtr m_spCommandAllocator; + ComPtr m_spDecodeCommandList; + ComPtr m_spCopyQueue; + + std::vector m_transitionsBeforeCloseCmdList; + + 
D3D12_VIDEO_DECODER_DESC m_decoderDesc = {}; + D3D12_VIDEO_DECODER_HEAP_DESC m_decoderHeapDesc = {}; + D3D12_VIDEO_DECODE_TIER m_tier = D3D12_VIDEO_DECODE_TIER_NOT_SUPPORTED; + DXGI_FORMAT m_decodeFormat; + D3D12_FEATURE_DATA_FORMAT_INFO m_decodeFormatInfo = {}; + D3D12_VIDEO_DECODE_CONFIGURATION_FLAGS m_configurationFlags = D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_NONE; + GUID m_d3d12DecProfile = {}; + d3d12_video_decode_profile_type m_d3d12DecProfileType = {}; + uint m_ConfigDecoderSpecificFlags = 0u; + + /// + /// Current frame tracked state + /// + + // Tracks DPB and reference picture textures + std::unique_ptr m_spDPBManager; + + // Holds pointers to current decode output target texture and reference textures from upper layer + struct pipe_video_buffer *m_pCurrentDecodeTarget; + struct pipe_video_buffer **m_pCurrentReferenceTargets; + + // Holds the input bitstream buffer while it's being constructed in decode_bitstream calls + std::vector m_stagingDecodeBitstream; + + const uint64_t m_InitialCompBitstreamGPUBufferSize = (1024 /*1K*/ * 1024 /*1MB*/) * 8 /*8 MB*/; // 8MB + + // Holds the input bitstream buffer in GPU video memory + ComPtr m_curFrameCompressedBitstreamBuffer; + uint64_t m_curFrameCompressedBitstreamBufferAllocatedSize = + m_InitialCompBitstreamGPUBufferSize; // Actual number of allocated bytes available in the buffer (after + // m_curFrameCompressedBitstreamBufferPayloadSize might be garbage) + uint64_t m_curFrameCompressedBitstreamBufferPayloadSize = 0u; // Actual number of bytes of valid data + + // Holds a buffer for the DXVA struct layout of the picture params of the current frame + std::vector m_picParamsBuffer; // size() has the byte size of the currently held picparams ; capacity() + // has the underlying container allocation size + + // Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX of the + // current frame m_InverseQuantMatrixBuffer.size() == 0 means no quantization matrix buffer is 
set for current frame + std::vector m_InverseQuantMatrixBuffer; // size() has the byte size of the currently held + // VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX ; + // capacity() has the underlying container allocation size + + // Holds a buffer for the DXVA struct layout of the VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL of the current frame + // m_SliceControlBuffer.size() == 0 means no quantization matrix buffer is set for current frame + std::vector + m_SliceControlBuffer; // size() has the byte size of the currently held VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL ; + // capacity() has the underlying container allocation size + + // Indicates if GPU commands have not been flushed and are pending. + bool m_needsGPUFlush = false; +}; + +bool +d3d12_video_decoder_create_command_objects(const struct d3d12_screen * pD3D12Screen, + struct d3d12_video_decoder *pD3D12Dec); +bool +d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen * pD3D12Screen, + struct d3d12_video_decoder *pD3D12Dec); +bool +d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen * pD3D12Screen, + struct d3d12_video_decoder *pD3D12Dec); +bool +d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen * pD3D12Screen, + struct d3d12_video_decoder *pD3D12Dec, + uint64_t bufSize); +void +d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); +bool +d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec, + struct pipe_video_buffer * pCurrentDecodeTarget, + struct d3d12_video_buffer * pD3D12VideoBuffer, + ID3D12Resource ** ppOutTexture2D, + uint32_t * pOutSubresourceIndex, + ID3D12Resource ** ppRefOnlyOutTexture2D, + uint32_t * pRefOnlyOutSubresourceIndex, + const d3d12_video_decode_output_conversion_arguments &conversionArgs); +void +d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder 
*pD3D12Dec); +bool +d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder * pD3D12Dec, + struct d3d12_video_buffer * pD3D12VideoBuffer, + const d3d12_video_decode_output_conversion_arguments &conversionArguments); +void +d3d12_video_decoder_get_frame_info( + struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced); +void +d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(struct d3d12_video_decoder *codec, + struct pipe_picture_desc * picture, + struct d3d12_video_buffer * pD3D12VideoBuffer); +template +T * +d3d12_video_decoder_get_current_dxva_picparams(struct d3d12_video_decoder *codec) +{ + return reinterpret_cast(codec->m_picParamsBuffer.data()); +} +bool +d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport, + d3d12_video_decode_profile_type profileType); +d3d12_video_decode_profile_type +d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile); +GUID +d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType); +void +d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *codec, + void * pDXVABuffer, + uint64_t DXVABufferSize); +void +d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec, + void * pDXVAStruct, + uint64_t DXVAStructSize); +void +d3d12_video_decoder_prepare_dxva_slices_control(struct d3d12_video_decoder *pD3D12Dec, struct pipe_picture_desc *picture); +void +d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(struct d3d12_video_decoder *pD3D12Dec, + void * pDXVAStruct, + uint64_t DXVAStructSize); +int +d3d12_video_decoder_get_next_startcode_offset(std::vector &buf, + unsigned int bufferOffset, + unsigned int targetCode, + unsigned int targetCodeBitSize, + unsigned int numBitsToSearchIntoBuffer); + +/// +/// d3d12_video_decoder functions ends +/// + +#endif diff --git 
a/src/gallium/drivers/d3d12/d3d12_video_dec_h264.cpp b/src/gallium/drivers/d3d12/d3d12_video_dec_h264.cpp new file mode 100644 index 00000000000..41bf2e5bfae --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_dec_h264.cpp @@ -0,0 +1,602 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_video_dec.h" +#include "d3d12_video_dec_h264.h" + +void +d3d12_video_decoder_refresh_dpb_active_references_h264(struct d3d12_video_decoder *pD3D12Dec) +{ + // Method overview + // 1. Codec specific strategy in switch statement regarding reference frames eviction policy. Should only mark active + // DPB references, leaving evicted ones as unused + // 2. Call release_unused_references_texture_memory(); at the end of this method. 
Any references (and texture + // allocations associated) + // that were left not marked as used in m_spDPBManager by step (2) are lost. + + // Assign DXVA original Index7Bits indices to current frame and references + DXVA_PicParams_H264 *pCurrPicParams = d3d12_video_decoder_get_current_dxva_picparams(pD3D12Dec); + for (uint8_t i = 0; i < 16; i++) { + // From H264 DXVA spec: + // Index7Bits + // An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture + // parameters structure(section 4.0) or the RefPicList member of the slice control data + // structure(section 6.0) When Index7Bits is used in the CurrPic and RefFrameList members of the picture + // parameters structure, the value directly specifies the DXVA index of an uncompressed surface. When + // Index7Bits is used in the RefPicList member of the slice control data structure, the value identifies + // the surface indirectly, as an index into the RefFrameList array of the associated picture parameters + // structure.For more information, see section 6.2. In all cases, when Index7Bits does not contain a valid + // index, the value is 127. + if (pCurrPicParams->RefFrameList[i].bPicEntry != DXVA_H264_INVALID_PICTURE_ENTRY_VALUE) { + pCurrPicParams->RefFrameList[i].Index7Bits = + pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentReferenceTargets[i]); + } + } + + pD3D12Dec->m_spDPBManager->mark_all_references_as_unused(); + pD3D12Dec->m_spDPBManager->mark_references_in_use(pCurrPicParams->RefFrameList); + + // Releases the underlying reference picture texture objects of all references that were not marked as used in this + // method. 
+ pD3D12Dec->m_spDPBManager->release_unused_references_texture_memory(); + + pCurrPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentDecodeTarget); + + debug_printf("[d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input] DXVA_PicParams_H264 converted " + "from pipe_h264_picture_desc (No reference index remapping)\n"); + d3d12_video_decoder_log_pic_params_h264(pCurrPicParams); +} + +void +d3d12_video_decoder_get_frame_info_h264( + struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced) +{ + auto pPicParams = d3d12_video_decoder_get_current_dxva_picparams(pD3D12Dec); + // wFrameWidthInMbsMinus1 Width of the frame containing this picture, in units of macroblocks, minus 1. (The width in + // macroblocks is wFrameWidthInMbsMinus1 plus 1.) wFrameHeightInMbsMinus1 Height of the frame containing this + // picture, in units of macroblocks, minus 1. (The height in macroblocks is wFrameHeightInMbsMinus1 plus 1.) When the + // picture is a field, the height of the frame is twice the height of the picture and is an integer multiple of 2 in + // units of macroblocks. + *pWidth = (pPicParams->wFrameWidthInMbsMinus1 + 1) * 16; + *pHeight = (pPicParams->wFrameHeightInMbsMinus1 + 1) / (pPicParams->frame_mbs_only_flag ? 
1 : 2); + *pHeight = (2 - pPicParams->frame_mbs_only_flag) * *pHeight; + *pHeight = *pHeight * 16; + *pMaxDPB = pPicParams->num_ref_frames + 1; + isInterlaced = !pPicParams->frame_mbs_only_flag; +} + +/// +/// Pushes the current frame as next reference, updates the DXVA H264 structure with the indices of the DPB and +/// transitions the references +/// +void +d3d12_video_decoder_prepare_current_frame_references_h264(struct d3d12_video_decoder *pD3D12Dec, + ID3D12Resource *pTexture2D, + uint32_t subresourceIndex) +{ + DXVA_PicParams_H264 *pPicParams = d3d12_video_decoder_get_current_dxva_picparams(pD3D12Dec); + pPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->store_future_reference(pPicParams->CurrPic.Index7Bits, + pD3D12Dec->m_spVideoDecoderHeap, + pTexture2D, + subresourceIndex); + + // From H264 DXVA spec: + // Index7Bits + // An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture + // parameters structure(section 4.0) or the RefPicList member of the slice control data structure(section 6.0) + // When Index7Bits is used in the CurrPic and RefFrameList members of the picture parameters structure, the value + // directly specifies the DXVA index of an uncompressed surface. When Index7Bits is used in the RefPicList member + // of the slice control data structure, the value identifies the surface indirectly, as an index into the + // RefFrameList array of the associated picture parameters structure.For more information, see section 6.2. In + // all cases, when Index7Bits does not contain a valid index, the value is 127. 
+ + std::vector + neededStateTransitions; // Returned by update_entries to perform by the method caller + pD3D12Dec->m_spDPBManager->update_entries( + d3d12_video_decoder_get_current_dxva_picparams(pD3D12Dec)->RefFrameList, + neededStateTransitions); + + pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(neededStateTransitions.size(), neededStateTransitions.data()); + + // Schedule reverse (back to common) transitions before command list closes for current frame + for (auto BarrierDesc : neededStateTransitions) { + std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter); + pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(BarrierDesc); + } + + debug_printf( + "[d3d12_video_decoder_prepare_current_frame_references_h264] DXVA_PicParams_H264 after index remapping)\n"); + d3d12_video_decoder_log_pic_params_h264( + d3d12_video_decoder_get_current_dxva_picparams(pD3D12Dec)); +} + +void +d3d12_video_decoder_prepare_dxva_slices_control_h264(struct d3d12_video_decoder *pD3D12Dec, + std::vector &pOutSliceControlBuffers, + struct pipe_h264_picture_desc *picture_h264) +{ + debug_printf("[d3d12_video_decoder_h264] Upper layer reported %d slices for this frame, parsing them below...\n", + picture_h264->slice_count); + size_t processedBitstreamBytes = 0u; + size_t sliceIdx = 0; + bool sliceFound = false; + do { + DXVA_Slice_H264_Short currentSliceEntry = {}; + // From DXVA spec: All bits for the slice are located within the corresponding bitstream data buffer. 
+ currentSliceEntry.wBadSliceChopping = 0u; + sliceFound = d3d12_video_decoder_get_next_slice_size_and_offset_h264(pD3D12Dec->m_stagingDecodeBitstream, + processedBitstreamBytes, + currentSliceEntry.SliceBytesInBuffer, + currentSliceEntry.BSNALunitDataLocation); + + if (sliceFound) { + d3d12_video_decoder_nal_unit_type_h264 naluType = (d3d12_video_decoder_nal_unit_type_h264)( + pD3D12Dec->m_stagingDecodeBitstream[currentSliceEntry.BSNALunitDataLocation + + (DXVA_H264_START_CODE_LEN_BITS / 8)] & + 0x1F); + debug_printf("[d3d12_video_decoder_h264] Detected slice (NALU Type %d) index %ld with size %d and offset %d " + "for frame with " + "fenceValue: %d\n", + naluType, + sliceIdx, + currentSliceEntry.SliceBytesInBuffer, + currentSliceEntry.BSNALunitDataLocation, + pD3D12Dec->m_fenceValue); + + sliceIdx++; + processedBitstreamBytes += currentSliceEntry.SliceBytesInBuffer; + pOutSliceControlBuffers.push_back(currentSliceEntry); + } + } while (sliceFound && (sliceIdx < picture_h264->slice_count)); + assert(pOutSliceControlBuffers.size() == picture_h264->slice_count); +} + +bool +d3d12_video_decoder_get_next_slice_size_and_offset_h264(std::vector &buf, + unsigned int bufferOffset, + uint32_t &outSliceSize, + uint32_t &outSliceOffset) +{ + // Search the rest of the full frame buffer after the offset + uint numBitsToSearchIntoBuffer = buf.size() - bufferOffset; + int currentSlicePosition = d3d12_video_decoder_get_next_startcode_offset(buf, + bufferOffset, + DXVA_H264_START_CODE, + DXVA_H264_START_CODE_LEN_BITS, + numBitsToSearchIntoBuffer); + + // Return false now if we didn't find a next slice based on the bufferOffset parameter + if (currentSlicePosition < 0) { + return false; + } else { + // Save the absolute buffer offset until the next slice in the output param + outSliceOffset = currentSlicePosition + bufferOffset; + + // Found a next NALU, make sure it's a slice: + d3d12_video_decoder_nal_unit_type_h264 naluType = + 
(d3d12_video_decoder_nal_unit_type_h264)(buf[outSliceOffset + (DXVA_H264_START_CODE_LEN_BITS / 8)] & 0x1F); + + bool isNaluSliceType = (naluType == type_slice) || (naluType == type_slice_part_A) || + (naluType == type_slice_part_B) || (naluType == type_slice_part_C) || + (naluType == type_slice_IDR) || (naluType == type_slice_aux) || + (naluType == type_slice_layer_ext); + + if (!isNaluSliceType) { + // We found a NALU, but it's not a slice + return false; + } else { + // We did find a next slice based on the bufferOffset parameter + + // Skip current start code, to get the slice after this, to calculate its size + bufferOffset += (DXVA_H264_START_CODE_LEN_BITS / 8 /*convert bits to bytes*/); + numBitsToSearchIntoBuffer = buf.size() - bufferOffset; + + int c_signedStartCodeLen = (DXVA_H264_START_CODE_LEN_BITS / 8 /*convert bits to bytes*/); + int nextSlicePosition = c_signedStartCodeLen // Takes into account the skipped start code + + d3d12_video_decoder_get_next_startcode_offset(buf, + bufferOffset, + DXVA_H264_START_CODE, + DXVA_H264_START_CODE_LEN_BITS, + numBitsToSearchIntoBuffer); + + if (nextSlicePosition < + c_signedStartCodeLen) // if no slice found, d3d12_video_decoder_get_next_startcode_offset returns - 1 + { + // This means currentSlicePosition points to the last slice in the buffer + outSliceSize = buf.size() - outSliceOffset; + } else { + // This means there are more slices after the one pointed by currentSlicePosition + outSliceSize = nextSlicePosition - currentSlicePosition; + } + return true; + } + } +} + +static void +d3d12_video_decoder_log_pic_entry_h264(DXVA_PicEntry_H264 &picEntry) +{ + debug_printf("\t\tIndex7Bits: %d\n" + "\t\tAssociatedFlag: %d\n" + "\t\tbPicEntry: %d\n", + picEntry.Index7Bits, + picEntry.AssociatedFlag, + picEntry.bPicEntry); +} + +void +d3d12_video_decoder_log_pic_params_h264(DXVA_PicParams_H264 *pPicParams) +{ + debug_printf("\n=============================================\n"); + debug_printf("wFrameWidthInMbsMinus1 = 
%d\n", pPicParams->wFrameWidthInMbsMinus1); + debug_printf("wFrameHeightInMbsMinus1 = %d\n", pPicParams->wFrameHeightInMbsMinus1); + debug_printf("CurrPic.Index7Bits = %d\n", pPicParams->CurrPic.Index7Bits); + debug_printf("CurrPic.AssociatedFlag = %d\n", pPicParams->CurrPic.AssociatedFlag); + debug_printf("num_ref_frames = %d\n", pPicParams->num_ref_frames); + debug_printf("sp_for_switch_flag = %d\n", pPicParams->sp_for_switch_flag); + debug_printf("field_pic_flag = %d\n", pPicParams->field_pic_flag); + debug_printf("MbaffFrameFlag = %d\n", pPicParams->MbaffFrameFlag); + debug_printf("residual_colour_transform_flag = %d\n", pPicParams->residual_colour_transform_flag); + debug_printf("chroma_format_idc = %d\n", pPicParams->chroma_format_idc); + debug_printf("RefPicFlag = %d\n", pPicParams->RefPicFlag); + debug_printf("IntraPicFlag = %d\n", pPicParams->IntraPicFlag); + debug_printf("constrained_intra_pred_flag = %d\n", pPicParams->constrained_intra_pred_flag); + debug_printf("MinLumaBipredSize8x8Flag = %d\n", pPicParams->MinLumaBipredSize8x8Flag); + debug_printf("weighted_pred_flag = %d\n", pPicParams->weighted_pred_flag); + debug_printf("weighted_bipred_idc = %d\n", pPicParams->weighted_bipred_idc); + debug_printf("MbsConsecutiveFlag = %d\n", pPicParams->MbsConsecutiveFlag); + debug_printf("frame_mbs_only_flag = %d\n", pPicParams->frame_mbs_only_flag); + debug_printf("transform_8x8_mode_flag = %d\n", pPicParams->transform_8x8_mode_flag); + debug_printf("StatusReportFeedbackNumber = %d\n", pPicParams->StatusReportFeedbackNumber); + debug_printf("CurrFieldOrderCnt[0] = %d\n", pPicParams->CurrFieldOrderCnt[0]); + debug_printf("CurrFieldOrderCnt[1] = %d\n", pPicParams->CurrFieldOrderCnt[1]); + debug_printf("chroma_qp_index_offset = %d\n", pPicParams->chroma_qp_index_offset); + debug_printf("second_chroma_qp_index_offset = %d\n", pPicParams->second_chroma_qp_index_offset); + debug_printf("ContinuationFlag = %d\n", pPicParams->ContinuationFlag); + 
debug_printf("pic_init_qp_minus26 = %d\n", pPicParams->pic_init_qp_minus26); + debug_printf("pic_init_qs_minus26 = %d\n", pPicParams->pic_init_qs_minus26); + debug_printf("num_ref_idx_l0_active_minus1 = %d\n", pPicParams->num_ref_idx_l0_active_minus1); + debug_printf("num_ref_idx_l1_active_minus1 = %d\n", pPicParams->num_ref_idx_l1_active_minus1); + debug_printf("frame_num = %d\n", pPicParams->frame_num); + debug_printf("log2_max_frame_num_minus4 = %d\n", pPicParams->log2_max_frame_num_minus4); + debug_printf("pic_order_cnt_type = %d\n", pPicParams->pic_order_cnt_type); + debug_printf("log2_max_pic_order_cnt_lsb_minus4 = %d\n", pPicParams->log2_max_pic_order_cnt_lsb_minus4); + debug_printf("delta_pic_order_always_zero_flag = %d\n", pPicParams->delta_pic_order_always_zero_flag); + debug_printf("direct_8x8_inference_flag = %d\n", pPicParams->direct_8x8_inference_flag); + debug_printf("entropy_coding_mode_flag = %d\n", pPicParams->entropy_coding_mode_flag); + debug_printf("pic_order_present_flag = %d\n", pPicParams->pic_order_present_flag); + debug_printf("deblocking_filter_control_present_flag = %d\n", pPicParams->deblocking_filter_control_present_flag); + debug_printf("redundant_pic_cnt_present_flag = %d\n", pPicParams->redundant_pic_cnt_present_flag); + debug_printf("num_slice_groups_minus1 = %d\n", pPicParams->num_slice_groups_minus1); + debug_printf("slice_group_map_type = %d\n", pPicParams->slice_group_map_type); + debug_printf("slice_group_change_rate_minus1 = %d\n", pPicParams->slice_group_change_rate_minus1); + debug_printf("Reserved8BitsB = %d\n", pPicParams->Reserved8BitsB); + debug_printf("UsedForReferenceFlags 0x%08x\n", pPicParams->UsedForReferenceFlags); + debug_printf("NonExistingFrameFlags 0x%08x\n", pPicParams->NonExistingFrameFlags); + + const UINT16 RefPicListLength = _countof(DXVA_PicParams_H264::RefFrameList); + + debug_printf("[D3D12 Video Decoder H264 DXVA PicParams info]\n" + "\t[Current Picture Entry]\n"); + 
d3d12_video_decoder_log_pic_entry_h264(pPicParams->CurrPic); + + debug_printf("[Decode RefFrameList Pic_Entry list] Entries where bPicEntry == " + "DXVA_H264_INVALID_PICTURE_ENTRY_VALUE are not printed\n"); + for (uint32_t refIdx = 0; refIdx < RefPicListLength; refIdx++) { + if (DXVA_H264_INVALID_PICTURE_ENTRY_VALUE != pPicParams->RefFrameList[refIdx].bPicEntry) { + debug_printf("\t[Reference PicEntry %d]\n", refIdx); + d3d12_video_decoder_log_pic_entry_h264(pPicParams->RefFrameList[refIdx]); + debug_printf("\t\tFrameNumList: %d\n" + "\t\tFieldOrderCntList[0]: %d\n" + "\t\tFieldOrderCntList[1]: %d\n", + pPicParams->FrameNumList[refIdx], + pPicParams->FieldOrderCntList[refIdx][0], + pPicParams->FieldOrderCntList[refIdx][1]); + } + } +} + +DXVA_PicParams_H264 +d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264( + uint32_t frameNum, + pipe_video_profile profile, + uint32_t decodeWidth, // pipe_h264_picture_desc doesn't have the size of the frame for H264, but it does for other + // codecs. + uint32_t decodeHeight, // pipe_h264_picture_desc doesn't have the size of the frame for H264, but it does for other + // codecs. + pipe_h264_picture_desc *pPipeDesc) +{ + DXVA_PicParams_H264 dxvaStructure = {}; + + // uint16_t wFrameWidthInMbsMinus1; + uint width_in_mb = decodeWidth / D3D12_VIDEO_H264_MB_IN_PIXELS; + dxvaStructure.wFrameWidthInMbsMinus1 = width_in_mb - 1; + // uint16_t wFrameHeightInMbsMinus1; + uint height_in_mb = static_cast(std::ceil(decodeHeight / D3D12_VIDEO_H264_MB_IN_PIXELS)); + dxvaStructure.wFrameHeightInMbsMinus1 = height_in_mb - 1; + + // CurrPic.Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_h264 + // CurrPic.AssociatedFlag + // If field_pic_flag is 1, the AssociatedFlag field in CurrPic is interpreted as follows: + // 0 -> The current picture is the top field of the uncompressed destination frame surface. + // 1 -> The current picture is the bottom field of the uncompressed destination frame surface. 
+ // If field_pic_flag is 0, AssociatedFlag has no meaning and shall be 0, and the accelerator shall ignore the value. + if (pPipeDesc->field_pic_flag) { + dxvaStructure.CurrPic.AssociatedFlag = (pPipeDesc->bottom_field_flag == 0) ? 0 : 1; + } else { + dxvaStructure.CurrPic.AssociatedFlag = 0; + } + + // uint8_t num_ref_frames; + dxvaStructure.num_ref_frames = pPipeDesc->num_ref_frames; + // union { + // struct { + // uint16_t field_pic_flag : 1; + dxvaStructure.field_pic_flag = pPipeDesc->field_pic_flag; + // From H264 codec spec + // The variable MbaffFrameFlag is derived as + // MbaffFrameFlag = ( mb_adaptive_frame_field_flag && !field_pic_flag ) + dxvaStructure.MbaffFrameFlag = (pPipeDesc->pps->sps->mb_adaptive_frame_field_flag && !pPipeDesc->field_pic_flag); + // uint16_t residual_colour_transform_flag :1 + dxvaStructure.residual_colour_transform_flag = pPipeDesc->pps->sps->separate_colour_plane_flag; + // uint16_t sp_for_switch_flag // switch slices are not supported by VA + dxvaStructure.sp_for_switch_flag = 0; + // uint16_t chroma_format_idc : 2; + assert(pPipeDesc->pps->sps->chroma_format_idc == 1); // Not supported otherwise + dxvaStructure.chroma_format_idc = 1; // This is always 4:2:0 for D3D12 Video. NV12/P010 DXGI formats only. + // uint16_t RefPicFlag : 1; + dxvaStructure.RefPicFlag = pPipeDesc->is_reference; + + // uint16_t constrained_intra_pred_flag : 1; + dxvaStructure.constrained_intra_pred_flag = pPipeDesc->pps->constrained_intra_pred_flag; + // uint16_t weighted_pred_flag : 1; + dxvaStructure.weighted_pred_flag = pPipeDesc->pps->weighted_pred_flag; + // uint16_t weighted_bipred_idc : 2; + dxvaStructure.weighted_bipred_idc = pPipeDesc->pps->weighted_bipred_idc; + // From DXVA spec: + // The value shall be 1 unless the restricted-mode profile in use explicitly supports the value 0. 
+ // FMO is not supported by VAAPI + dxvaStructure.MbsConsecutiveFlag = 1; + // uint16_t frame_mbs_only_flag : 1; + dxvaStructure.frame_mbs_only_flag = pPipeDesc->pps->sps->frame_mbs_only_flag; + // uint16_t transform_8x8_mode_flag : 1; + dxvaStructure.transform_8x8_mode_flag = pPipeDesc->pps->transform_8x8_mode_flag; + // }; + // uint16_t wBitFields; + // }; + // uint8_t bit_depth_luma_minus8; + dxvaStructure.bit_depth_luma_minus8 = pPipeDesc->pps->sps->bit_depth_luma_minus8; + assert(dxvaStructure.bit_depth_luma_minus8 == 0); // Only support for NV12 now + // uint8_t bit_depth_chroma_minus8; + dxvaStructure.bit_depth_chroma_minus8 = pPipeDesc->pps->sps->bit_depth_chroma_minus8; + assert(dxvaStructure.bit_depth_chroma_minus8 == 0); // Only support for NV12 now + // uint16_t MinLumaBipredSize8x8Flag + dxvaStructure.MinLumaBipredSize8x8Flag = pPipeDesc->pps->sps->MinLumaBiPredSize8x8; + // char pic_init_qs_minus26 + dxvaStructure.pic_init_qs_minus26 = pPipeDesc->pps->pic_init_qs_minus26; + // uint8_t chroma_qp_index_offset; /* also used for QScb */ + dxvaStructure.chroma_qp_index_offset = pPipeDesc->pps->chroma_qp_index_offset; + // uint8_t second_chroma_qp_index_offset; /* also for QScr */ + dxvaStructure.second_chroma_qp_index_offset = pPipeDesc->pps->second_chroma_qp_index_offset; + + /* remainder for parsing */ + // uint8_t pic_init_qp_minus26; + dxvaStructure.pic_init_qp_minus26 = pPipeDesc->pps->pic_init_qp_minus26; + // uint8_t num_ref_idx_l0_active_minus1; + dxvaStructure.num_ref_idx_l0_active_minus1 = pPipeDesc->num_ref_idx_l0_active_minus1; + // uint8_t num_ref_idx_l1_active_minus1; + dxvaStructure.num_ref_idx_l1_active_minus1 = pPipeDesc->num_ref_idx_l1_active_minus1; + + // uint16_t frame_num; + dxvaStructure.frame_num = pPipeDesc->frame_num; + + // uint8_t log2_max_frame_num_minus4; + dxvaStructure.log2_max_frame_num_minus4 = pPipeDesc->pps->sps->log2_max_frame_num_minus4; + // uint8_t pic_order_cnt_type; + dxvaStructure.pic_order_cnt_type = 
pPipeDesc->pps->sps->pic_order_cnt_type; + // uint8_t log2_max_pic_order_cnt_lsb_minus4; + dxvaStructure.log2_max_pic_order_cnt_lsb_minus4 = pPipeDesc->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + // uint8_t delta_pic_order_always_zero_flag; + dxvaStructure.delta_pic_order_always_zero_flag = pPipeDesc->pps->sps->delta_pic_order_always_zero_flag; + // uint8_t direct_8x8_inference_flag; + dxvaStructure.direct_8x8_inference_flag = pPipeDesc->pps->sps->direct_8x8_inference_flag; + // uint8_t entropy_coding_mode_flag; + dxvaStructure.entropy_coding_mode_flag = pPipeDesc->pps->entropy_coding_mode_flag; + // uint8_t num_slice_groups_minus1; + dxvaStructure.num_slice_groups_minus1 = pPipeDesc->pps->num_slice_groups_minus1; + assert(dxvaStructure.num_slice_groups_minus1 == 0); // FMO Not supported by VA + + // uint8_t slice_group_map_type; + dxvaStructure.slice_group_map_type = pPipeDesc->pps->slice_group_map_type; + // uint8_t deblocking_filter_control_present_flag; + dxvaStructure.deblocking_filter_control_present_flag = pPipeDesc->pps->deblocking_filter_control_present_flag; + // uint8_t redundant_pic_cnt_present_flag; + dxvaStructure.redundant_pic_cnt_present_flag = pPipeDesc->pps->redundant_pic_cnt_present_flag; + // uint16_t slice_group_change_rate_minus1; + dxvaStructure.slice_group_change_rate_minus1 = pPipeDesc->pps->slice_group_change_rate_minus1; + + // int32_t CurrFieldOrderCnt[2]; + dxvaStructure.CurrFieldOrderCnt[0] = pPipeDesc->field_order_cnt[0]; + dxvaStructure.CurrFieldOrderCnt[1] = pPipeDesc->field_order_cnt[1]; + + // DXVA_PicEntry_H264 RefFrameList[16]; /* DXVA_PicEntry_H264.AssociatedFlag 1 means LongTermRef */ + // From DXVA spec: + // RefFrameList + // Contains a list of 16 uncompressed frame buffer surfaces. All uncompressed surfaces that correspond to pictures + // currently marked as "used for reference" must appear in the RefFrameList array. 
Non-reference surfaces (those + // which only contain pictures for which the value of RefPicFlag was 0 when the picture was decoded) shall not appear + // in RefFrameList for a subsequent picture. In addition, surfaces that contain only pictures marked as "unused for + // reference" shall not appear in RefFrameList for a subsequent picture. + + dxvaStructure.UsedForReferenceFlags = 0; // initialize to zero and set only the appropiate values below + + bool frameUsesAnyRefPicture = false; + for (uint i = 0; i < 16; i++) { + // Fix ad-hoc behaviour from the VA upper layer which always marks short term references as top_is_reference and + // bottom_is_reference as true and then differenciates using INT_MAX in field_order_cnt_list[i][0]/[1] to indicate + // not used convert to expected + if (pPipeDesc->field_order_cnt_list[i][0] == INT_MAX) { + pPipeDesc->top_is_reference[i] = false; + pPipeDesc->field_order_cnt_list[i][0] = 0; // DXVA Spec says this has to be zero if unused + } + + if (pPipeDesc->field_order_cnt_list[i][1] == INT_MAX) { + pPipeDesc->bottom_is_reference[i] = false; + pPipeDesc->field_order_cnt_list[i][1] = 0; // DXVA Spec says this has to be zero if unused + } + + // If both top and bottom reference flags are false, this is an invalid entry + bool validEntry = (pPipeDesc->top_is_reference[i] || pPipeDesc->bottom_is_reference[i] || pPipeDesc->is_long_term[i]); + if (!validEntry) { + // From DXVA spec: + // Entries that will not be used for decoding the current picture, or any subsequent pictures, are indicated by + // setting bPicEntry to 0xFF. If bPicEntry is not 0xFF, the entry may be used as a reference surface for + // decoding the current picture or a subsequent picture (in decoding order). 
+ dxvaStructure.RefFrameList[i].bPicEntry = DXVA_H264_INVALID_PICTURE_ENTRY_VALUE; + dxvaStructure.FieldOrderCntList[i][0] = 0; + dxvaStructure.FieldOrderCntList[i][1] = 0; + dxvaStructure.FrameNumList[i] = 0; + } else { + frameUsesAnyRefPicture = true; + // From DXVA spec: + // For each entry whose value is not 0xFF, the value of AssociatedFlag is interpreted as follows: + // 0 - Not a long-term reference frame. + // 1 - Long-term reference frame. The uncompressed frame buffer contains a reference frame or one or more + // reference fields marked as "used for long-term reference." If field_pic_flag is 1, the current uncompressed + // frame surface may appear in the list for the purpose of decoding the second field of a complementary + // reference field pair. + dxvaStructure.RefFrameList[i].AssociatedFlag = pPipeDesc->is_long_term[i] ? 1u : 0u; + + // dxvaStructure.RefFrameList[i].Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_h264 + + // uint16_t FrameNumList[16]; + // FrameNumList + // For each entry in RefFrameList, the corresponding entry in FrameNumList + // contains the value of FrameNum or LongTermFrameIdx, depending on the value of + // AssociatedFlag in the RefFrameList entry. (FrameNum is assigned to short-term + // reference pictures, and LongTermFrameIdx is assigned to long-term reference + // pictures.) + // If an element in the list of frames is not relevent (for example, if the corresponding + // entry in RefFrameList is empty or is marked as "not used for reference"), the value + // of the FrameNumList entry shall be 0. Accelerators can rely on this constraint being + // fulfilled. + dxvaStructure.FrameNumList[i] = pPipeDesc->frame_num_list[i]; + + // int32_t FieldOrderCntList[16][2]; + // Contains the picture order counts for the reference frames listed in RefFrameList. 
+ // For each entry i in the RefFrameList array, FieldOrderCntList[i][0] contains the + // value of TopFieldOrderCnt for entry i, and FieldOrderCntList[i][1] contains the + // value of BottomFieldOrderCnt for entry i. + // + // If an element of the list is not relevent (for example, if the corresponding entry in + // RefFrameList is empty or is marked as "not used for reference"), the value of + // TopFieldOrderCnt or BottomFieldOrderCnt in FieldOrderCntList shall be 0. + // Accelerators can rely on this constraint being fulfilled. + + dxvaStructure.FieldOrderCntList[i][0] = pPipeDesc->field_order_cnt_list[i][0]; + dxvaStructure.FieldOrderCntList[i][1] = pPipeDesc->field_order_cnt_list[i][1]; + + // From DXVA spec + // UsedForReferenceFlags + // Contains two 1-bit flags for each entry in RefFrameList. For the ith entry in RefFrameList, the two flags + // are accessed as follows:  Flag1i = (UsedForReferenceFlags >> (2 * i)) & 1  Flag2i = (UsedForReferenceFlags + // >> (2 * i + 1)) & 1 If Flag1i is 1, the top field of frame number i is marked as "used for reference," as + // defined by the H.264/AVC specification. If Flag2i is 1, the bottom field of frame number i is marked as + // "used for reference." (Otherwise, if either flag is 0, that field is not marked as "used for reference.") If + // an element in the list of frames is not relevent (for example, if the corresponding entry in RefFrameList is + // empty), the value of both flags for that entry shall be 0. Accelerators may rely on this constraint being + // fulfilled. 
+ + if (pPipeDesc->top_is_reference[i] || pPipeDesc->is_long_term[i]) { + dxvaStructure.UsedForReferenceFlags |= (1 << (2 * i)); + } + + if (pPipeDesc->bottom_is_reference[i] || pPipeDesc->is_long_term[i]) { + dxvaStructure.UsedForReferenceFlags |= (1 << (2 * i + 1)); + } + } + } + + // frame type (I, P, B, etc) is not included in pipeDesc data, let's try to derive it + // from the reference list...if frame doesn't use any references, it should be an I frame. + dxvaStructure.IntraPicFlag = !frameUsesAnyRefPicture; + + // uint8_t pic_order_present_flag; /* Renamed to bottom_field_pic_order_in_frame_present_flag in newer standard + // versions. */ + dxvaStructure.pic_order_present_flag = pPipeDesc->pps->bottom_field_pic_order_in_frame_present_flag; + + // Software decoders should be implemented, as soon as feasible, to set the value of + // Reserved16Bits to 3. The value 0 was previously assigned for uses prior to July 20, + // 2007. The value 1 was previously assigned for uses prior to October 12, 2007. The + // value 2 was previously assigned for uses prior to January 15, 2009. Software + // decoders shall not set Reserved16Bits to any value other than those listed here. + // Note Software decoders that set Reserved16Bits to 3 should ensure that any aspects of software decoder operation + // that were previously not in conformance with this version of the specification have been corrected in the current + // implementation. One particular aspect of conformance that should be checked is the ordering of quantization + // scaling list data, as specified in section 5.2. In addition, the ReservedIntraBit flag in the macroblock control + // buffer must use the semantics described in section 7.2 (this flag was previously reserved). The semantics of + // Index7Bits and RefPicList have also been clarified in updates to this specification. 
+ dxvaStructure.Reserved16Bits = 3; + + // DXVA spec: Arbitrary number set by the host decoder to use as a tag in the status report + // feedback data. The value should not equal 0, and should be different in each call to + // Execute. For more information, see section 12.0, Status Report Data Structure. + dxvaStructure.StatusReportFeedbackNumber = frameNum; + assert(dxvaStructure.StatusReportFeedbackNumber > 0); + + // from DXVA spec + // ContinuationFlag + // If this flag is 1, the remainder of this structure is present in the buffer and contains valid values. If this + // flag is 0, the structure might be truncated at this point in the buffer, or the remaining fields may be set to 0 + // and shall be ignored by the accelerator. The remaining members of this structure are needed only for off-host + // bitstream parsing. If the host decoder parses the bitstream, the decoder can truncate the picture parameters data + // structure buffer after the ContinuationFlag or set the remaining members to zero. uint8_t ContinuationFlag; + dxvaStructure.ContinuationFlag = + 1; // DXVA destination struct does contain members from the slice section of pipeDesc... + + return dxvaStructure; +} + +void +d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264(pipe_h264_picture_desc *pPipeDesc, + DXVA_Qmatrix_H264 &outMatrixBuffer) +{ + // Please note here that the matrices coming from the gallium VA frontend are copied from VAIQMatrixBufferH264 + // which are specified in VAAPI as being in raster scan order (different than zigzag needed by DXVA) + // also please note that VAIQMatrixBufferH264.ScalingList8x8 is copied into the first two rows of + // pipe_h264_pps.ScalingList8x8 leaving the upper 4 rows of pipe_h264_pps.ScalingList8x8[6][64] unmodified + // Finally, please note that other gallium frontends might decide to copy the scaling lists in other order + // and this section might have to be extended to add support for them. 
+ + // In DXVA each scaling list is ordered in zig-zag scan order, convert them from raster scan order. + unsigned i, j; + for (i = 0; i < 6; i++) { + for (j = 0; j < 16; j++) { + outMatrixBuffer.bScalingLists4x4[i][j] = pPipeDesc->pps->ScalingList4x4[i][d3d12_video_zigzag_scan[j]]; + } + } + for (i = 0; i < 64; i++) { + outMatrixBuffer.bScalingLists8x8[0][i] = pPipeDesc->pps->ScalingList8x8[0][d3d12_video_zigzag_direct[i]]; + outMatrixBuffer.bScalingLists8x8[1][i] = pPipeDesc->pps->ScalingList8x8[1][d3d12_video_zigzag_direct[i]]; + } +} diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec_h264.h b/src/gallium/drivers/d3d12/d3d12_video_dec_h264.h new file mode 100644 index 00000000000..db1ad22b2ab --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_dec_h264.h @@ -0,0 +1,253 @@ + +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_VIDEO_DEC_H264_H +#define D3D12_VIDEO_DEC_H264_H + +#include "d3d12_video_types.h" + +// From DXVA spec regarding DXVA_PicEntry_H264 entries: +// Entries that will not be used for decoding the current picture, or any subsequent pictures, +// are indicated by setting bPicEntry to 0xFF. +// If bPicEntry is not 0xFF, the entry may be used as a reference surface for decoding the current picture or +// a subsequent picture (in decoding order). +constexpr uint16_t DXVA_H264_INVALID_PICTURE_INDEX = + 0x7F; // This corresponds to DXVA_PicEntry_H264.Index7Bits ; Not to be confused with the invalid value for + // DXVA_PicEntry_H264.bPicEntry full char value +constexpr uint16_t DXVA_H264_INVALID_PICTURE_ENTRY_VALUE = 0xFF; // This corresponds to DXVA_PicEntry_H264.bPicEntry + +constexpr unsigned int DXVA_H264_START_CODE = 0x000001; // 3 byte start code +constexpr unsigned int DXVA_H264_START_CODE_LEN_BITS = 24; // 3 byte start code + +enum d3d12_video_decoder_nal_unit_type_h264 +{ + type_unspecified = 0, + type_slice = 1, + type_slice_part_A = 2, + type_slice_part_B = 3, + type_slice_part_C = 4, + type_slice_IDR = 5, + type_SEI = 6, + type_SPS = 7, + type_PPS = 8, + type_acces_delimiter = 9, + type_EOSeq = 10, + type_EOStr = 11, + type_EOData = 12, + type_SPSxt = 13, + type_prefix_nal_unit = 14, + type_subset_SPS = 15, + type_reserved16 = 16, + type_reserved17 = 17, + type_reserved18 = 18, + type_slice_aux = 19, + type_slice_layer_ext = 20, + type_reserved21 = 21, + type_reserved22 = 22, + type_reserved23 = 23, + type_unspecified24 = 24, + type_unspecified25 = 25, + type_unspecified26 = 26, + type_unspecified27 = 27, + type_unspecified28 = 28, + type_unspecified29 = 29, + type_unspecified30 = 30, + type_unspecified31 = 31 +}; + +/* H.264/AVC picture entry data structure */ +/* If including new DXVA structs in this header, check the byte-alignment packing pragma declarations that need to be included with them */ +#pragma pack(push, BeforeDXVApacking, 1) 
+typedef struct _DXVA_PicEntry_H264 +{ + union + { + struct + { + uint8_t Index7Bits : 7; + uint8_t AssociatedFlag : 1; + }; + uint8_t bPicEntry; + }; +} DXVA_PicEntry_H264, *LPDXVA_PicEntry_H264; /* 1 byte */ +#pragma pack(pop, BeforeDXVApacking) + +/* H.264/AVC picture parameters structure */ +/* If including new DXVA structs in this header, check the byte-alignment packing pragma declarations that need to be included with them */ +#pragma pack(push, BeforeDXVApacking, 1) +typedef struct _DXVA_PicParams_H264 +{ + uint16_t wFrameWidthInMbsMinus1; + uint16_t wFrameHeightInMbsMinus1; + DXVA_PicEntry_H264 CurrPic; /* flag is bot field flag */ + uint8_t num_ref_frames; + + union + { + struct + { + uint16_t field_pic_flag : 1; + uint16_t MbaffFrameFlag : 1; + uint16_t residual_colour_transform_flag : 1; + uint16_t sp_for_switch_flag : 1; + uint16_t chroma_format_idc : 2; + uint16_t RefPicFlag : 1; + uint16_t constrained_intra_pred_flag : 1; + + uint16_t weighted_pred_flag : 1; + uint16_t weighted_bipred_idc : 2; + uint16_t MbsConsecutiveFlag : 1; + uint16_t frame_mbs_only_flag : 1; + uint16_t transform_8x8_mode_flag : 1; + uint16_t MinLumaBipredSize8x8Flag : 1; + uint16_t IntraPicFlag : 1; + }; + uint16_t wBitFields; + }; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + + uint16_t Reserved16Bits; + uint32_t StatusReportFeedbackNumber; + + DXVA_PicEntry_H264 RefFrameList[16]; /* flag LT */ + int32_t CurrFieldOrderCnt[2]; + int32_t FieldOrderCntList[16][2]; + + char pic_init_qs_minus26; + char chroma_qp_index_offset; /* also used for QScb */ + char second_chroma_qp_index_offset; /* also for QScr */ + uint8_t ContinuationFlag; + + /* remainder for parsing */ + char pic_init_qp_minus26; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; + uint8_t Reserved8BitsA; + + uint16_t FrameNumList[16]; + uint32_t UsedForReferenceFlags; + uint16_t NonExistingFrameFlags; + uint16_t frame_num; + + uint8_t log2_max_frame_num_minus4; + 
uint8_t pic_order_cnt_type; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + uint8_t delta_pic_order_always_zero_flag; + + uint8_t direct_8x8_inference_flag; + uint8_t entropy_coding_mode_flag; + uint8_t pic_order_present_flag; + uint8_t num_slice_groups_minus1; + + uint8_t slice_group_map_type; + uint8_t deblocking_filter_control_present_flag; + uint8_t redundant_pic_cnt_present_flag; + uint8_t Reserved8BitsB; + + uint16_t slice_group_change_rate_minus1; + + uint8_t SliceGroupMap[810]; /* 4b/sgmu, Size BT.601 */ + +} DXVA_PicParams_H264, *LPDXVA_PicParams_H264; +#pragma pack(pop, BeforeDXVApacking) + +/* H.264/AVC quantization weighting matrix data structure */ +/* If including new DXVA structs in this header, check the byte-alignment packing pragma declarations that need to be included with them */ +#pragma pack(push, BeforeDXVApacking, 1) +typedef struct _DXVA_Qmatrix_H264 +{ + uint8_t bScalingLists4x4[6][16]; + uint8_t bScalingLists8x8[2][64]; + +} DXVA_Qmatrix_H264, *LPDXVA_Qmatrix_H264; +#pragma pack(pop, BeforeDXVApacking) + +// For translating the QP matrices from VA to DXVA +const uint8_t d3d12_video_zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +// For translating the QP matrices from VA to DXVA +const uint8_t d3d12_video_zigzag_scan[16+1] = { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, +}; + +/* H.264/AVC slice control data structure - short form */ +/* If including new DXVA structs in this header, check the byte-alignment packing pragma declarations that need to be included with them */ +#pragma pack(push, BeforeDXVApacking, 1) +typedef struct _DXVA_Slice_H264_Short +{ + uint32_t 
BSNALunitDataLocation; /* type 1..5 */ + uint32_t SliceBytesInBuffer; /* for off-host parse */ + uint16_t wBadSliceChopping; /* for off-host parse */ +} DXVA_Slice_H264_Short, *LPDXVA_Slice_H264_Short; +#pragma pack(pop, BeforeDXVApacking) + +DXVA_PicParams_H264 +d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(uint32_t frameNum, + pipe_video_profile profile, + uint32_t frameWidth, + uint32_t frameHeight, + pipe_h264_picture_desc *pipeDesc); +void +d3d12_video_decoder_get_frame_info_h264( + struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced); +void +d3d12_video_decoder_prepare_current_frame_references_h264(struct d3d12_video_decoder *pD3D12Dec, + ID3D12Resource * pTexture2D, + uint32_t subresourceIndex); +void +d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264(pipe_h264_picture_desc *pPipeDesc, + DXVA_Qmatrix_H264 & outMatrixBuffer); +void +d3d12_video_decoder_refresh_dpb_active_references_h264(struct d3d12_video_decoder *pD3D12Dec); +bool +d3d12_video_decoder_get_next_slice_size_and_offset_h264(std::vector &buf, + unsigned int bufferOffset, + uint32_t & outSliceSize, + uint32_t & outSliceOffset); + +uint +d3d12_video_decoder_get_slice_count_h264(std::vector &buf); + +void +d3d12_video_decoder_prepare_dxva_slices_control_h264(struct d3d12_video_decoder * pD3D12Dec, + std::vector &pOutSliceControlBuffers, + struct pipe_h264_picture_desc* picture_h264); + +void +d3d12_video_decoder_log_pic_params_h264(DXVA_PicParams_H264 * pPicParams); + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.cpp b/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.cpp new file mode 100644 index 00000000000..8f170a997c0 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.cpp @@ -0,0 +1,449 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_video_dec_references_mgr.h" +#include "d3d12_video_dec_h264.h" +#include "d3d12_video_texture_array_dpb_manager.h" +#include "d3d12_video_array_of_textures_dpb_manager.h" +#include "d3d12_screen.h" +#include "d3d12_resource.h" +#include "d3d12_video_buffer.h" +#include +#include + +//---------------------------------------------------------------------------------------------------------------------------------- +static uint16_t +GetInvalidReferenceIndex(d3d12_video_decode_profile_type DecodeProfileType) +{ + assert(DecodeProfileType <= d3d12_video_decode_profile_type_max_valid); + + switch (DecodeProfileType) { + case d3d12_video_decode_profile_type_h264: + return DXVA_H264_INVALID_PICTURE_INDEX; + default: + return 0; + }; +} + +//---------------------------------------------------------------------------------------------------------------------------------- +/// +/// This should always be a clear (non ref only) texture, to be presented downstream as the decoded texture +/// Please see get_reference_only_output for the current frame recon pic ref only allocation +/// +void +d3d12_video_decoder_references_manager::get_current_frame_decode_output_texture(struct pipe_video_buffer * pCurrentDecodeTarget, + ID3D12Resource **ppOutTexture2D, + uint32_t * pOutSubresourceIndex) +{ +// First try to find if there's an existing entry for this pCurrentDecodeTarget already in the DPB + // For interlaced scenarios, multiple end_frame calls will need to reference the same texture for top/bottom + assert(m_DecodeTargetToOriginalIndex7Bits.count(pCurrentDecodeTarget) > 0); // Needs to already have a Index7Bits assigned for current pic params + uint16_t remappedIdx = find_remapped_index(m_DecodeTargetToOriginalIndex7Bits[pCurrentDecodeTarget]); + + if(remappedIdx != m_invalidIndex) { // If it already has a remapped index in use, reuse that allocation + // return the existing allocation for this decode target + d3d12_video_reconstructed_picture reconPicture 
= m_upD3D12TexturesStorageManager->get_reference_frame(remappedIdx); + *ppOutTexture2D = reconPicture.pReconstructedPicture; + *pOutSubresourceIndex = reconPicture.ReconstructedPictureSubresource; + } else { + if (is_reference_only()) { + // When using clear DPB references (not ReferenceOnly) the decode output allocations come from + // m_upD3D12TexturesStorageManager as decode output == reconpic decode output Otherwise, when ReferenceOnly is + // true, both the reference frames in the DPB and the current frame reconpic output must be REFERENCE_ONLY, all + // the allocations are stored in m_upD3D12TexturesStorageManager but we need a +1 allocation without the + // REFERENCE_FRAME to use as clear decoded output. In this case d3d12_video_decoder_references_manager allocates + // and provides m_pClearDecodedOutputTexture Please note that m_pClearDecodedOutputTexture needs to be copied/read + // by the client before calling end_frame again, as the allocation will be reused for the next frame. 
+ + if (m_pClearDecodedOutputTexture == nullptr) { + D3D12_HEAP_PROPERTIES Properties = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, m_dpbDescriptor.m_NodeMask, m_dpbDescriptor.m_NodeMask); + CD3DX12_RESOURCE_DESC resDesc = CD3DX12_RESOURCE_DESC::Tex2D(m_dpbDescriptor.Format, + m_dpbDescriptor.Width, + m_dpbDescriptor.Height, + 1, + 1, + 1, + 0, + D3D12_RESOURCE_FLAG_NONE); + + HRESULT hr = m_pD3D12Screen->dev->CreateCommittedResource(&Properties, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(m_pClearDecodedOutputTexture.GetAddressOf())); + if (FAILED(hr)) { + debug_printf("CreateCommittedResource failed with HR %x\n", hr); + assert(false); + } + } + + *ppOutTexture2D = m_pClearDecodedOutputTexture.Get(); + *pOutSubresourceIndex = 0; + } else { + if(is_array_of_textures()) { + // In non ref picture and non texarray mode, we can just use the underlying allocation in pCurrentDecodeTarget + // and avoid an extra copy after decoding the frame. + assert(is_pipe_buffer_underlying_output_decode_allocation()); + + auto vidBuffer = (struct d3d12_video_buffer *)(pCurrentDecodeTarget); + *ppOutTexture2D = d3d12_resource_resource(vidBuffer->texture); + *pOutSubresourceIndex = 0; + #if DEBUG + D3D12_RESOURCE_DESC desc = (*ppOutTexture2D)->GetDesc(); + assert(desc.DepthOrArraySize == 1); + // if the underlying resource is a texture array at some point (if the impl. changes) + // we need to also return the correct underlying subresource in *pOutSubresourceIndex = + #endif + + } else { + // The DPB Storage only has standard (without the ref only flags) allocations, directly use one of those. 
+ d3d12_video_reconstructed_picture pFreshAllocation = + m_upD3D12TexturesStorageManager->get_new_tracked_picture_allocation(); + *ppOutTexture2D = pFreshAllocation.pReconstructedPicture; + *pOutSubresourceIndex = pFreshAllocation.ReconstructedPictureSubresource; + } + + } + } +} + +//---------------------------------------------------------------------------------------------------------------------------------- +_Use_decl_annotations_ void +d3d12_video_decoder_references_manager::get_reference_only_output( + struct pipe_video_buffer * pCurrentDecodeTarget, + ID3D12Resource **ppOutputReference, // out -> new reference slot assigned or nullptr + uint32_t * pOutputSubresource, // out -> new reference slot assigned or nullptr + bool &outNeedsTransitionToDecodeWrite // out -> indicates if output resource argument has to be transitioned to + // D3D12_RESOURCE_STATE_VIDEO_DECODE_READ by the caller +) +{ + assert(is_reference_only()); + + // First try to find if there's an existing entry for this pCurrentDecodeTarget already in the DPB + // For interlaced scenarios, multiple end_frame calls will need to reference the same texture for top/bottom + assert(m_DecodeTargetToOriginalIndex7Bits.count(pCurrentDecodeTarget) > 0); // Needs to already have a Index7Bits assigned for current pic params + uint16_t remappedIdx = find_remapped_index(m_DecodeTargetToOriginalIndex7Bits[pCurrentDecodeTarget]); + + if(remappedIdx != m_invalidIndex) { // If it already has a remapped index in use, reuse that allocation + // return the existing allocation for this decode target + d3d12_video_reconstructed_picture reconPicture = m_upD3D12TexturesStorageManager->get_reference_frame(remappedIdx); + *ppOutputReference = reconPicture.pReconstructedPicture; + *pOutputSubresource = reconPicture.ReconstructedPictureSubresource; + outNeedsTransitionToDecodeWrite = true; + } else { + // The DPB Storage only has REFERENCE_ONLY allocations, use one of those. 
+ d3d12_video_reconstructed_picture pFreshAllocation = + m_upD3D12TexturesStorageManager->get_new_tracked_picture_allocation(); + *ppOutputReference = pFreshAllocation.pReconstructedPicture; + *pOutputSubresource = pFreshAllocation.ReconstructedPictureSubresource; + outNeedsTransitionToDecodeWrite = true; + } +} + +//---------------------------------------------------------------------------------------------------------------------------------- +D3D12_VIDEO_DECODE_REFERENCE_FRAMES +d3d12_video_decoder_references_manager::get_current_reference_frames() +{ + d3d12_video_reference_frames args = m_upD3D12TexturesStorageManager->get_current_reference_frames(); + + + // Convert generic IUnknown into the actual decoder heap object + m_ppHeaps.resize(args.NumTexture2Ds, nullptr); + HRESULT hr = S_OK; + for (uint32_t i = 0; i < args.NumTexture2Ds; i++) { + if (args.ppHeaps[i]) { + hr = args.ppHeaps[i]->QueryInterface(IID_PPV_ARGS(&m_ppHeaps[i])); + assert(SUCCEEDED(hr)); + } else { + m_ppHeaps[i] = nullptr; + } + } + + D3D12_VIDEO_DECODE_REFERENCE_FRAMES retVal = { + args.NumTexture2Ds, + args.ppTexture2Ds, + args.pSubresources, + m_ppHeaps.data(), + }; + + return retVal; +} + +//---------------------------------------------------------------------------------------------------------------------------------- +_Use_decl_annotations_ +d3d12_video_decoder_references_manager::d3d12_video_decoder_references_manager( + const struct d3d12_screen * pD3D12Screen, + uint32_t NodeMask, + d3d12_video_decode_profile_type DecodeProfileType, + d3d12_video_decode_dpb_descriptor m_dpbDescriptor) + : m_DecodeTargetToOriginalIndex7Bits({ }), + m_CurrentIndex7BitsAvailable(0), + m_pD3D12Screen(pD3D12Screen), + m_invalidIndex(GetInvalidReferenceIndex(DecodeProfileType)), + m_dpbDescriptor(m_dpbDescriptor), + m_formatInfo({ m_dpbDescriptor.Format }) +{ + HRESULT hr = m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, &m_formatInfo, sizeof(m_formatInfo)); + 
assert(SUCCEEDED(hr)); + D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC targetFrameResolution = { static_cast(m_dpbDescriptor.Width), + m_dpbDescriptor.Height }; + D3D12_RESOURCE_FLAGS resourceAllocFlags = + m_dpbDescriptor.fReferenceOnly ? + (D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) : + D3D12_RESOURCE_FLAG_NONE; + + if (m_dpbDescriptor.fArrayOfTexture) { + // If all subresources are 0, the DPB is loaded with an array of individual textures, the D3D Encode API expects + // pSubresources to be null in this case The D3D Decode API expects it to be non-null even with all zeroes. + bool setNullSubresourcesOnAllZero = false; + m_upD3D12TexturesStorageManager = + std::make_unique(m_dpbDescriptor.dpbSize, + m_pD3D12Screen->dev, + m_dpbDescriptor.Format, + targetFrameResolution, + resourceAllocFlags, + setNullSubresourcesOnAllZero, + m_dpbDescriptor.m_NodeMask, + !is_pipe_buffer_underlying_output_decode_allocation()); + } else { + m_upD3D12TexturesStorageManager = std::make_unique(m_dpbDescriptor.dpbSize, + m_pD3D12Screen->dev, + m_dpbDescriptor.Format, + targetFrameResolution, + resourceAllocFlags, + m_dpbDescriptor.m_NodeMask); + } + + m_referenceDXVAIndices.resize(m_dpbDescriptor.dpbSize); + + d3d12_video_reconstructed_picture reconPicture = { nullptr, 0, nullptr }; + + for (uint32_t dpbIdx = 0; dpbIdx < m_dpbDescriptor.dpbSize; dpbIdx++) { + m_upD3D12TexturesStorageManager->insert_reference_frame(reconPicture, dpbIdx); + } + + mark_all_references_as_unused(); + release_unused_references_texture_memory(); +} + +//---------------------------------------------------------------------------------------------------------------------------------- +uint16_t +d3d12_video_decoder_references_manager::find_remapped_index(uint16_t originalIndex) +{ + // Check if the index is already mapped. 
   // Tail of find_remapped_index(): linear scan of the DXVA-index table looking for the DPB slot
   // currently mapped to originalIndex. Returns m_invalidIndex when no slot matches; callers also
   // use this to grab the first *unused* slot by passing originalIndex == m_invalidIndex.
   for (uint16_t remappedIndex = 0; remappedIndex < m_dpbDescriptor.dpbSize; remappedIndex++) {
      if (m_referenceDXVAIndices[remappedIndex].originalIndex == originalIndex) {
         return remappedIndex;
      }
   }

   return m_invalidIndex;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Remaps one DXVA PicParams reference index to its D3D12 DPB slot and hands back the backing
// resource/subresource for that slot. An unknown index, or one that aliases the slot being
// written this frame (m_currentOutputIndex), is redirected to the current output slot with no
// READ transition requested, so a malformed bitstream degrades gracefully instead of faulting.
uint16_t
d3d12_video_decoder_references_manager::update_entry(
   uint16_t index,                       // in
   ID3D12Resource *&pOutputReference,    // out -> new reference slot assigned or nullptr
   uint32_t &OutputSubresource,          // out -> new reference slot assigned or 0
   bool &outNeedsTransitionToDecodeRead  // out -> indicates if output resource argument has to be transitioned to
                                         // D3D12_RESOURCE_STATE_VIDEO_DECODE_READ by the caller
)
{
   uint16_t remappedIndex = m_invalidIndex;
   outNeedsTransitionToDecodeRead = false;

   if (index != m_invalidIndex) {
      remappedIndex = find_remapped_index(index);

      outNeedsTransitionToDecodeRead = true;
      if (remappedIndex == m_invalidIndex || remappedIndex == m_currentOutputIndex) {
         debug_printf("[d3d12_video_decoder_references_manager] update_entry - Invalid Reference Index\n");

         // Error-handling fallback: point the entry at the current output picture and skip the
         // READ transition (that resource is being written this frame, not read).
         remappedIndex = m_currentOutputIndex;
         outNeedsTransitionToDecodeRead = false;
      }

      d3d12_video_reconstructed_picture reconPicture =
         m_upD3D12TexturesStorageManager->get_reference_frame(remappedIndex);
      // Only surface the resource when the caller actually has to transition it.
      pOutputReference = outNeedsTransitionToDecodeRead ? reconPicture.pReconstructedPicture : nullptr;
      OutputSubresource = outNeedsTransitionToDecodeRead ? reconPicture.ReconstructedPictureSubresource : 0u;
   }

   return remappedIndex;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Reserves (or reuses) the DPB slot for the picture about to be decoded and records its
// texture/subresource/decoder-heap triple in the storage manager. Also snapshots the slot as the
// "current output" used by update_entry()'s invalid-index fallback.
// NOTE(review): the decoderHeap parameter reads "ComPtr &" because the template argument list
// (presumably ID3D12VideoDecoderHeap) was stripped in patch transport — confirm against upstream.
_Use_decl_annotations_ uint16_t
d3d12_video_decoder_references_manager::store_future_reference(uint16_t index,
                                                               ComPtr &decoderHeap,
                                                               ID3D12Resource * pTexture2D,
                                                               uint32_t subresourceIndex)
{
   // Check if the index was in use.
   uint16_t remappedIndex = find_remapped_index(index);

   if (remappedIndex == m_invalidIndex) {
      // The current output index was not used last frame. Get an unused entry.
      remappedIndex = find_remapped_index(m_invalidIndex);
   }

   if (remappedIndex == m_invalidIndex) {
      debug_printf(
         "[d3d12_video_decoder_references_manager] d3d12_video_decoder_references_manager - Decode - No available "
         "reference map entry for output.\n");
      assert(false);
   }

   // Set the index as the key in this map entry.
   m_referenceDXVAIndices[remappedIndex].originalIndex = index;
   // The DPB storage keeps an IUnknown; QueryInterface the decoder heap into one (the reference
   // taken here is owned by the DPB entry).
   IUnknown *pUnkHeap = nullptr;
   HRESULT hr = decoderHeap.Get()->QueryInterface(IID_PPV_ARGS(&pUnkHeap));
   assert(SUCCEEDED(hr));
   d3d12_video_reconstructed_picture reconPic = { pTexture2D, subresourceIndex, pUnkHeap };

   m_upD3D12TexturesStorageManager->assign_reference_frame(reconPic, remappedIndex);

   // Store the index to use for error handling when caller specifies an invalid reference index.
   m_currentOutputIndex = remappedIndex;
   m_currentSubresourceIndex = subresourceIndex;
   m_currentResource = pTexture2D;

   return remappedIndex;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Flags the DPB slot mapped to a DXVA index as still referenced, protecting it from
// release_unused_references_texture_memory(). Silently ignores unknown/invalid indices.
void
d3d12_video_decoder_references_manager::mark_reference_in_use(uint16_t index)
{
   if (index != m_invalidIndex) {
      uint16_t remappedIndex = find_remapped_index(index);
      if (remappedIndex != m_invalidIndex) {
         m_referenceDXVAIndices[remappedIndex].fUsed = true;
      }
   }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Frees every DPB slot not marked fUsed since the last mark_all_references_as_unused() pass:
// returns the texture to the storage pool, nulls the DPB entry, and drops the slot's
// decode-target -> Index7Bits mapping.
void
d3d12_video_decoder_references_manager::release_unused_references_texture_memory()
{
   for (uint32_t index = 0; index < m_dpbDescriptor.dpbSize; index++) {
      if (!m_referenceDXVAIndices[index].fUsed) {
         d3d12_video_reconstructed_picture reconPicture = m_upD3D12TexturesStorageManager->get_reference_frame(index);
         if (reconPicture.pReconstructedPicture != nullptr) {
            bool wasTracked = m_upD3D12TexturesStorageManager->untrack_reconstructed_picture_allocation(reconPicture);
            // Untrack this resource: marks it as free in the underlying storage buffer pool.
            // If it was not tracked, it must be a no-copies allocation (pipe_video_buffer-backed).
            assert (wasTracked || is_pipe_buffer_underlying_output_decode_allocation());

            d3d12_video_reconstructed_picture nullReconPic = { nullptr, 0, nullptr };

            // Mark the unused refpic as null/empty in the DPB
            m_upD3D12TexturesStorageManager->assign_reference_frame(nullReconPic, index);

            // Remove the entry in m_DecodeTargetToOriginalIndex7Bits (reverse lookup by value,
            // since the map is keyed by pipe_video_buffer*).
            auto value = m_referenceDXVAIndices[index].originalIndex;
            auto it = std::find_if(m_DecodeTargetToOriginalIndex7Bits.begin(), m_DecodeTargetToOriginalIndex7Bits.end(),
               [&value](const std::pair< struct pipe_video_buffer*, uint8_t > &p) {
                  return p.second == value;
               });

            assert(it != m_DecodeTargetToOriginalIndex7Bits.end());

            m_DecodeTargetToOriginalIndex7Bits.erase(it);
         }


         m_referenceDXVAIndices[index].originalIndex = m_invalidIndex;
      }
   }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Clears every slot's fUsed flag; run at the start of a frame before mark_references_in_use()
// re-flags the slots the new picture actually references.
void
d3d12_video_decoder_references_manager::mark_all_references_as_unused()
{
   for (uint32_t index = 0; index < m_dpbDescriptor.dpbSize; index++) {
      m_referenceDXVAIndices[index].fUsed = false;
   }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Debug helper: dumps the DPB contents (resource pointer, subresource, heap, slot role and DXVA
// index per slot) plus the session parameters via debug_printf.
// NOTE(review): the "Slot type" line dereferences curRefFrames.pSubresources without the null
// guard used a few lines above — verify pSubresources can never be null here, or guard it.
// NOTE(review): "%ld" is used for Width (and pool size below); a 64-bit-safe specifier
// (PRId64/%zu) would be more portable — confirm against upstream.
void
d3d12_video_decoder_references_manager::print_dpb()
{
   // Resource backing storage always has to match dpbsize
   if(!is_pipe_buffer_underlying_output_decode_allocation()) {
      assert(m_upD3D12TexturesStorageManager->get_number_of_tracked_allocations() == m_dpbDescriptor.dpbSize);
   }

   // get_current_reference_frames query-interfaces the pVideoHeap's.
   D3D12_VIDEO_DECODE_REFERENCE_FRAMES curRefFrames = get_current_reference_frames();
   std::string dpbContents;
   for (uint32_t dpbResIdx = 0;dpbResIdx < curRefFrames.NumTexture2Ds;dpbResIdx++) {
      dpbContents += "\t{ DPBidx: ";
      dpbContents += std::to_string(dpbResIdx);
      dpbContents += " - ResourcePtr: ";
      char strBufTex[256];
      memset(&strBufTex, '\0', 256);
      sprintf(strBufTex, "%p", curRefFrames.ppTexture2Ds[dpbResIdx]);
      dpbContents += std::string(strBufTex);
      dpbContents += " - SubresourceIdx: ";
      dpbContents += (curRefFrames.pSubresources ? std::to_string(curRefFrames.pSubresources[dpbResIdx]) : "0");
      dpbContents += " - DecoderHeapPtr: ";
      char strBufHeap[256];
      memset(&strBufHeap, '\0', 256);
      if(curRefFrames.ppHeaps && curRefFrames.ppHeaps[dpbResIdx]) {
         sprintf(strBufHeap, "%p", curRefFrames.ppHeaps[dpbResIdx]);
         dpbContents += std::string(strBufHeap);
      } else {
         dpbContents += "(nil)";
      }
      dpbContents += " - Slot type: ";
      dpbContents += ((m_currentResource == curRefFrames.ppTexture2Ds[dpbResIdx]) && (m_currentSubresourceIndex == curRefFrames.pSubresources[dpbResIdx])) ? "Current decoded frame output" : "Reference frame";
      dpbContents += " - DXVA_PicParams Reference Index: ";
      dpbContents += (m_referenceDXVAIndices[dpbResIdx].originalIndex != m_invalidIndex) ? std::to_string(m_referenceDXVAIndices[dpbResIdx].originalIndex) : "DXVA_UNUSED_PICENTRY";
      dpbContents += "}\n";
   }

   debug_printf("[D3D12 Video Decoder Picture Manager] Decode session information:\n"
               "\tDPB Maximum Size (max_ref_count + one_slot_curpic): %d\n"
               "\tDXGI_FORMAT: %d\n"
               "\tTexture resolution: (%ld, %d)\n"
               "\tD3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY enforced: %d\n"
               "\tAllocation Mode: %s\n"
               "\n ----------------------\n\tCurrent frame information:\n"
               "\tD3D12_VIDEO_DECODE_REFERENCE_FRAMES.NumTexture2Ds: %d\n"
               "\tDPB Contents Table:\n%s",
               m_upD3D12TexturesStorageManager->get_number_of_tracked_allocations(),
               m_dpbDescriptor.Format,
               m_dpbDescriptor.Width,
               m_dpbDescriptor.Height,
               m_dpbDescriptor.fReferenceOnly,
               (m_dpbDescriptor.fArrayOfTexture ? "ArrayOfTextures" : "TextureArray"),
               m_upD3D12TexturesStorageManager->get_number_of_pics_in_dpb(),
               dpbContents.c_str());
}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.h b/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.h
new file mode 100644
index 00000000000..63dd3d7d56d
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_video_dec_references_mgr.h
@@ -0,0 +1,220 @@
/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef D3D12_VIDEO_DEC_REFMGR_H
#define D3D12_VIDEO_DEC_REFMGR_H

#include "d3d12_video_types.h"
#include "d3d12_video_dpb_storage_manager.h"
#include "d3d12_util.h"
// NOTE(review): two #include lines lost their <...> header names in patch transport
// (likely <algorithm> and <map>, both used below) — confirm against upstream.
#include
#include

// Maps DXVA PicParams reference indices (Index7Bits) to D3D12 DPB slots and owns the DPB
// backing-texture lifetime via a d3d12_video_dpb_storage_manager_interface.
// NOTE(review): several template argument lists in this declaration were stripped in patch
// transport (bare "template", "ComPtr", "std::vector", "std::map", "std::unique_ptr") — the
// annotations below flag each occurrence; confirm against upstream.
struct d3d12_video_decoder_references_manager
{
   d3d12_video_decoder_references_manager(const struct d3d12_screen * pD3D12Screen,
                                          uint32_t NodeMask,
                                          d3d12_video_decode_profile_type DecodeProfileType,
                                          d3d12_video_decode_dpb_descriptor dpbDescriptor);

   // True when the HW requires D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY allocations.
   bool is_reference_only()
   {
      return m_dpbDescriptor.fReferenceOnly;
   }
   // True when the DPB is an array of independent textures (vs one texture array).
   bool is_array_of_textures()
   {
      return m_dpbDescriptor.fArrayOfTexture;
   }

   // True when decode can write straight into the caller's pipe_video_buffer with no copy:
   // requires array-of-textures mode and no reference-only restriction.
   bool is_pipe_buffer_underlying_output_decode_allocation()
   {
      return (!is_reference_only() && is_array_of_textures());
   }

   void mark_all_references_as_unused();
   void release_unused_references_texture_memory();

   // Marks every DPB slot referenced by picEntries[].Index7Bits as in use.
   template   // NOTE(review): stripped, presumably <typename T, size_t size>
   void mark_references_in_use(const T (&picEntries)[size]);
   void mark_reference_in_use(uint16_t index);

   // Reserves the DPB slot for the picture about to be decoded; returns the remapped slot index.
   uint16_t store_future_reference(uint16_t index,
                                   _In_ ComPtr &decoderHeap,   // NOTE(review): stripped ComPtr<...>
                                   ID3D12Resource * pTexture2D,
                                   uint32_t subresourceIndex);

   // Will clear() argument outNeededTransitions and fill it with the necessary transitions to perform by the caller
   // after the method returns
   template   // NOTE(review): stripped, presumably <typename T, size_t size>
   void update_entries(T (&picEntries)[size], std::vector &outNeededTransitions);

   void get_reference_only_output(
      struct pipe_video_buffer * pCurrentDecodeTarget,
      ID3D12Resource **ppOutputReference,     // out -> new reference slot assigned or nullptr
      uint32_t * pOutputSubresource,          // out -> new reference slot assigned or nullptr
      bool &outNeedsTransitionToDecodeWrite   // out -> indicates if output resource argument has to be transitioned to
                                              // D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE by the caller
   );

   // Gets the output texture for the current frame to be decoded
   void get_current_frame_decode_output_texture(struct pipe_video_buffer *pCurrentDecodeTarget, ID3D12Resource **ppOutTexture2D, uint32_t *pOutSubresourceIndex);

   D3D12_VIDEO_DECODE_REFERENCE_FRAMES get_current_reference_frames();

   void print_dpb();

   ///
   /// Get the Index7Bits associated with this decode target
   /// If there isn't one assigned yet, gives out a fresh/unused Index7Bits
   ///
   uint8_t get_index7bits(struct pipe_video_buffer * pDecodeTarget) {
      bool bDecodeTargetAlreadyHasIndex = (m_DecodeTargetToOriginalIndex7Bits.count(pDecodeTarget) > 0);
      if(bDecodeTargetAlreadyHasIndex)
      {
         return m_DecodeTargetToOriginalIndex7Bits[pDecodeTarget];
      } else {
         uint8_t freshIdx = m_CurrentIndex7BitsAvailable;

         // Make sure next "available" index is not already used. Should be cleaned up and there shouldn't be never 127 in flight used indices
         #if DEBUG
            auto it = std::find_if(m_DecodeTargetToOriginalIndex7Bits.begin(), m_DecodeTargetToOriginalIndex7Bits.end(),
               [&freshIdx](const std::pair< struct pipe_video_buffer*, uint8_t > &p) {
                  return p.second == freshIdx;
               });

            assert(it == m_DecodeTargetToOriginalIndex7Bits.end());
         #endif

         // Point to next circular index for next call
         m_CurrentIndex7BitsAvailable = ((m_CurrentIndex7BitsAvailable + 1) % 127);

         // Assign freshIdx to pDecodeTarget
         m_DecodeTargetToOriginalIndex7Bits[pDecodeTarget] = freshIdx;
         return freshIdx;
      }
   }

 private:
   uint16_t update_entry(
      uint16_t index,                       // in
      ID3D12Resource *&pOutputReference,    // out -> new reference slot assigned or nullptr
      uint32_t & OutputSubresource,         // out -> new reference slot assigned or 0
      bool &outNeedsTransitionToDecodeRead  // out -> indicates if output resource argument has to be transitioned to
                                            // D3D12_RESOURCE_STATE_VIDEO_DECODE_READ by the caller
   );

   uint16_t find_remapped_index(uint16_t originalIndex);

   // One entry per DPB slot: the DXVA index it currently serves, and the per-frame use flag.
   struct ReferenceData
   {
      uint16_t originalIndex;
      bool fUsed;
   };

   // Holds the DPB textures
   std::unique_ptr m_upD3D12TexturesStorageManager;   // NOTE(review): stripped unique_ptr<...>
   std::vector
      m_ppHeaps;   // Auxiliary allocation to QueryInterface the IUnknown's
                   // m_upD3D12TexturesStorageManager->get_current_reference_frames().ppHeaps
                   // containing the generic video encode/decode heap;

   // Holds the mapping between DXVA PicParams indices and the D3D12 indices
   std::vector m_referenceDXVAIndices;   // NOTE(review): stripped vector<ReferenceData>

   std::map m_DecodeTargetToOriginalIndex7Bits = { };   // NOTE(review): stripped map<...> args
   uint8_t m_CurrentIndex7BitsAvailable = 0;

   ComPtr m_pClearDecodedOutputTexture;   // NOTE(review): stripped ComPtr<...>

   const struct d3d12_screen * m_pD3D12Screen;
   uint16_t m_invalidIndex;
   d3d12_video_decode_dpb_descriptor m_dpbDescriptor = {};
   uint16_t m_currentOutputIndex = 0;
   uint16_t m_currentSubresourceIndex = 0;
   ID3D12Resource* m_currentResource = nullptr;
   D3D12_FEATURE_DATA_FORMAT_INFO m_formatInfo = { m_dpbDescriptor.Format };
};


//----------------------------------------------------------------------------------------------------------------------------------
// Remaps every picEntry's Index7Bits via update_entry() and collects the COMMON ->
// VIDEO_DECODE_READ barriers the caller must issue — one per format plane, since D3D12 video DPB
// subresources are whole-picture indices while barriers use D3D12CalcSubresource plane indexing.
template   // NOTE(review): stripped, presumably <typename T, size_t size>
void
d3d12_video_decoder_references_manager::update_entries(T (&picEntries)[size],
                                                       std::vector &outNeededTransitions)
{
   outNeededTransitions.clear();

   for (auto &picEntry : picEntries) {
      // uint16_t update_entry(
      //     uint16_t index, // in
      //     ID3D12Resource*& pOutputReference, // out -> new reference slot assigned or nullptr
      //     uint32_t& OutputSubresource, // out -> new reference slot assigned or 0
      //     bool& outNeedsTransitionToDecodeRead // out -> indicates if output resource argument has to be transitioned
      //     to D3D12_RESOURCE_STATE_VIDEO_DECODE_READ by the caller
      // );

      ID3D12Resource *pOutputReference = {};
      uint32_t OutputSubresource = 0u;
      bool outNeedsTransitionToDecodeRead = false;

      picEntry.Index7Bits =
         update_entry(picEntry.Index7Bits, pOutputReference, OutputSubresource, outNeedsTransitionToDecodeRead);

      if (outNeedsTransitionToDecodeRead) {
         ///
         /// The subresource indexing in D3D12 Video within the DPB doesn't take into account the Y, UV planes (ie.
         /// subresource 0, 1, 2, 3..., N are different full NV12 references in the DPB) but when using the subresources
         /// in other areas of D3D12 we need to convert it to the D3D12CalcSubresource format, explained in
         /// https://docs.microsoft.com/en-us/windows/win32/direct3d12/subresources
         ///
         CD3DX12_RESOURCE_DESC refDesc(pOutputReference->GetDesc());
         uint32_t MipLevel, PlaneSlice, ArraySlice;
         D3D12DecomposeSubresource(OutputSubresource,
                                   refDesc.MipLevels,
                                   refDesc.ArraySize(),
                                   MipLevel,
                                   ArraySlice,
                                   PlaneSlice);

         for (PlaneSlice = 0; PlaneSlice < m_formatInfo.PlaneCount; PlaneSlice++) {
            uint planeOutputSubresource = refDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
            outNeededTransitions.push_back(CD3DX12_RESOURCE_BARRIER::Transition(pOutputReference,
                                                                                D3D12_RESOURCE_STATE_COMMON,
                                                                                D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
                                                                                planeOutputSubresource));
         }
      }
   }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Marks every DPB slot referenced by the DXVA picture-entry array as in use for this frame.
template   // NOTE(review): stripped, presumably <typename T, size_t size>
void
d3d12_video_decoder_references_manager::mark_references_in_use(const T (&picEntries)[size])
{
   for (auto &picEntry : picEntries) {
      mark_reference_in_use(picEntry.Index7Bits);
   }
}

#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_dpb_storage_manager.h b/src/gallium/drivers/d3d12/d3d12_video_dpb_storage_manager.h
new file mode 100644
index 00000000000..08c6a9eaa60
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_video_dpb_storage_manager.h
@@ -0,0 +1,95 @@
/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished
 to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */


#ifndef D3D12_VIDEO_DPB_STORAGE_MANAGER_INTERFACE_H
#define D3D12_VIDEO_DPB_STORAGE_MANAGER_INTERFACE_H

#include "d3d12_video_types.h"

// One reconstructed (decoded) picture: the backing texture, which subresource of it holds this
// picture, and an opaque heap (decoder/encoder heap held as IUnknown).
struct d3d12_video_reconstructed_picture
{
   ID3D12Resource *pReconstructedPicture;
   uint32_t ReconstructedPictureSubresource;
   IUnknown * pVideoHeap;
};

// Parallel-array view of the whole DPB, shaped to feed D3D12 decode/encode reference-frame
// structures (NumTexture2Ds entries in each array).
struct d3d12_video_reference_frames
{
   uint32_t NumTexture2Ds;
   ID3D12Resource **ppTexture2Ds;
   uint32_t * pSubresources;
   IUnknown ** ppHeaps;
};

// Defines interface for storing and retrieving the decoded picture buffer ID3D12Resources with
// the reconstructed pictures
// Implementors of this interface can decide how to do this, let Class1 and Class2 be implementors...
// for example Class1 can use a texture array and Class2 or an array of textures
class d3d12_video_dpb_storage_manager_interface
{
   // d3d12_video_dpb_storage_manager_interface
 public:
   // Adds a new reference frame at a given position
   virtual void insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition) = 0;

   // Gets a reference frame at a given position
   virtual d3d12_video_reconstructed_picture get_reference_frame(uint32_t dpbPosition) = 0;

   // Assigns a reference frame at a given position
   virtual void assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition) = 0;

   // Removes a new reference frame at a given position and returns operation success
   // pResourceUntracked is an optional output indicating if the removed resource was being tracked by the pool
   virtual bool remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked) = 0;

   // Returns the resource allocation for a NEW reconstructed picture
   virtual d3d12_video_reconstructed_picture get_new_tracked_picture_allocation() = 0;

   // Returns whether it found the tracked resource on this instance pool tracking and was able to free it
   virtual bool untrack_reconstructed_picture_allocation(d3d12_video_reconstructed_picture trackedItem) = 0;

   // Returns true if the trackedItem was allocated (and is being tracked) by this class
   virtual bool is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem) = 0;

   // resource pool size
   virtual uint32_t get_number_of_tracked_allocations() = 0;

   // number of resources in the pool that are marked as in use
   virtual uint32_t get_number_of_in_use_allocations() = 0;

   // Returns the number of pictures currently stored in the DPB
   virtual uint32_t get_number_of_pics_in_dpb() = 0;

   // Returns all the current reference frames stored in the storage manager
   virtual d3d12_video_reference_frames get_current_reference_frames() = 0;

   // Remove all pictures from DPB
   // returns the number of resources marked as reusable
   virtual uint32_t clear_decode_picture_buffer() = 0;

   virtual ~d3d12_video_dpb_storage_manager_interface()
   { }
};

#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.cpp b/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.cpp
new file mode 100644
index 00000000000..199c678a25f
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.cpp
@@ -0,0 +1,308 @@
/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "d3d12_video_texture_array_dpb_manager.h"

// NOTE(review): the two #include lines below lost their <...> header names in patch transport —
// confirm against upstream.
#ifndef _WIN32
#include
#endif

#define D3D12_IGNORE_SDK_LAYERS
#include

#include "d3d12_util.h"

///
/// d3d12_texture_array_dpb_manager
///

// Differences with ArrayOfTextures
// Uses a D3D12 Texture Array instead of an std::vector with individual D3D resources as backing storage
// Doesn't support extension (by reallocation and copy) of the pool

// Allocates the single committed Tex2D with texArraySize array slices that backs the whole DPB.
// No-op when texArraySize is 0. On failure logs the HRESULT and asserts (does not throw).
void
d3d12_texture_array_dpb_manager::create_reconstructed_picture_allocations(ID3D12Resource **ppResource,
                                                                          uint16_t texArraySize)
{
   if (texArraySize > 0) {
      D3D12_HEAP_PROPERTIES Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, m_nodeMask, m_nodeMask);
      CD3DX12_RESOURCE_DESC reconstructedPictureResourceDesc = CD3DX12_RESOURCE_DESC::Tex2D(m_encodeFormat,
                                                                                            m_encodeResolution.Width,
                                                                                            m_encodeResolution.Height,
                                                                                            texArraySize,
                                                                                            1,
                                                                                            1,
                                                                                            0,
                                                                                            m_resourceAllocFlags);

      HRESULT hr = m_pDevice->CreateCommittedResource(&Properties,
                                                      D3D12_HEAP_FLAG_NONE,
                                                      &reconstructedPictureResourceDesc,
                                                      D3D12_RESOURCE_STATE_COMMON,
                                                      nullptr,
                                                      IID_PPV_ARGS(ppResource));
      if (FAILED(hr)) {
         debug_printf("CreateCommittedResource failed with HR %x\n", hr);
         assert(false);
      }
   }
}

d3d12_texture_array_dpb_manager::~d3d12_texture_array_dpb_manager()
{ }

// Builds a fixed-size pool: one texture-array resource, one pool entry per array slice, all
// initially free. The pool cannot grow afterwards.
// NOTE(review): clear_decode_picture_buffer() is virtual and called from the constructor; this
// dispatches to this class's override (the object is of this dynamic type during the ctor), which
// is what the code relies on — flagging for awareness.
d3d12_texture_array_dpb_manager::d3d12_texture_array_dpb_manager(
   uint16_t dpbTextureArraySize,
   ID3D12Device * pDevice,
   DXGI_FORMAT encodeSessionFormat,
   D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC encodeSessionResolution,
   D3D12_RESOURCE_FLAGS resourceAllocFlags,
   uint32_t nodeMask)
   : m_pDevice(pDevice),
     m_encodeFormat(encodeSessionFormat),
     m_encodeResolution(encodeSessionResolution),
     m_dpbTextureArraySize(dpbTextureArraySize),
     m_resourceAllocFlags(resourceAllocFlags),
     m_nodeMask(nodeMask)
{
   // Initialize D3D12 DPB exposed in this class implemented CRUD interface for a DPB
   clear_decode_picture_buffer();

   // Implement a reusable pool of D3D12 Resources as an array of textures
   uint16_t poolFixedSize = m_dpbTextureArraySize;
   m_ResourcesPool.resize(poolFixedSize);

   // Build resource pool with commitedresources with a d3ddevice and the encoding session settings (eg. resolution) and
   // the reference_only flag
   create_reconstructed_picture_allocations(m_baseTexArrayResource.GetAddressOf(), poolFixedSize);

   for (uint32_t idxSubres = 0; idxSubres < poolFixedSize; idxSubres++) {
      m_ResourcesPool[idxSubres].pResource = m_baseTexArrayResource;
      m_ResourcesPool[idxSubres].subresource = idxSubres;
      m_ResourcesPool[idxSubres].isFree = true;
   }
}

// Empties the DPB tables, returning each evicted entry to the pool when it came from the pool;
// returns how many entries were successfully untracked.
uint32_t
d3d12_texture_array_dpb_manager::clear_decode_picture_buffer()
{
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());

   uint32_t untrackCount = 0;
   // Mark resources used in DPB as re-usable in the resources pool
   for (uint32_t idx = 0; idx < m_D3D12DPB.pResources.size(); idx++) {
      // Don't assert the untracking result here in case the DPB contains resources not acquired using the pool methods
      // in this interface
      untrackCount +=
         untrack_reconstructed_picture_allocation({ m_D3D12DPB.pResources[idx], m_D3D12DPB.pSubresources[idx] }) ? 1 :
                                                                                                                   0;
   }

   // Clear DPB
   m_D3D12DPB.pResources.clear();
   m_D3D12DPB.pSubresources.clear();
   m_D3D12DPB.pHeaps.clear();
   m_D3D12DPB.pResources.reserve(m_dpbTextureArraySize);
   m_D3D12DPB.pSubresources.reserve(m_dpbTextureArraySize);
   m_D3D12DPB.pHeaps.reserve(m_dpbTextureArraySize);

   return untrackCount;
}

// Assigns a reference frame at a given position (overwrites an existing slot; position must
// already exist — see insert_reference_frame to grow the DPB).
void
d3d12_texture_array_dpb_manager::assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture,
                                                        uint32_t dpbPosition)
{
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());

   assert(dpbPosition < m_D3D12DPB.pResources.size());

   m_D3D12DPB.pResources[dpbPosition] = pReconPicture.pReconstructedPicture;
   m_D3D12DPB.pSubresources[dpbPosition] = pReconPicture.ReconstructedPictureSubresource;
   m_D3D12DPB.pHeaps[dpbPosition] = pReconPicture.pVideoHeap;
}

// Adds a new reference frame at a given position, extending the parallel tables first when the
// position is past the current end.
void
d3d12_texture_array_dpb_manager::insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture,
                                                        uint32_t dpbPosition)
{
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());

   if (dpbPosition > m_D3D12DPB.pResources.size()) {
      // extend capacity
      m_D3D12DPB.pResources.resize(dpbPosition);
      m_D3D12DPB.pSubresources.resize(dpbPosition);
      m_D3D12DPB.pHeaps.resize(dpbPosition);
   }

   m_D3D12DPB.pResources.insert(m_D3D12DPB.pResources.begin() + dpbPosition, pReconPicture.pReconstructedPicture);
   m_D3D12DPB.pSubresources.insert(m_D3D12DPB.pSubresources.begin() + dpbPosition,
                                   pReconPicture.ReconstructedPictureSubresource);
   m_D3D12DPB.pHeaps.insert(m_D3D12DPB.pHeaps.begin() + dpbPosition, pReconPicture.pVideoHeap);
}

// Gets a reference frame at a given position
d3d12_video_reconstructed_picture
d3d12_texture_array_dpb_manager::get_reference_frame(uint32_t dpbPosition)
{
   assert(dpbPosition < m_D3D12DPB.pResources.size());

   d3d12_video_reconstructed_picture retVal = { m_D3D12DPB.pResources[dpbPosition],
                                                m_D3D12DPB.pSubresources[dpbPosition],
                                                m_D3D12DPB.pHeaps[dpbPosition] };

   return retVal;
}

// Removes a reference frame at a given position and returns operation success. Always returns
// true; *pResourceUntracked (optional) reports whether the evicted entry belonged to the pool.
bool
d3d12_texture_array_dpb_manager::remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked)
{
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());

   assert(dpbPosition < m_D3D12DPB.pResources.size());

   // If removed resource came from resource pool, mark it as free
   // to free it for a new usage
   // Don't assert the untracking result here in case the DPB contains resources not acquired using the pool methods in
   // this interface
   bool resUntracked = untrack_reconstructed_picture_allocation(
      { m_D3D12DPB.pResources[dpbPosition], m_D3D12DPB.pSubresources[dpbPosition] });

   if (pResourceUntracked != nullptr) {
      *pResourceUntracked = resUntracked;
   }

   // Remove from DPB tables
   m_D3D12DPB.pResources.erase(m_D3D12DPB.pResources.begin() + dpbPosition);
   m_D3D12DPB.pSubresources.erase(m_D3D12DPB.pSubresources.begin() + dpbPosition);
   m_D3D12DPB.pHeaps.erase(m_D3D12DPB.pHeaps.begin() + dpbPosition);

   return true;
}

// Returns true if the trackedItem was allocated (and is being tracked) by this class —
// i.e. it matches a pool entry that is currently marked in use.
bool
d3d12_texture_array_dpb_manager::is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem)
{
   for (auto &reusableRes : m_ResourcesPool) {
      if ((trackedItem.pReconstructedPicture == reusableRes.pResource.Get()) &&
          (trackedItem.ReconstructedPictureSubresource == reusableRes.subresource) && !reusableRes.isFree) {
         return true;
      }
   }
   return false;
}

// Returns whether it found the tracked resource on this instance pool tracking and was able to
// free it (marks the matching pool entry free regardless of its previous state).
bool
d3d12_texture_array_dpb_manager::untrack_reconstructed_picture_allocation(d3d12_video_reconstructed_picture trackedItem)
{
   for (auto &reusableRes : m_ResourcesPool) {
      if ((trackedItem.pReconstructedPicture == reusableRes.pResource.Get()) &&
          (trackedItem.ReconstructedPictureSubresource == reusableRes.subresource)) {
         reusableRes.isFree = true;
         return true;
      }
   }
   return false;
}

// Returns a fresh resource for a NEW picture to be written to
// this class implements the dpb allocations as an array of textures
// Returns a {nullptr, 0} picture when the pool is exhausted (logged, not asserted).
d3d12_video_reconstructed_picture
d3d12_texture_array_dpb_manager::get_new_tracked_picture_allocation()
{
   d3d12_video_reconstructed_picture freshAllocation = { // pResource
                                                         nullptr,
                                                         // subresource
                                                         0
   };

   // Find first (if any) available resource to (re-)use
   bool bAvailableResourceInPool = false;
   for (auto &reusableRes : m_ResourcesPool) {
      if (reusableRes.isFree) {
         bAvailableResourceInPool = true;
         freshAllocation.pReconstructedPicture = reusableRes.pResource.Get();
         freshAllocation.ReconstructedPictureSubresource = reusableRes.subresource;
         reusableRes.isFree = false;
         break;
      }
   }

   if (!bAvailableResourceInPool) {
      // NOTE(review): "%ld" with a size_t argument is not portable; %zu would be — confirm.
      debug_printf("[d3d12_texture_array_dpb_manager] ID3D12Resource pool is full - Pool capacity (%ld) - Returning null allocation",
                   m_ResourcesPool.size());
   }

   return freshAllocation;
}

// Returns the number of pictures currently stored in the DPB.
uint32_t
d3d12_texture_array_dpb_manager::get_number_of_pics_in_dpb()
{
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pSubresources.size());
   assert(m_D3D12DPB.pResources.size() == m_D3D12DPB.pHeaps.size());
   assert(m_D3D12DPB.pResources.size() < UINT32_MAX);
   return static_cast(m_D3D12DPB.pResources.size());   // NOTE(review): stripped static_cast<uint32_t>
}

// Returns the DPB as parallel arrays; the pointers alias this object's internal vectors and are
// invalidated by any mutation of the DPB.
d3d12_video_reference_frames
d3d12_texture_array_dpb_manager::get_current_reference_frames()
{
   d3d12_video_reference_frames retVal = {
      get_number_of_pics_in_dpb(),
      m_D3D12DPB.pResources.data(),
      m_D3D12DPB.pSubresources.data(),
      m_D3D12DPB.pHeaps.data(),
   };

   return retVal;
}

// number of resources in the pool that are marked as in use
uint32_t
d3d12_texture_array_dpb_manager::get_number_of_in_use_allocations()
{
   uint32_t countOfInUseResourcesInPool = 0;
   for (auto &reusableRes : m_ResourcesPool) {
      if (!reusableRes.isFree) {
         countOfInUseResourcesInPool++;
      }
   }
   return countOfInUseResourcesInPool;
}

// Returns the total pool size (free + in-use entries); fixed at construction.
uint32_t
d3d12_texture_array_dpb_manager::get_number_of_tracked_allocations()
{
   assert(m_ResourcesPool.size() < UINT32_MAX);
   return static_cast(m_ResourcesPool.size());   // NOTE(review): stripped static_cast<uint32_t>
}
diff --git a/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.h b/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.h
new file mode 100644
index 00000000000..a8212cf621f
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_video_texture_array_dpb_manager.h
@@ -0,0 +1,126 @@
/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */


#ifndef D3D12_VIDEO_TEXTURE_ARRAY_DPB_MANAGER_H
#define D3D12_VIDEO_TEXTURE_ARRAY_DPB_MANAGER_H

#include "d3d12_video_dpb_storage_manager.h"
#include "d3d12_video_types.h"

// DPB storage manager backed by a single D3D12 texture array: one committed resource, one array
// slice per pool entry. Fixed capacity (no pool growth), unlike the array-of-textures variant.
// NOTE(review): several template argument lists below were stripped in patch transport (bare
// "ComPtr", "std::vector") — confirm against upstream.
class d3d12_texture_array_dpb_manager : public d3d12_video_dpb_storage_manager_interface
{
   // d3d12_video_dpb_storage_manager_interface
 public:
   // Adds a new reference frame at a given position
   void insert_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition);

   // Assigns a reference frame at a given position
   void assign_reference_frame(d3d12_video_reconstructed_picture pReconPicture, uint32_t dpbPosition);

   // Gets a reference frame at a given position
   d3d12_video_reconstructed_picture get_reference_frame(uint32_t dpbPosition);

   // Removes a new reference frame at a given position and returns operation success
   // pResourceUntracked is an optional output indicating if the removed resource was being tracked by the pool
   bool remove_reference_frame(uint32_t dpbPosition, bool *pResourceUntracked = nullptr);

   // Returns the resource allocation for a NEW picture
   d3d12_video_reconstructed_picture get_new_tracked_picture_allocation();

   // Returns true if the trackedItem was allocated (and is being tracked) by this class
   bool is_tracked_allocation(d3d12_video_reconstructed_picture trackedItem);

   // Returns whether it found the tracked resource on this instance pool tracking and was able to free it
   bool untrack_reconstructed_picture_allocation(d3d12_video_reconstructed_picture trackedItem);

   // Returns the number of pictures currently stored in the DPB
   uint32_t get_number_of_pics_in_dpb();

   // Returns all the current reference frames stored
   d3d12_video_reference_frames get_current_reference_frames();

   // Removes all pictures from DPB
   // returns the number of resources marked as reusable
   uint32_t clear_decode_picture_buffer();

   // number of resources in the pool that are marked as in use
   uint32_t get_number_of_in_use_allocations();

   // resource pool size (fixed at construction)
   uint32_t get_number_of_tracked_allocations();

   // d3d12_texture_array_dpb_manager
 public:
   d3d12_texture_array_dpb_manager(
      uint16_t dpbInitialSize,   // Maximum in use resources for a DPB of size x should be x+1 for cases when a P frame
                                 // is using the x references in the L0 list and also using an extra resource to output
                                 // it's own recon pic.
      ID3D12Device * pDevice,
      DXGI_FORMAT encodeSessionFormat,
      D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC encodeSessionResolution,
      D3D12_RESOURCE_FLAGS resourceAllocFlags = D3D12_RESOURCE_FLAG_NONE,
      uint32_t nodeMask = 0);
   ~d3d12_texture_array_dpb_manager();

   // d3d12_texture_array_dpb_manager
 private:
   // Allocates the backing texture array (see .cpp for details).
   void create_reconstructed_picture_allocations(ID3D12Resource **ppResource, uint16_t texArraySize);

   ID3D12Device * m_pDevice;
   DXGI_FORMAT m_encodeFormat;
   D3D12_VIDEO_ENCODER_PICTURE_RESOLUTION_DESC m_encodeResolution;
   uint16_t m_dpbTextureArraySize = 0;

   // DPB with array of resources backing storage

   // Parallel tables: entry i of each vector describes DPB slot i.
   struct d3d12_video_dpb
   {
      std::vector pResources;      // NOTE(review): stripped vector<ID3D12Resource *>
      std::vector pSubresources;   // NOTE(review): stripped vector<uint32_t>
      std::vector pHeaps;          // NOTE(review): stripped vector<IUnknown *>
   };

   d3d12_video_dpb m_D3D12DPB;

   // Flags used when creating the resource pool
   // Usually if reference only is needed for d3d12 video use
   // D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
   // D3D12_RESOURCE_FLAG_VIDEO_ENCODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
   D3D12_RESOURCE_FLAGS m_resourceAllocFlags;

   // Pool of resources to be aliased by the DPB without giving memory ownership
   // This resources are allocated and released by this implementation
   struct d3d12_reusable_resource
   {
      ComPtr pResource;   // NOTE(review): stripped ComPtr<ID3D12Resource>
      uint32_t subresource;
      bool isFree;
   };

   ComPtr m_baseTexArrayResource;   // NOTE(review): stripped ComPtr<ID3D12Resource>
   std::vector m_ResourcesPool;     // NOTE(review): stripped vector<d3d12_reusable_resource>

   uint32_t m_nodeMask = 0u;
};

#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_video_types.h b/src/gallium/drivers/d3d12/d3d12_video_types.h
new file mode 100644
index 00000000000..4ae2e63b9ed
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_video_types.h
@@ -0,0 +1,119 @@
/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
+ */ + +#ifndef D3D12_VIDEO_TYPES_H +#define D3D12_VIDEO_TYPES_H + +#include +#include +#include +#include + +#include "pipe/p_context.h" +#include "pipe/p_video_codec.h" +#include "d3d12_fence.h" +#include "d3d12_debug.h" + +#include +#include + +#include +using Microsoft::WRL::ComPtr; + +// Allow encoder to continue the encoding session when an optional +// rate control mode such as the following is used but not supported +// +// D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_VBV_SIZES +// D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_MAX_FRAME_SIZE +// +// If setting this OS Env variable to true, the encoding process will continue, disregarding the settings +// requested for the optional RC mode +// + +const bool D3D12_VIDEO_ENC_FALLBACK_RATE_CONTROL_CONFIG = debug_get_bool_option("D3D12_VIDEO_ENC_FALLBACK_RATE_CONTROL_CONFIG", false); + +/* For CBR mode, to guarantee bitrate of generated stream complies with +* target bitrate (e.g. no over +/-10%), vbv_buffer_size should be same +* as target bitrate. Controlled by OS env var D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE +*/ +const bool D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE = debug_get_bool_option("D3D12_VIDEO_ENC_CBR_FORCE_VBV_EQUAL_BITRATE", false); + +// Allow encoder to continue the encoding session when aa slice mode +// is requested but not supported. 
+// +// If setting this OS Env variable to true, the encoder will try to adjust to the closest slice +// setting available and encode using that configuration anyway +// +const bool D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG = debug_get_bool_option("D3D12_VIDEO_ENC_FALLBACK_SLICE_CONFIG", false); + +constexpr unsigned int D3D12_VIDEO_H264_MB_IN_PIXELS = 16; + +enum d3d12_video_decode_config_specific_flags +{ + d3d12_video_decode_config_specific_flag_none = 0, + d3d12_video_decode_config_specific_flag_alignment_height = 1 << 12, // set by accelerator + d3d12_video_decode_config_specific_flag_array_of_textures = 1 << 14, // set by accelerator + d3d12_video_decode_config_specific_flag_reuse_decoder = + 1 << 15, // set by accelerator - This bit means that the decoder can be re-used with resolution change and bit + // depth change (including profile GUID change from 8bit to 10bit and vice versa). + d3d12_video_decode_config_specific_flag_reference_only_textures_required = 1 << 30, // custom created for WSL +}; + +enum d3d12_video_decode_profile_type +{ + d3d12_video_decode_profile_type_none, + d3d12_video_decode_profile_type_h264, + d3d12_video_decode_profile_type_max_valid +}; + +struct d3d12_video_decode_dpb_descriptor +{ + DXGI_FORMAT Format = DXGI_FORMAT_UNKNOWN; + uint64_t Width = 0; + uint32_t Height = 0; + bool fArrayOfTexture = false; + bool fReferenceOnly = false; + uint16_t dpbSize = 0; + uint32_t m_NodeMask = 0; +}; + +struct d3d12_video_decode_output_conversion_arguments +{ + BOOL Enable; + DXGI_COLOR_SPACE_TYPE OutputColorSpace; + D3D12_VIDEO_SAMPLE ReferenceInfo; + uint32_t ReferenceFrameCount; +}; + +void +d3d12_video_encoder_convert_from_d3d12_level_h264(D3D12_VIDEO_ENCODER_LEVELS_H264 level12, + uint32_t & specLevel, + uint32_t & constraint_set3_flag); +D3D12_VIDEO_ENCODER_PROFILE_H264 +d3d12_video_encoder_convert_profile_to_d3d12_enc_profile_h264(enum pipe_video_profile profile); +D3D12_VIDEO_ENCODER_CODEC 
+d3d12_video_encoder_convert_codec_to_d3d12_enc_codec(enum pipe_video_profile profile); +GUID +d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile); + +#endif diff --git a/src/gallium/drivers/d3d12/meson.build b/src/gallium/drivers/d3d12/meson.build index 8c761552e0b..78538956071 100644 --- a/src/gallium/drivers/d3d12/meson.build +++ b/src/gallium/drivers/d3d12/meson.build @@ -47,6 +47,12 @@ files_libd3d12 = files( 'd3d12_surface.cpp', 'd3d12_tcs_variant.cpp', 'D3D12ResourceState.cpp', + 'd3d12_video_dec.cpp', + 'd3d12_video_dec_references_mgr.cpp', + 'd3d12_video_dec_h264.cpp', + 'd3d12_video_buffer.cpp', + 'd3d12_video_texture_array_dpb_manager.cpp', + 'd3d12_video_array_of_textures_dpb_manager.cpp', ) if host_machine.system() == 'windows'