From 5425d32fcf400bac42a2667d230fb3823c3a3f0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 20 Nov 2017 22:02:11 +0100 Subject: [PATCH] amd/addrlib: update to the latest version for Vega12 Reviewed-by: Alex Deucher --- src/amd/addrlib/addrinterface.cpp | 32 +++- src/amd/addrlib/addrinterface.h | 62 +++++--- src/amd/addrlib/addrtypes.h | 6 +- src/amd/addrlib/amdgpu_asic_addr.h | 3 + src/amd/addrlib/core/addrlib.cpp | 80 +++++++++- src/amd/addrlib/core/addrlib.h | 36 ++++- src/amd/addrlib/core/addrlib1.cpp | 14 +- src/amd/addrlib/core/addrlib2.cpp | 10 ++ src/amd/addrlib/core/addrlib2.h | 6 - src/amd/addrlib/gfx9/gfx9addrlib.cpp | 230 +++++++++++++++++++-------- src/amd/addrlib/gfx9/gfx9addrlib.h | 21 ++- src/amd/addrlib/r800/ciaddrlib.cpp | 40 +++-- src/amd/addrlib/r800/ciaddrlib.h | 4 +- src/amd/addrlib/r800/egbaddrlib.cpp | 8 +- src/amd/addrlib/r800/siaddrlib.cpp | 35 ++-- src/amd/addrlib/r800/siaddrlib.h | 4 +- src/amd/common/ac_surface.c | 2 +- 17 files changed, 442 insertions(+), 151 deletions(-) diff --git a/src/amd/addrlib/addrinterface.cpp b/src/amd/addrlib/addrinterface.cpp index 5fdf7fc3c65..112431e2cb4 100644 --- a/src/amd/addrlib/addrinterface.cpp +++ b/src/amd/addrlib/addrinterface.cpp @@ -1054,7 +1054,7 @@ ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( */ ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( ADDR_HANDLE hLib, ///< address lib handle - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) ///< [out] output structure + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure { Addr::Lib* pLib = Lib::GetLib(hLib); @@ -1072,6 +1072,36 @@ ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( return returnCode; } +/** +**************************************************************************************************** +* AddrGetMaxMetaAlignments +* +* @brief +* Convert maximum alignments for metadata +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( + ADDR_HANDLE hLib, ///< address lib handle + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure +{ + Addr::Lib* pLib = Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->GetMaxMetaAlignments(pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h index 8124b745f21..be9e5c2b81e 100644 --- a/src/amd/addrlib/addrinterface.h +++ b/src/amd/addrlib/addrinterface.h @@ -528,7 +528,8 @@ typedef union _ADDR_SURFACE_FLAGS UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface /// to make sure they share same tile config parameters - UINT_32 reserved : 2; ///< Reserved bits + UINT_32 disallowLargeThickDegrade : 1; ///< Disallow large thick tile degrade + UINT_32 reserved : 1; ///< Reserved bits }; UINT_32 value; @@ -2273,7 +2274,7 @@ typedef struct _ADDR_COMPUTE_DCCINFO_INPUT typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT { UINT_32 size; ///< Size of this structure in bytes - UINT_64 dccRamBaseAlign; ///< Base alignment of dcc key + UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key UINT_64 dccRamSize; ///< Size of dcc key UINT_64 dccFastClearSize; ///< Size of dcc key portion that can be fast cleared BOOL_32 subLvlCompressible; ///< Whether sub resource is compressiable @@ -2298,17 +2299,17 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( /** **************************************************************************************************** -* ADDR_GET_MAX_ALIGNMENTS_OUTPUT +* ADDR_GET_MAX_ALINGMENTS_OUTPUT * * @brief * 
Output structure of AddrGetMaxAlignments **************************************************************************************************** */ -typedef struct _ADDR_GET_MAX_ALIGNMENTS_OUTPUT +typedef struct _ADDR_GET_MAX_ALINGMENTS_OUTPUT { UINT_32 size; ///< Size of this structure in bytes - UINT_64 baseAlign; ///< Maximum base alignment in bytes -} ADDR_GET_MAX_ALIGNMENTS_OUTPUT; + UINT_32 baseAlign; ///< Maximum base alignment in bytes +} ADDR_GET_MAX_ALINGMENTS_OUTPUT; /** **************************************************************************************************** @@ -2320,9 +2321,19 @@ typedef struct _ADDR_GET_MAX_ALIGNMENTS_OUTPUT */ ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( ADDR_HANDLE hLib, - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut); - + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); +/** +**************************************************************************************************** +* AddrGetMaxMetaAlignments +* +* @brief +* Gets maximnum alignments for metadata +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( + ADDR_HANDLE hLib, + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); /** **************************************************************************************************** @@ -2366,22 +2377,25 @@ typedef union _ADDR2_SURFACE_FLAGS { struct { - UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV - UINT_32 depth : 1; ///< Thie resource is a depth buffer, can be used with DSV - UINT_32 stencil : 1; ///< Thie resource is a stencil buffer, can be used with DSV - UINT_32 fmask : 1; ///< This is an fmask surface - UINT_32 overlay : 1; ///< This is an overlay surface - UINT_32 display : 1; ///< This resource is displable, can be used with DRV - UINT_32 prt : 1; ///< This is a partially resident texture - UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface - UINT_32 interleaved : 1; ///< Special flag for 
interleaved YUV surface padding - UINT_32 texture : 1; ///< This resource can be used with SRV - UINT_32 unordered : 1; ///< This resource can be used with UAV - UINT_32 rotated : 1; ///< This resource is rotated and displable - UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible - UINT_32 opt4space : 1; ///< This resource should be optimized for space - UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment - UINT_32 reserved : 17; ///< Reserved bits + UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV + UINT_32 depth : 1; ///< This resource is a depth buffer, can be used with DSV + UINT_32 stencil : 1; ///< This resource is a stencil buffer, can be used with DSV + UINT_32 fmask : 1; ///< This is an fmask surface + UINT_32 overlay : 1; ///< This is an overlay surface + UINT_32 display : 1; ///< This resource is displayable, can be used with DRV + UINT_32 prt : 1; ///< This is a partially resident texture + UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface + UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding + UINT_32 texture : 1; ///< This resource can be used with SRV + UINT_32 unordered : 1; ///< This resource can be used with UAV + UINT_32 rotated : 1; ///< This resource is rotated and displayable + UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible + UINT_32 opt4space : 1; ///< This resource should be optimized for space + UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment + UINT_32 noMetadata : 1; ///< This resource has no metadata + UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata + UINT_32 metaPipeUnaligned : 1; ///< This resource has pipe unaligned metadata + UINT_32 reserved : 14; ///< Reserved bits }; UINT_32 value; diff --git a/src/amd/addrlib/addrtypes.h b/src/amd/addrlib/addrtypes.h index f8f96d54a10..c63ad96ff97 100644 --- 
a/src/amd/addrlib/addrtypes.h +++ b/src/amd/addrlib/addrtypes.h @@ -76,7 +76,7 @@ typedef int INT; #ifndef ADDR_STDCALL #if defined(__GNUC__) - #if defined(__AMD64__) + #if defined(__amd64__) || defined(__x86_64__) #define ADDR_STDCALL #else #define ADDR_STDCALL __attribute__((stdcall)) @@ -87,7 +87,9 @@ typedef int INT; #endif #ifndef ADDR_FASTCALL - #if defined(__GNUC__) + #if defined(BRAHMA_ARM) + #define ADDR_FASTCALL + #elif defined(__GNUC__) #if defined(__i386__) #define ADDR_FASTCALL __attribute__((regparm(0))) #else diff --git a/src/amd/addrlib/amdgpu_asic_addr.h b/src/amd/addrlib/amdgpu_asic_addr.h index ea957a88b4d..d7232ba14a2 100644 --- a/src/amd/addrlib/amdgpu_asic_addr.h +++ b/src/amd/addrlib/amdgpu_asic_addr.h @@ -85,6 +85,7 @@ #define AMDGPU_STONEY_RANGE 0x61, 0xFF #define AMDGPU_VEGA10_RANGE 0x01, 0x14 +#define AMDGPU_VEGA12_RANGE 0x14, 0x28 #define AMDGPU_RAVEN_RANGE 0x01, 0x81 @@ -123,6 +124,8 @@ #define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10) #define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12) +#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12) #define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp index a6ac5ecf836..5af6dd1e339 100644 --- a/src/amd/addrlib/core/addrlib.cpp +++ b/src/amd/addrlib/core/addrlib.cpp @@ -285,10 +285,12 @@ ADDR_E_RETURNCODE Lib::Create( { pCreateOut->numEquations = pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable); - } - if ((pLib == NULL) && - (returnCode == ADDR_OK)) + pLib->SetMaxAlignments(); + + } + else if ((pLib == NULL) && + (returnCode == ADDR_OK)) { // Unknown failures, we return the general error code returnCode = ADDR_ERROR; @@ -336,6 +338,23 @@ VOID Lib::SetMinPitchAlignPixels( m_minPitchAlignPixels = (minPitchAlignPixels == 0) ? 
1 : minPitchAlignPixels; } +/** +**************************************************************************************************** +* Lib::SetMaxAlignments +* +* @brief +* Set max alignments +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::SetMaxAlignments() +{ + m_maxBaseAlign = HwlComputeMaxBaseAlignments(); + m_maxMetaBaseAlign = HwlComputeMaxMetaBaseAlignments(); +} + /** **************************************************************************************************** * Lib::GetLib @@ -358,21 +377,21 @@ Lib* Lib::GetLib( * Lib::GetMaxAlignments * * @brief -* Gets maximum alignments +* Gets maximum alignments for data surface (include FMask) * * @return * ADDR_E_RETURNCODE **************************************************************************************************** */ ADDR_E_RETURNCODE Lib::GetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure ) const { ADDR_E_RETURNCODE returnCode = ADDR_OK; if (GetFillSizeFieldsFlags() == TRUE) { - if (pOut->size != sizeof(ADDR_GET_MAX_ALIGNMENTS_OUTPUT)) + if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) { returnCode = ADDR_PARAMSIZEMISMATCH; } @@ -380,7 +399,54 @@ ADDR_E_RETURNCODE Lib::GetMaxAlignments( if (returnCode == ADDR_OK) { - returnCode = HwlGetMaxAlignments(pOut); + if (m_maxBaseAlign != 0) + { + pOut->baseAlign = m_maxBaseAlign; + } + else + { + returnCode = ADDR_NOTIMPLEMENTED; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetMaxMetaAlignments +* +* @brief +* Gets maximum alignments for metadata (CMask, DCC and HTile) +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments( + 
ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + if (m_maxMetaBaseAlign != 0) + { + pOut->baseAlign = m_maxMetaBaseAlign; + } + else + { + returnCode = ADDR_NOTIMPLEMENTED; + } } return returnCode; diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h index 8db65a61c87..0cbb4e0186f 100644 --- a/src/amd/addrlib/core/addrlib.h +++ b/src/amd/addrlib/core/addrlib.h @@ -282,14 +282,38 @@ public: BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const; - ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; protected: Lib(); // Constructor is protected Lib(const Client* pClient); - /// Pure virtual function to get max alignments - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const = 0; + /// Pure virtual function to get max base alignments + virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0; + + /// Gets maximum alignments for metadata + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const + { + ADDR_NOT_IMPLEMENTED(); + + return 0; + } + + VOID ValidBaseAlignments(UINT_32 alignment) const + { +#if DEBUG + ADDR_ASSERT(alignment <= m_maxBaseAlign); +#endif + } + + VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const + { +#if DEBUG + ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign); +#endif + } // // Initialization @@ -341,6 +365,8 @@ private: VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); + VOID SetMaxAlignments(); + protected: LibClass m_class; ///< Store class type (HWL type) @@ -370,6 +396,10 @@ protected: UINT_32 
m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels UINT_32 m_maxSamples; ///< Max numSamples + + UINT_32 m_maxBaseAlign; ///< Max base alignment for data surface + UINT_32 m_maxMetaBaseAlign; ///< Max base alignment for metadata + private: ElemLib* m_pElemLib; ///< Element Lib pointer }; diff --git a/src/amd/addrlib/core/addrlib1.cpp b/src/amd/addrlib/core/addrlib1.cpp index c796a63436c..9c1d84289b3 100644 --- a/src/amd/addrlib/core/addrlib1.cpp +++ b/src/amd/addrlib/core/addrlib1.cpp @@ -428,6 +428,8 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( } } + ValidBaseAlignments(pOut->baseAlign); + return returnCode; } @@ -895,6 +897,8 @@ ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( } } + ValidBaseAlignments(pOut->baseAlign); + return returnCode; } @@ -1333,6 +1337,8 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo( } } + ValidMetaBaseAlignments(pOut->baseAlign); + return returnCode; } @@ -1399,6 +1405,8 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( } } + ValidMetaBaseAlignments(pOut->baseAlign); + return returnCode; } @@ -1443,9 +1451,11 @@ ADDR_E_RETURNCODE Lib::ComputeDccInfo( pIn = &input; } - if (ADDR_OK == ret) + if (ret == ADDR_OK) { ret = HwlComputeDccInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->dccRamBaseAlign); } } @@ -3652,7 +3662,7 @@ VOID Lib::OptimizeTileMode( tileMode = (thickness == 1) ? 
ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; } - else if (thickness > 1) + else if ((thickness > 1) && (pInOut->flags.disallowLargeThickDegrade == 0)) { // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to // thinner modes, we should re-evaluate whether the corresponding diff --git a/src/amd/addrlib/core/addrlib2.cpp b/src/amd/addrlib/core/addrlib2.cpp index ddaf597f9dd..fc9b71f3ee4 100644 --- a/src/amd/addrlib/core/addrlib2.cpp +++ b/src/amd/addrlib/core/addrlib2.cpp @@ -295,6 +295,8 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( ADDR_ASSERT(pOut->surfSize != 0); + ValidBaseAlignments(pOut->baseAlign); + return returnCode; } @@ -447,6 +449,8 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo( else { returnCode = HwlComputeHtileInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->baseAlign); } return returnCode; @@ -545,6 +549,8 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( else { returnCode = HwlComputeCmaskInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->baseAlign); } return returnCode; @@ -688,6 +694,8 @@ ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( } } + ValidBaseAlignments(pOut->baseAlign); + return returnCode; } @@ -764,6 +772,8 @@ ADDR_E_RETURNCODE Lib::ComputeDccInfo( else { returnCode = HwlComputeDccInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->dccRamBaseAlign); } return returnCode; diff --git a/src/amd/addrlib/core/addrlib2.h b/src/amd/addrlib/core/addrlib2.h index bea2a485a61..d82e6c0984b 100644 --- a/src/amd/addrlib/core/addrlib2.h +++ b/src/amd/addrlib/core/addrlib2.h @@ -480,12 +480,6 @@ protected: return HwlGetEquationIndex(pIn, pOut); } - virtual UINT_32 HwlComputeSurfaceBaseAlign(AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return 0; - } - virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp index e06f13c0afe..b88d3243228 100644 --- 
a/src/amd/addrlib/gfx9/gfx9addrlib.cpp +++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp @@ -189,10 +189,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; - Dim3d metaBlkDim = {8, 8, 1}; + Dim3d metaBlkDim = {8, 8, 1}; UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; - UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits); - UINT_32 heightAmp = totalAmpBits - widthAmp; + UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits); + UINT_32 heightAmp = totalAmpBits - widthAmp; metaBlkDim.w <<= widthAmp; metaBlkDim.h <<= heightAmp; @@ -221,39 +221,42 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices, &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); - UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2; + UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + + if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) + { + align *= (numPipeTotal >> 1); + } + + align = Max(align, metaBlkSize); + + if (m_settings.metaBaseAlignFix) + { + align = Max(align, GetBlockSize(pIn->swizzleMode)); + } if (m_settings.htileAlignFix) { - sizeAlign <<= 1; + const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2; + const INT_32 htileCachelineSizeLog2 = 11; + const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal); + + INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits)); + + align <<= rbMaskPadding; } pOut->pitch = numMetaBlkX * metaBlkDim.w; pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4; + pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize; - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; + pOut->metaBlkWidth = metaBlkDim.w; + 
pOut->metaBlkHeight = metaBlkDim.h; pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign); - - if (m_settings.metaBaseAlignFix) - { - pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode)); - } - - if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) - { - UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2; - - if (additionalAlign > sizeAlign) - { - sizeAlign = additionalAlign; - } - } - - pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); + pOut->baseAlign = align; + pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align); return ADDR_OK; } @@ -333,17 +336,17 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + if (m_settings.metaBaseAlignFix) + { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + pOut->pitch = numMetaBlkX * metaBlkDim.w; pOut->height = numMetaBlkY * metaBlkDim.h; pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1; pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign); - if (m_settings.metaBaseAlignFix) - { - pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode)); - } - pOut->metaBlkWidth = metaBlkDim.w; pOut->metaBlkHeight = metaBlkDim.h; @@ -638,16 +641,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( sizeAlign *= (numFrags / m_maxCompFrag); } + if (m_settings.metaBaseAlignFix) + { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ * numCompressBlkPerMetaBlk * numFrags; pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign); pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign); - if (m_settings.metaBaseAlignFix) - { - pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, 
GetBlockSize(pIn->swizzleMode)); - } - pOut->pitch = numMetaBlkX * metaBlkDim.w; pOut->height = numMetaBlkY * metaBlkDim.h; pOut->depth = numMetaBlkZ * metaBlkDim.d; @@ -670,21 +673,78 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( /** ************************************************************************************************************************ -* Gfx9Lib::HwlGetMaxAlignments +* Gfx9Lib::HwlComputeMaxBaseAlignments * * @brief * Gets maximum alignments * @return -* ADDR_E_RETURNCODE +* maximum alignments ************************************************************************************************************************ */ -ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure - ) const +UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const { - pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB); + return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB); +} - return ADDR_OK; +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const +{ + // Max base alignment for Htile + const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z); + const UINT_32 maxNumRbTotal = m_se * m_rbPerSe; + + // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2), + // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic. 
+ ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u)); + const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u); + + UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes; + + if (maxNumPipeTotal > 2) + { + maxBaseAlignHtile *= (maxNumPipeTotal >> 1); + } + + maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB)); + } + + if (m_settings.htileAlignFix) + { + maxBaseAlignHtile *= maxNumPipeTotal; + } + + // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate + + // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate + UINT_32 maxBaseAlignDcc3D = 65536; + + if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1)) + { + maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u); + } + + // Max base alignment for Msaa Dcc + UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB)); + } + + return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D)); } /** @@ -724,9 +784,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, - Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, + Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = pIn->x / 
output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -798,9 +860,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -870,9 +934,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, pIn->swizzleMode); @@ -948,10 +1014,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); - const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, - Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, - compBlkWidthLog2, 
compBlkHeightLog2, compBlkDepthLog2}); + MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, + Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, + compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -1055,6 +1123,10 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( break; } + // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, + // and any larger value requires a post-process (left shift) on the output pipeBankXor bits. + ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B); + switch (gbAddrConfig.bits.NUM_BANKS) { case ADDR_CONFIG_1_BANK: @@ -1151,6 +1223,19 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( ADDR_ASSERT((m_blockVarSizeLog2 == 0) || ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u))); m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u); + + if ((m_rbPerSeLog2 == 1) && + (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) || + ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) + { + ADDR_ASSERT(m_settings.isVega10 == FALSE); + ADDR_ASSERT(m_settings.isRaven == FALSE); + + if (m_settings.isVega12) + { + m_settings.htileCacheRbConflict = 1; + } + } } else { @@ -1187,6 +1272,7 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily( case FAMILY_AI: m_settings.isArcticIsland = 1; m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); + m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision); m_settings.isDce12 = 1; @@ -3279,10 +3365,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( addrPreferredSwSet.value = AddrSwSetZ; addrValidSwSet.value = AddrSwSetZ; - if (pIn->flags.depth && pIn->flags.texture) + if (pIn->flags.noMetadata == FALSE) { - if (((bpp == 16) && (numFrags >= 4)) || - ((bpp == 32) && (numFrags 
>= 2))) + if (pIn->flags.depth && + pIn->flags.texture && + (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2)))) { // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane // equation from wrong address within memory range a tile covered and use the @@ -3290,6 +3377,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( pOut->canXor = FALSE; prtXor = FALSE; } + + if (m_settings.htileCacheRbConflict && + (pIn->flags.depth || pIn->flags.stencil) && + (slice > 1) && + (pIn->flags.metaRbUnaligned == FALSE) && + (pIn->flags.metaPipeUnaligned == FALSE)) + { + // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency + pOut->canXor = FALSE; + } } } else if (ElemLib::IsBlockCompressed(pIn->format)) @@ -3402,12 +3499,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( if (pIn->bpp == 64) { addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetD; + addrValidSwSet.value = AddrSwSetS | AddrSwSetD; } else { addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD; + addrValidSwSet.value = AddrSwSetS; } blockSet.micro = FALSE; @@ -4037,7 +4134,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( pOut->sliceSize = static_cast(pOut->mipChainPitch) * pOut->mipChainHeight * (pIn->bpp >> 3) * pIn->numFrags; pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; - pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode); + pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode); if (pIn->flags.prt) { @@ -4762,15 +4859,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock; - UINT_32 macroBlockIndex = + UINT_64 macroBlockIndex = (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock + 
((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + ((pIn->x / localOut.blockWidth) + mipStartPos.w); - UINT_64 macroBlockOffset = (static_cast(macroBlockIndex) << - GetBlockSizeLog2(pIn->swizzleMode)); - - pOut->addr = blockOffset | macroBlockOffset; + pOut->addr = blockOffset | (macroBlockIndex << log2blkSize); } else { @@ -4835,7 +4929,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth; UINT_32 sliceSizeInBlock = (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; pOut->addr = blockOffset | (blockIndex << log2blkSize); } diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.h b/src/amd/addrlib/gfx9/gfx9addrlib.h index 1f233a4ff91..7c61a40880e 100644 --- a/src/amd/addrlib/gfx9/gfx9addrlib.h +++ b/src/amd/addrlib/gfx9/gfx9addrlib.h @@ -55,19 +55,19 @@ struct Gfx9ChipSettings UINT_32 isArcticIsland : 1; UINT_32 isVega10 : 1; UINT_32 isRaven : 1; - UINT_32 reserved0 : 29; + UINT_32 isVega12 : 1; // Display engine IP version name UINT_32 isDce12 : 1; UINT_32 isDcn1 : 1; - UINT_32 reserved1 : 29; // Misc configuration bits UINT_32 metaBaseAlignFix : 1; UINT_32 depthPipeXorDisable : 1; UINT_32 htileAlignFix : 1; UINT_32 applyAliasFix : 1; - UINT_32 reserved2 : 28; + UINT_32 htileCacheRbConflict: 1; + UINT_32 reserved2 : 27; }; }; @@ -121,9 +121,6 @@ public: return (pMem != NULL) ? 
new (pMem) Gfx9Lib(pClient) : NULL; } - virtual BOOL_32 IsValidDisplaySwizzleMode( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - protected: Gfx9Lib(const Client* pClient); virtual ~Gfx9Lib(); @@ -224,7 +221,7 @@ protected: AddrSwizzleMode swMode, UINT_32 elementBytesLog2) const; - virtual UINT_32 HwlComputeSurfaceBaseAlign(AddrSwizzleMode swizzleMode) const + UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const { UINT_32 baseAlign; @@ -400,11 +397,11 @@ protected: static const UINT_32 MaxCachedMetaEq = 2; private: - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + virtual UINT_32 HwlComputeMaxBaseAlignments() const; - virtual BOOL_32 HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn); + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn); VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const; @@ -434,6 +431,8 @@ private: UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth, UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const; + BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32* pMipmap0PaddedWidth, diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp index 322dcf64ffd..1b982c5c08b 100644 --- a/src/amd/addrlib/r800/ciaddrlib.cpp +++ b/src/amd/addrlib/r800/ciaddrlib.cpp @@ -736,7 +736,7 @@ ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo( SiLib::HwlComputeSurfaceInfo(&localIn, pOut); - ADDR_ASSERT(((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex)) || pOut->tileIndex == Depth1DThinIndex); + ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex)); depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); 
} @@ -2157,29 +2157,27 @@ VOID CiLib::HwlPadDimensions( /** **************************************************************************************************** -* CiLib::HwlGetMaxAlignments +* CiLib::HwlComputeMaxBaseAlignments * * @brief * Gets maximum alignments * @return -* ADDR_E_RETURNCODE +* maximum alignments **************************************************************************************************** */ -ADDR_E_RETURNCODE CiLib::HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure - ) const +UINT_32 CiLib::HwlComputeMaxBaseAlignments() const { const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); // Initial size is 64 KiB for PRT. - UINT_64 maxBaseAlign = 64 * 1024; + UINT_32 maxBaseAlign = 64 * 1024; for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) { // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. UINT_32 tileSize = m_macroTileTable[i].tileSplitBytes; - UINT_64 baseAlign = tileSize * pipes * m_macroTileTable[i].banks * + UINT_32 baseAlign = tileSize * pipes * m_macroTileTable[i].banks * m_macroTileTable[i].bankWidth * m_macroTileTable[i].bankHeight; if (baseAlign > maxBaseAlign) @@ -2188,12 +2186,32 @@ ADDR_E_RETURNCODE CiLib::HwlGetMaxAlignments( } } - if (pOut != NULL) + return maxBaseAlign; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +**************************************************************************************************** +*/ +UINT_32 CiLib::HwlComputeMaxMetaBaseAlignments() const +{ + UINT_32 maxBank = 1; + + for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) { - pOut->baseAlign = maxBaseAlign; + if ((m_settings.isVolcanicIslands) && IsMacroTiled(m_tileTable[i].mode)) + { + maxBank = Max(maxBank, m_macroTileTable[i].banks); + } } - return ADDR_OK; + return 
SiLib::HwlComputeMaxMetaBaseAlignments() * maxBank; } /** diff --git a/src/amd/addrlib/r800/ciaddrlib.h b/src/amd/addrlib/r800/ciaddrlib.h index c11b678574f..28c19f06031 100644 --- a/src/amd/addrlib/r800/ciaddrlib.h +++ b/src/amd/addrlib/r800/ciaddrlib.h @@ -137,7 +137,9 @@ protected: const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; virtual VOID HwlPadDimensions( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp index 99aa6cf4cdb..3947cfda2fd 100644 --- a/src/amd/addrlib/r800/egbaddrlib.cpp +++ b/src/amd/addrlib/r800/egbaddrlib.cpp @@ -100,11 +100,13 @@ BOOL_32 EgBasedLib::DispatchComputeSurfaceInfo( ADDR_TILEINFO tileInfoDef = {0}; ADDR_TILEINFO* pTileInfo = &tileInfoDef; - - UINT_32 padDims = 0; + UINT_32 padDims = 0; BOOL_32 valid; - tileMode = DegradeLargeThickTile(tileMode, bpp); + if (pIn->flags.disallowLargeThickDegrade == 0) + { + tileMode = DegradeLargeThickTile(tileMode, bpp); + } // Only override numSamples for NI above if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp index 0fb5c2befdc..3c17a7aa8d7 100644 --- a/src/amd/addrlib/r800/siaddrlib.cpp +++ b/src/amd/addrlib/r800/siaddrlib.cpp @@ -3468,22 +3468,20 @@ VOID SiLib::HwlSelectTileMode( /** **************************************************************************************************** -* SiLib::HwlGetMaxAlignments +* SiLib::HwlComputeMaxBaseAlignments * * @brief * Gets maximum alignments * @return -* ADDR_E_RETURNCODE +* maximum alignments **************************************************************************************************** */ -ADDR_E_RETURNCODE 
SiLib::HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure - ) const +UINT_32 SiLib::HwlComputeMaxBaseAlignments() const { const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); // Initial size is 64 KiB for PRT. - UINT_64 maxBaseAlign = 64 * 1024; + UINT_32 maxBaseAlign = 64 * 1024; for (UINT_32 i = 0; i < m_noOfEntries; i++) { @@ -3494,7 +3492,7 @@ ADDR_E_RETURNCODE SiLib::HwlGetMaxAlignments( UINT_32 tileSize = Min(m_tileTable[i].info.tileSplitBytes, MicroTilePixels * 8 * 16); - UINT_64 baseAlign = tileSize * pipes * m_tileTable[i].info.banks * + UINT_32 baseAlign = tileSize * pipes * m_tileTable[i].info.banks * m_tileTable[i].info.bankWidth * m_tileTable[i].info.bankHeight; if (baseAlign > maxBaseAlign) @@ -3504,12 +3502,29 @@ ADDR_E_RETURNCODE SiLib::HwlGetMaxAlignments( } } - if (pOut != NULL) + return maxBaseAlign; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeMaxMetaBaseAlignments() const +{ + UINT_32 maxPipe = 1; + + for (UINT_32 i = 0; i < m_noOfEntries; i++) { - pOut->baseAlign = maxBaseAlign; + maxPipe = Max(maxPipe, HwlGetPipes(&m_tileTable[i].info)); } - return ADDR_OK; + return m_pipeInterleaveBytes * maxPipe; } /** diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h index f07fc31a57d..9c879fe6c36 100644 --- a/src/amd/addrlib/r800/siaddrlib.h +++ b/src/amd/addrlib/r800/siaddrlib.h @@ -263,7 +263,9 @@ protected: return TRUE; } - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; virtual VOID 
HwlComputeSurfaceAlignmentsMacroTiled( AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 92bdf1dedec..603b7058bdc 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -163,7 +163,7 @@ ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info, ADDR_CREATE_OUTPUT addrCreateOutput = {0}; ADDR_REGISTER_VALUE regValue = {0}; ADDR_CREATE_FLAGS createFlags = {{0}}; - ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; + ADDR_GET_MAX_ALINGMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; ADDR_E_RETURNCODE addrRet; addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);