From f903a4be9ffa6a555d9505e0ff74c493cb2fc226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 17 Mar 2021 16:29:51 -0400 Subject: [PATCH] amd: update addrlib Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/addrlib/inc/addrinterface.h | 64 ++- src/amd/addrlib/inc/addrtypes.h | 18 +- src/amd/addrlib/src/addrinterface.cpp | 41 +- src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h | 2 +- src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h | 2 +- src/amd/addrlib/src/core/addrcommon.h | 17 +- src/amd/addrlib/src/core/addrlib2.cpp | 121 ++++- src/amd/addrlib/src/core/addrlib2.h | 42 +- src/amd/addrlib/src/gfx10/gfx10addrlib.cpp | 430 ++++++++++++++---- src/amd/addrlib/src/gfx10/gfx10addrlib.h | 4 + src/amd/addrlib/src/gfx9/gfx9addrlib.cpp | 184 +++++--- src/amd/common/ac_surface.c | 2 +- 12 files changed, 750 insertions(+), 177 deletions(-) diff --git a/src/amd/addrlib/inc/addrinterface.h b/src/amd/addrlib/inc/addrinterface.h index caeb1b46010..bbde582a13e 100644 --- a/src/amd/addrlib/inc/addrinterface.h +++ b/src/amd/addrlib/inc/addrinterface.h @@ -2321,7 +2321,7 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( * Output structure of AddrGetMaxAlignments **************************************************************************************************** */ -typedef struct _ADDR_GET_MAX_ALIGNMENTS_OUTPUT +typedef struct ADDR_GET_MAX_ALINGMENTS_OUTPUT { UINT_32 size; ///< Size of this structure in bytes UINT_32 baseAlign; ///< Maximum base alignment in bytes @@ -3627,6 +3627,62 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); +/** +**************************************************************************************************** +* ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT +* +* @brief +* Input structure of Addr2ComputeNonBlockCompressedView 
+**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9 + AddrResourceType resourceType; ///< Surface type + AddrFormat format; ///< Surface format + UINT_32 width; ///< Width of mip0 in texels (not in compressed block) + UINT_32 height; ///< Height of mip0 in texels (not in compressed block) + UINT_32 numSlices; ///< Number surface slice/depth of mip0 + UINT_32 numMipLevels; ///< Total mipmap levels. + UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation + UINT_32 slice; ///< Index of slice to view + UINT_32 mipId; ///< Id of mip to view +} ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT +* +* @brief +* Output structure of Addr2ComputeNonBlockCompressedView +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_64 offset; ///< Offset shifted from resource base for the view + UINT_32 pipeBankXor; ///< Pipe bank xor for the view + UINT_32 unalignedWidth; ///< Mip0 width (in element) for the view + UINT_32 unalignedHeight; ///< Mip0 height (in element) for the view + UINT_32 numMipLevels; ///< Total mipmap levels for the view + UINT_32 mipId; ///< Mip ID for the view +} ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeNonBlockCompressedView +* +* @brief +* Compute non-block-compressed view for a given mipmap level/slice 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeNonBlockCompressedView( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut); + /** **************************************************************************************************** * ADDR2_BLOCK_SET @@ -3764,6 +3820,8 @@ typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT UINT_32 maxAlign; ///< maximum base/size alignment requested by client UINT_32 minSizeAlign; ///< memory allocated for surface in client driver will /// be padded to multiple of this value (in bytes) + DOUBLE memoryBudget; ///< Memory consumption ratio based on minimum possible + /// size. } ADDR2_GET_PREFERRED_SURF_SETTING_INPUT; /** @@ -3806,14 +3864,14 @@ ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( * Addr2IsValidDisplaySwizzleMode * * @brief -* Return whether the swizzle mode is supported by DCE / DCN. 
+* Return whether the swizzle mode is supported by display engine **************************************************************************************************** */ ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( ADDR_HANDLE hLib, AddrSwizzleMode swizzleMode, UINT_32 bpp, - bool *result); + BOOL_32 *pResult); #if defined(__cplusplus) } diff --git a/src/amd/addrlib/inc/addrtypes.h b/src/amd/addrlib/inc/addrtypes.h index 095fc27ad34..55141ab67cf 100644 --- a/src/amd/addrlib/inc/addrtypes.h +++ b/src/amd/addrlib/inc/addrtypes.h @@ -49,6 +49,10 @@ typedef void VOID; typedef float FLOAT; #endif +#if !defined(DOUBLE) +typedef double DOUBLE; +#endif + #if !defined(CHAR) typedef char CHAR; #endif @@ -68,7 +72,11 @@ typedef int INT; */ #ifndef ADDR_CDECL #if defined(__GNUC__) - #define ADDR_CDECL __attribute__((cdecl)) + #if defined(__i386__) + #define ADDR_CDECL __attribute__((cdecl)) + #else + #define ADDR_CDECL + #endif #else #define ADDR_CDECL __cdecl #endif @@ -76,10 +84,10 @@ typedef int INT; #ifndef ADDR_STDCALL #if defined(__GNUC__) - #if defined(__amd64__) || defined(__x86_64__) - #define ADDR_STDCALL - #else + #if defined(__i386__) #define ADDR_STDCALL __attribute__((stdcall)) + #else + #define ADDR_STDCALL #endif #else #define ADDR_STDCALL __stdcall @@ -652,7 +660,7 @@ typedef enum _AddrTileType #endif #ifndef INT_8 -#define INT_8 signed char +#define INT_8 signed char // signed must be used because of aarch64 #endif #ifndef UINT_8 diff --git a/src/amd/addrlib/src/addrinterface.cpp b/src/amd/addrlib/src/addrinterface.cpp index 72f2ebed6d1..b2d032ef7d3 100644 --- a/src/amd/addrlib/src/addrinterface.cpp +++ b/src/amd/addrlib/src/addrinterface.cpp @@ -1705,6 +1705,35 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( return returnCode; } +/** +**************************************************************************************************** +* Addr2ComputeNonBlockCompressedView +* +* @brief +* Compute non-block-compressed 
view for a given mipmap level/slice. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeNonBlockCompressedView( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeNonBlockCompressedView(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + /** **************************************************************************************************** * Addr2GetPreferredSurfaceSetting @@ -1739,14 +1768,14 @@ ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( * Addr2IsValidDisplaySwizzleMode * * @brief -* Return whether the swizzle mode is supported by DCE / DCN. +* Return whether the swizzle mode is supported by display engine **************************************************************************************************** */ ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( ADDR_HANDLE hLib, AddrSwizzleMode swizzleMode, UINT_32 bpp, - bool *result) + BOOL_32 *pResult) { ADDR_E_RETURNCODE returnCode; @@ -1754,12 +1783,12 @@ ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( if (pLib != NULL) { - ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0}; + ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {}; in.resourceType = ADDR_RSRC_TEX_2D; - in.swizzleMode = swizzleMode; - in.bpp = bpp; + in.swizzleMode = swizzleMode; + in.bpp = bpp; - *result = pLib->IsValidDisplaySwizzleMode(&in); + *pResult = pLib->IsValidDisplaySwizzleMode(&in); returnCode = ADDR_OK; } else diff --git a/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h b/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h index 9a7f7159c87..3e3aa66a771 100644 --- a/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h +++ 
b/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h @@ -43,7 +43,7 @@ #error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" #endif -union GB_ADDR_CONFIG_gfx10 +union GB_ADDR_CONFIG_GFX10 { struct { diff --git a/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h b/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h index 988b7c05c49..ca89af6920b 100644 --- a/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h +++ b/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h @@ -43,7 +43,7 @@ #error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" #endif -union GB_ADDR_CONFIG_gfx9 { +union GB_ADDR_CONFIG_GFX9 { struct { #if defined(LITTLEENDIAN_CPU) unsigned int NUM_PIPES : 3; diff --git a/src/amd/addrlib/src/core/addrcommon.h b/src/amd/addrlib/src/core/addrcommon.h index d592f04be54..1871c737b7a 100644 --- a/src/amd/addrlib/src/core/addrcommon.h +++ b/src/amd/addrlib/src/core/addrcommon.h @@ -943,7 +943,7 @@ static inline UINT_32 GetCoordActiveMask( * ShiftCeil * * @brief -* Apply righ-shift with ceiling +* Apply right-shift with ceiling **************************************************************************************************** */ static inline UINT_32 ShiftCeil( @@ -953,6 +953,21 @@ static inline UINT_32 ShiftCeil( return (a >> b) + (((a & ((1 << b) - 1)) != 0) ? 
1 : 0); } +/** +**************************************************************************************************** +* ShiftRight +* +* @brief +* Return the right-shifted value, clamped to a minimum of 1 +**************************************************************************************************** +*/ +static inline UINT_32 ShiftRight( + UINT_32 a, ///< [in] value to be right-shifted + UINT_32 b) ///< [in] number of bits to shift +{ + return Max(a >> b, 1u); +} + } // Addr #endif // __ADDR_COMMON_H__ diff --git a/src/amd/addrlib/src/core/addrlib2.cpp b/src/amd/addrlib/src/core/addrlib2.cpp index 07021985a89..8bea9ecf91e 100644 --- a/src/amd/addrlib/src/core/addrlib2.cpp +++ b/src/amd/addrlib/src/core/addrlib2.cpp @@ -948,6 +948,37 @@ ADDR_E_RETURNCODE Lib::ComputeSubResourceOffsetForSwizzlePattern( return returnCode; } +/** +************************************************************************************************************************ +* Lib::ComputeNonBlockCompressedView +* +* @brief +* Interface function stub of Addr2ComputeNonBlockCompressedView.
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeNonBlockCompressedView( + const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeNonBlockCompressedView(pIn, pOut); + } + + return returnCode; +} + /** ************************************************************************************************************************ * Lib::ExtractPipeBankXor @@ -1979,7 +2010,7 @@ VOID Lib::FilterInvalidEqSwizzleMode( const UINT_32 rsrcTypeIdx = static_cast(resourceType) - 1; UINT_32 validSwModeSet = allowedSwModeSetVal; - for (UINT_32 swModeIdx = 0; validSwModeSet != 0; swModeIdx++) + for (UINT_32 swModeIdx = 1; validSwModeSet != 0; swModeIdx++) { if (validSwModeSet & 1) { @@ -2000,6 +2031,94 @@ VOID Lib::FilterInvalidEqSwizzleMode( } } +/** +************************************************************************************************************************ +* Lib::IsBlockTypeAvaiable +* +* @brief +* Determine whether a block type is allowed in a given blockSet +* +* @return +* TRUE if the block type is allowed in blockSet, FALSE otherwise +************************************************************************************************************************ +*/ +BOOL_32 Lib::IsBlockTypeAvaiable( + ADDR2_BLOCK_SET blockSet, + AddrBlockType blockType) +{ + BOOL_32 avail; + + if (blockType == AddrBlockLinear) + { + avail = blockSet.linear ? TRUE : FALSE; + } + else + { + avail = blockSet.value & (1 << (static_cast(blockType) - 1)) ?
TRUE : FALSE; + } + + return avail; +} + +/** +************************************************************************************************************************ +* Lib::BlockTypeWithinMemoryBudget +* +* @brief +* Determine whether a new block type is acceptable based on memory waste ratio +* +* @return +* TRUE if the new block type is acceptable, FALSE otherwise +************************************************************************************************************************ +*/ +BOOL_32 Lib::BlockTypeWithinMemoryBudget( + UINT_64 minSize, + UINT_64 newBlockTypeSize, + UINT_32 ratioLow, + UINT_32 ratioHi, + DOUBLE memoryBudget, + BOOL_32 newBlockTypeBigger) +{ + BOOL_32 accept = FALSE; + + if (memoryBudget >= 1.0) + { + if (newBlockTypeBigger) + { + if ((static_cast(newBlockTypeSize) / minSize) <= memoryBudget) + { + accept = TRUE; + } + } + else + { + if ((static_cast(minSize) / newBlockTypeSize) > memoryBudget) + { + accept = TRUE; + } + } + } + else + { + if (newBlockTypeBigger) + { + if ((newBlockTypeSize * ratioHi) <= (minSize * ratioLow)) + { + accept = TRUE; + } + } + else + { + if ((newBlockTypeSize * ratioLow) < (minSize * ratioHi)) + { + accept = TRUE; + } + } + } + + return accept; +} + #if DEBUG /** ************************************************************************************************************************ diff --git a/src/amd/addrlib/src/core/addrlib2.h b/src/amd/addrlib/src/core/addrlib2.h index 55dd9f975ac..ac0ae11a974 100644 --- a/src/amd/addrlib/src/core/addrlib2.h +++ b/src/amd/addrlib/src/core/addrlib2.h @@ -89,16 +89,15 @@ struct Dim3d // Macro define resource block type enum AddrBlockType { - AddrBlockMicro = 0, // Resource uses 256B block - AddrBlockThin4KB = 1, // Resource uses thin 4KB block - AddrBlockThick4KB = 2, // Resource uses thick 4KB block - AddrBlockThin64KB = 3, // Resource uses thin 64KB block - AddrBlockThick64KB = 4, // Resource uses thick 64KB block - AddrBlockThinVar = 5, // Resource uses thin var block - AddrBlockThickVar = 6, // Resource uses thick
var block - AddrBlockLinear, // Resource uses linear swizzle mode - - AddrBlockMaxTiledType = AddrBlockLinear, + AddrBlockLinear = 0, // Resource uses linear swizzle mode + AddrBlockMicro = 1, // Resource uses 256B block + AddrBlockThin4KB = 2, // Resource uses thin 4KB block + AddrBlockThick4KB = 3, // Resource uses thick 4KB block + AddrBlockThin64KB = 4, // Resource uses thin 64KB block + AddrBlockThick64KB = 5, // Resource uses thick 64KB block + AddrBlockThinVar = 6, // Resource uses thin var block + AddrBlockThickVar = 7, // Resource uses thick var block + AddrBlockMaxTiledType, }; @@ -292,6 +291,10 @@ public: const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); + ADDR_E_RETURNCODE ComputeNonBlockCompressedView( + const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut); + ADDR_E_RETURNCODE Addr2GetPreferredSurfaceSetting( const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; @@ -640,7 +643,6 @@ protected: return ADDR_NOTSUPPORTED; } - virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const @@ -649,6 +651,14 @@ protected: return ADDR_NOTSUPPORTED; } + virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView( + const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const @@ -912,6 +922,16 @@ protected: AddrResourceType resourceType, UINT_32 elemLog2) const; + static BOOL_32 IsBlockTypeAvaiable(ADDR2_BLOCK_SET blockSet, AddrBlockType blockType); + + 
static BOOL_32 BlockTypeWithinMemoryBudget( + UINT_64 minSize, + UINT_64 newBlockTypeSize, + UINT_32 ratioLow, + UINT_32 ratioHi, + DOUBLE memoryBudget = 0.0f, + BOOL_32 newBlockTypeBigger = TRUE); + #if DEBUG VOID ValidateStereoInfo( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp index 841901524f4..0ffee23467c 100644 --- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp +++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp @@ -788,8 +788,8 @@ VOID Gfx10Lib::HwlComputeDccAddrFromCoord( BOOL_32 Gfx10Lib::HwlInitGlobalParams( const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input { - BOOL_32 valid = TRUE; - GB_ADDR_CONFIG_gfx10 gbAddrConfig; + BOOL_32 valid = TRUE; + GB_ADDR_CONFIG_GFX10 gbAddrConfig; gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; @@ -2203,6 +2203,190 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern( return ADDR_OK; } +/** +************************************************************************************************************************ +* Gfx10Lib::HwlComputeNonBlockCompressedView +* +* @brief +* Compute non-block-compressed view for a given mipmap level/slice. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( + const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pIn->resourceType != ADDR_RSRC_TEX_2D) + { + // Only 2D resource can have a NonBC view... + returnCode = ADDR_INVALIDPARAMS; + } + else if ((pIn->format != ADDR_FMT_ASTC_8x8) && + ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7))) + { + // Only support BC1~BC7 or ASTC_8x8 for now... 
+ returnCode = ADDR_NOTSUPPORTED; + } + else + { + UINT_32 bcWidth, bcHeight; + UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight); + + ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {}; + infoIn.flags = pIn->flags; + infoIn.swizzleMode = pIn->swizzleMode; + infoIn.resourceType = pIn->resourceType; + infoIn.bpp = bpp; + infoIn.width = PowTwoAlign(pIn->width, bcWidth) / bcWidth; + infoIn.height = PowTwoAlign(pIn->height, bcHeight) / bcHeight; + infoIn.numSlices = pIn->numSlices; + infoIn.numMipLevels = pIn->numMipLevels; + infoIn.numSamples = 1; + infoIn.numFrags = 1; + + ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {}; + + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {}; + infoOut.pMipInfo = mipInfo; + + const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE; + + if (tiled) + { + returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut); + } + else + { + returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut); + } + + if (returnCode == ADDR_OK) + { + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {}; + subOffIn.swizzleMode = infoIn.swizzleMode; + subOffIn.resourceType = infoIn.resourceType; + subOffIn.slice = pIn->slice; + subOffIn.sliceSize = infoOut.sliceSize; + subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset; + subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset; + + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {}; + + // For any mipmap level, move nonBc view base address by offset + HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut); + pOut->offset = subOffOut.offset; + + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {}; + slicePbXorIn.bpe = infoIn.bpp; + slicePbXorIn.swizzleMode = infoIn.swizzleMode; + slicePbXorIn.resourceType = infoIn.resourceType; + slicePbXorIn.basePipeBankXor = pIn->pipeBankXor; + slicePbXorIn.slice = pIn->slice; + + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {}; + + // For any mipmap 
level, nonBc view should use computed pbXor + HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut); + pOut->pipeBankXor = slicePbXorOut.pipeBankXor; + + const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE; + const UINT_32 requestMipWidth = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth; + const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight; + + if (inTail) + { + // For mipmap level that is in mip tail block, hack a lot of things... + // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels + // are fit in tail block: + + // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain) + pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail; + + // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!) + pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u); + + // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold + pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2); + + // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold + pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight); + } + // This check should cover at least mipId == 0 + else if (requestMipWidth << pIn->mipId == infoIn.width) + { + // For mipmap level [N] that is not in mip tail block and downgraded without losing element: + // - only one mipmap level and mipId = 0 + pOut->mipId = 0; + pOut->numMipLevels = 1; + + // (mip0) width = requestMipWidth + pOut->unalignedWidth = requestMipWidth; + + // (mip0) height = requestMipHeight + pOut->unalignedHeight = requestMipHeight; + } + else + { + // For mipmap level [N] that is not in mip tail block and downgraded with element losing, + // We have to make it a multiple mipmap view (2 
levels view here), add one extra element if needed, + // because single mip view may have different pitch value than original (multiple) mip view... + // A simple case would be: + // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40] + // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view + // mip0 width = 0x101/mip1 width = 0x80 + // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in + // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes. + + // - 2 levels and mipId = 1 + pOut->mipId = 1; + pOut->numMipLevels = 2; + + const UINT_32 upperMipWidth = + PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth; + const UINT_32 upperMipHeight = + PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight; + + const BOOL_32 needToAvoidInTail = + tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ? + TRUE : FALSE; + + const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth); + const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight); + + const BOOL_32 needExtraWidth = + ((upperMipWidth < requestMipWidth * 2) || + ((upperMipWidth == requestMipWidth * 2) && + ((needToAvoidInTail == TRUE) || + (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE; + + const BOOL_32 needExtraHeight = + ((upperMipHeight < requestMipHeight * 2) || + ((upperMipHeight == requestMipHeight * 2) && + ((needToAvoidInTail == TRUE) || + (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE; + + // (mip0) width = requestLastMipLevelWidth + pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0); + + // (mip0) height = requestLastMipLevelHeight + pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 
1: 0); + } + + // Assert the downgrading from this mip[0] width would still generate correct mip[N] width + ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth); + // Assert the downgrading from this mip[0] height would still generate correct mip[N] height + ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight); + } + } + + return returnCode; +} + /** ************************************************************************************************************************ * Gfx10Lib::ValidateNonSwModeParams @@ -2548,19 +2732,14 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement); } - if (GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])) + if (BlockTypeWithinMemoryBudget(padSize[0], + padSize[1], + ratioLow, + ratioHi, + pIn->memoryBudget, + GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))) { - if ((padSize[1] * ratioHi) <= (padSize[0] * ratioLow)) - { - use64KbBlockType = FALSE; - } - } - else - { - if ((padSize[1] * ratioLow) < (padSize[0] * ratioHi)) - { - use64KbBlockType = FALSE; - } + use64KbBlockType = FALSE; } } else if (forbidVarBlockType) @@ -2633,16 +2812,16 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask; allowedSwModeSet.value |= pIn->forbiddenBlock.macroThin4KB ? 0 : - ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask); + ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask); allowedSwModeSet.value |= pIn->forbiddenBlock.macroThick4KB ? 0 : - ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0); + ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0); allowedSwModeSet.value |= pIn->forbiddenBlock.macroThin64KB ? 0 : - ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? 
Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask); + ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask); allowedSwModeSet.value |= pIn->forbiddenBlock.macroThick64KB ? 0 : - ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0); + ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0); allowedSwModeSet.value |= pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0); @@ -2796,8 +2975,15 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( } else { - // Always ignore linear swizzle mode if there is other choice. - allowedSwModeSet.swLinear = 0; + const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0); + + if ((height > 1) && (computeMinSize == FALSE)) + { + // Always ignore linear swizzle mode if: + // 1. This is a (2D/3D) resource with height > 1 + // 2. Client doesn't require computing minimize size + allowedSwModeSet.swLinear = 0; + } ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); @@ -2806,6 +2992,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( { AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {}; + swMode[AddrBlockLinear] = ADDR_SW_LINEAR; + if (m_blockVarSizeLog2 != 0) { swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X; @@ -2824,99 +3012,167 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S; } - Dim3d blkDim[AddrBlockMaxTiledType] = {}; - Dim3d padDim[AddrBlockMaxTiledType] = {}; UINT_64 padSize[AddrBlockMaxTiledType] = {}; - const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2); - const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1); + const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2); + const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 
2 : 1); const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); UINT_32 minSizeBlk = AddrBlockMicro; UINT_64 minSize = 0; - for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; + + for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++) { - if (allowedBlockSet.value & (1 << i)) + if (IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) { - ComputeBlockDimensionForSurf(&blkDim[i].w, - &blkDim[i].h, - &blkDim[i].d, - bpp, - numFrags, - pOut->resourceType, - swMode[i]); + localIn.swizzleMode = swMode[i]; - padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]); - padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement); - - if (minSize == 0) + if (localIn.swizzleMode == ADDR_SW_LINEAR) { - minSize = padSize[i]; - minSizeBlk = i; + returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut); } else { - // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR - // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from - // smaller block type to bigger block type. So we have to correct comparing logic - // according to the size of existing "minimun block" and size of coming/comparing - // block. The new logic can also be useful to any future change about AddrBlockType. 
- if (GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) + returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut); + } + + if (returnCode == ADDR_OK) + { + padSize[i] = localOut.surfSize; + + if (minSize == 0) { - if ((padSize[i] * ratioHi) <= (minSize * ratioLow)) - { - minSize = padSize[i]; - minSizeBlk = i; - } + minSize = padSize[i]; + minSizeBlk = i; } else { - if ((padSize[i] * ratioLow) < (minSize * ratioHi)) + if (BlockTypeWithinMemoryBudget( + minSize, + padSize[i], + ratioLow, + ratioHi, + 0.0, + GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))) { minSize = padSize[i]; minSizeBlk = i; } } } + else + { + ADDR_ASSERT_ALWAYS(); + break; + } } } - if ((allowedBlockSet.micro == TRUE) && - (width <= blkDim[AddrBlockMicro].w) && - (height <= blkDim[AddrBlockMicro].h)) + if (pIn->memoryBudget > 1.0) { - minSizeBlk = AddrBlockMicro; + // If minimum size is given by swizzle mode with bigger-block type, then don't ever check + // smaller-block type again in coming loop + switch (minSizeBlk) + { + case AddrBlockThick64KB: + allowedBlockSet.macroThin64KB = 0; + case AddrBlockThinVar: + case AddrBlockThin64KB: + allowedBlockSet.macroThick4KB = 0; + case AddrBlockThick4KB: + allowedBlockSet.macroThin4KB = 0; + case AddrBlockThin4KB: + allowedBlockSet.micro = 0; + case AddrBlockMicro: + allowedBlockSet.linear = 0; + case AddrBlockLinear: + break; + + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) + { + if ((i != minSizeBlk) && + IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + { + if (BlockTypeWithinMemoryBudget( + minSize, + padSize[i], + 0, + 0, + pIn->memoryBudget, + GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE) + { + // Clear the block type if the memory waste is unacceptable + allowedBlockSet.value &= ~(1u << (i - 1)); + } + } + } + + // Remove VAR block type if bigger block type is allowed + if 
(GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X)) + { + if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB) + { + allowedBlockSet.var = 0; + } + } + + // Remove linear block type if 2 or more block types are allowed + if (IsPow2(allowedBlockSet.value) == FALSE) + { + allowedBlockSet.linear = 0; + } + + // Select the biggest allowed block type + minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1; + + minSizeBlk = (minSizeBlk == AddrBlockMaxTiledType) ? AddrBlockLinear : minSizeBlk; } - if (minSizeBlk == AddrBlockMicro) + switch (minSizeBlk) { - ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); - allowedSwModeSet.value &= Gfx10Blk256BSwModeMask; - } - else if (minSizeBlk == AddrBlockThick4KB) - { - ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); - allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask; - } - else if (minSizeBlk == AddrBlockThin4KB) - { - ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); - allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask; - } - else if (minSizeBlk == AddrBlockThick64KB) - { - ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); - allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask; - } - else if (minSizeBlk == AddrBlockThin64KB) - { - allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ? 
- Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask; - } - else - { - ADDR_ASSERT(minSizeBlk == AddrBlockThinVar); - allowedSwModeSet.value &= Gfx10BlkVarSwModeMask; + case AddrBlockLinear: + allowedSwModeSet.value &= Gfx10LinearSwModeMask; + break; + + case AddrBlockMicro: + ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); + allowedSwModeSet.value &= Gfx10Blk256BSwModeMask; + break; + + case AddrBlockThin4KB: + ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); + allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask; + break; + + case AddrBlockThick4KB: + ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); + allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask; + break; + + case AddrBlockThin64KB: + allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ? + Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask; + break; + + case AddrBlockThick64KB: + ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); + allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask; + break; + + case AddrBlockThinVar: + allowedSwModeSet.value &= Gfx10BlkVarSwModeMask; + break; + + default: + ADDR_ASSERT_ALWAYS(); + allowedSwModeSet.value = 0; + break; } } @@ -2926,7 +3182,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); // Determine swizzle type if there are 2 or more swizzle type candidates - if (IsPow2(allowedSwSet.value) == FALSE) + if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE)) { if (ElemLib::IsBlockCompressed(pIn->format)) { @@ -3002,10 +3258,10 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( allowedSwModeSet.value &= Gfx10ZSwModeMask; } } - } - // Swizzle type should be determined. - ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); + // Swizzle type should be determined. + ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); + } // Determine swizzle mode now. 
Always select the "largest" swizzle mode for a given block type + // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.h b/src/amd/addrlib/src/gfx10/gfx10addrlib.h index 7d53d123483..6303c0801b7 100644 --- a/src/amd/addrlib/src/gfx10/gfx10addrlib.h +++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.h @@ -304,6 +304,10 @@ protected: const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const; + virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView( + const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut) const; + virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; diff --git a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp index bb85ed21be4..59a4c86129e 100644 --- a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp +++ b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp @@ -1070,7 +1070,7 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( if (m_settings.isArcticIsland) { - GB_ADDR_CONFIG_gfx9 gbAddrConfig; + GB_ADDR_CONFIG_GFX9 gbAddrConfig; gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; @@ -3641,8 +3641,15 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( } else { - // Always ignore linear swizzle mode if there is other choice. - allowedSwModeSet.swLinear = 0; + const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0); + + if ((height > 1) && (computeMinSize == FALSE)) + { + // Always ignore linear swizzle mode if: + // 1. This is a (2D/3D) resource with height > 1 + // 2. 
Client doesn't require computing minimize size + allowedSwModeSet.swLinear = 0; + } ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); @@ -3651,6 +3658,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( { AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {}; + swMode[AddrBlockLinear] = ADDR_SW_LINEAR; swMode[AddrBlockMicro] = ADDR_SW_256B_D; swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D; swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D; @@ -3661,78 +3669,134 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S; } - Dim3d blkDim[AddrBlockMaxTiledType] = {}; - Dim3d padDim[AddrBlockMaxTiledType] = {}; UINT_64 padSize[AddrBlockMaxTiledType] = {}; - const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2); - const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1); + const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2); + const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 
2 : 1); const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); UINT_32 minSizeBlk = AddrBlockMicro; UINT_64 minSize = 0; - for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) - { - if (allowedBlockSet.value & (1 << i)) - { - ComputeBlockDimensionForSurf(&blkDim[i].w, - &blkDim[i].h, - &blkDim[i].d, - bpp, - numFrags, - pOut->resourceType, - swMode[i]); + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; - if (displayRsrc) + for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++) + { + if (IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + { + localIn.swizzleMode = swMode[i]; + + if (localIn.swizzleMode == ADDR_SW_LINEAR) { - blkDim[i].w = PowTwoAlign(blkDim[i].w, 32); + returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut); + } + else + { + returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut); } - padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]); - padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement); - - if ((minSize == 0) || - ((padSize[i] * ratioHi) <= (minSize * ratioLow))) + if (returnCode == ADDR_OK) { - minSize = padSize[i]; - minSizeBlk = i; + padSize[i] = localOut.surfSize; + + if ((minSize == 0) || + BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi)) + { + minSize = padSize[i]; + minSizeBlk = i; + } + } + else + { + ADDR_ASSERT_ALWAYS(); + break; } } } - if ((allowedBlockSet.micro == TRUE) && - (width <= blkDim[AddrBlockMicro].w) && - (height <= blkDim[AddrBlockMicro].h) && - (NextPow2(pIn->minSizeAlign) <= Size256)) + if (pIn->memoryBudget > 1.0) { - minSizeBlk = AddrBlockMicro; + // If minimum size is given by swizzle mode with bigger-block type, then don't ever check + // smaller-block type again in coming loop + switch (minSizeBlk) + { + case AddrBlockThick64KB: + allowedBlockSet.macroThin64KB = 0; + case AddrBlockThin64KB: + allowedBlockSet.macroThick4KB = 0; + case AddrBlockThick4KB: + allowedBlockSet.macroThin4KB = 
0; + case AddrBlockThin4KB: + allowedBlockSet.micro = 0; + case AddrBlockMicro: + allowedBlockSet.linear = 0; + case AddrBlockLinear: + break; + + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) + { + if ((i != minSizeBlk) && + IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + { + if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE) + { + // Clear the block type if the memory waste is unacceptable + allowedBlockSet.value &= ~(1u << (i - 1)); + } + } + } + + // Remove linear block type if 2 or more block types are allowed + if (IsPow2(allowedBlockSet.value) == FALSE) + { + allowedBlockSet.linear = 0; + } + + // Select the biggest allowed block type + minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1; + + minSizeBlk = (minSizeBlk == AddrBlockMaxTiledType) ? AddrBlockLinear : minSizeBlk; } - if (minSizeBlk == AddrBlockMicro) + switch (minSizeBlk) { - ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); - allowedSwModeSet.value &= Gfx9Blk256BSwModeMask; - } - else if (minSizeBlk == AddrBlockThick4KB) - { - ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); - allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask; - } - else if (minSizeBlk == AddrBlockThin4KB) - { - allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ? - Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask; - } - else if (minSizeBlk == AddrBlockThick64KB) - { - ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); - allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask; - } - else - { - ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB); - allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ? 
- Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask; + case AddrBlockLinear: + allowedSwModeSet.value &= Gfx9LinearSwModeMask; + break; + + case AddrBlockMicro: + ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); + allowedSwModeSet.value &= Gfx9Blk256BSwModeMask; + break; + + case AddrBlockThin4KB: + allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ? + Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask; + break; + + case AddrBlockThick4KB: + ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); + allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask; + break; + + case AddrBlockThin64KB: + allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ? + Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask; + break; + + case AddrBlockThick64KB: + ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); + allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask; + break; + + default: + ADDR_ASSERT_ALWAYS(); + allowedSwModeSet.value = 0; + break; } } @@ -3742,7 +3806,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); // Determine swizzle type if there are 2 or more swizzle type candidates - if (IsPow2(allowedSwSet.value) == FALSE) + if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE)) { if (ElemLib::IsBlockCompressed(pIn->format)) { @@ -3808,10 +3872,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( allowedSwModeSet.value &= Gfx9ZSwModeMask; } } - } - // Swizzle type should be determined. - ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); + // Swizzle type should be determined. + ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); + } // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle // type combination. 
For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 1e77c4154c7..3bbf967772e 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -2357,7 +2357,7 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_ /* Query whether the surface is displayable. */ /* This is only useful for surfaces that are allocated without SCANOUT. */ - bool displayable = false; + BOOL_32 displayable = false; if (!config->is_3d && !config->is_cube) { r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.surf.swizzle_mode, surf->bpe * 8, &displayable);