amdgpu/addrlib: Adjust bank equation bit order based on macro tile aspect ratio settings

This way, a valid equation can be generated for the 2D_THIN1 tile mode.
Add a "preferEquation" flag to return the equation index without adjusting
the input tile mode.
This commit is contained in:
Frans Gu 2016-03-04 05:04:23 -05:00 committed by Marek Olšák
parent ed1aca8e8f
commit 6764d96eaa
4 changed files with 282 additions and 91 deletions

View File

@ -146,10 +146,12 @@ typedef union _ADDR_EQUATION_KEY
UINT_32 tileMode : 5; ///< Tile mode
UINT_32 microTileType : 3; ///< Micro tile type
UINT_32 pipeConfig : 5; ///< pipe config
UINT_32 numBanks : 5; ///< Number of banks
UINT_32 numBanksLog2 : 3; ///< Number of banks log2
UINT_32 bankWidth : 4; ///< Bank width
UINT_32 bankHeight : 4; ///< Bank height
UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio
UINT_32 prt : 1; ///< SI only, indicate whether this equation is for prt
UINT_32 reserved : 1; ///< Reserved bit
} fields;
UINT_32 value;
} ADDR_EQUATION_KEY;
@ -516,7 +518,8 @@ typedef union _ADDR_SURFACE_FLAGS
UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output.
UINT_32 rotateDisplay : 1; ///< Rotate micro tile type
UINT_32 minimizeAlignment : 1; ///< Minimize alignment
UINT_32 reserved : 5; ///< Reserved bits
UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode
UINT_32 reserved : 4; ///< Reserved bits
};
UINT_32 value;

View File

@ -889,48 +889,54 @@ VOID CiLib::HwlOptimizeTileMode(
// Override 2D/3D macro tile mode to PRT_* tile mode if
// client driver requests this surface is equation compatible
if ((pInOut->flags.needEquation == TRUE) &&
(pInOut->numSamples <= 1) &&
(IsMacroTiled(tileMode) == TRUE) &&
(IsPrtTileMode(tileMode) == FALSE))
if (IsMacroTiled(tileMode) == TRUE)
{
UINT_32 thickness = Thickness(tileMode);
if ((pInOut->maxBaseAlign != 0) && (pInOut->maxBaseAlign < Block64K))
if ((pInOut->flags.needEquation == TRUE) &&
(pInOut->numSamples <= 1) &&
(IsPrtTileMode(tileMode) == FALSE))
{
tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
}
else if (thickness == 1)
{
tileMode = ADDR_TM_PRT_TILED_THIN1;
}
else
{
static const UINT_32 PrtTileBytes = 0x10000;
// First prt thick tile index in the tile mode table
static const UINT_32 PrtThickTileIndex = 22;
ADDR_TILEINFO tileInfo = {0};
HwlComputeMacroModeIndex(PrtThickTileIndex,
pInOut->flags,
pInOut->bpp,
pInOut->numSamples,
&tileInfo);
UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples *
thickness * HwlGetPipes(&tileInfo) *
tileInfo.banks * tileInfo.bankWidth *
tileInfo.bankHeight;
if (macroTileBytes <= PrtTileBytes)
if ((pInOut->numSlices > 1) && ((pInOut->maxBaseAlign == 0) || (pInOut->maxBaseAlign >= Block64K)))
{
tileMode = ADDR_TM_PRT_TILED_THICK;
}
else
{
tileMode = ADDR_TM_PRT_TILED_THIN1;
UINT_32 thickness = Thickness(tileMode);
if (thickness == 1)
{
tileMode = ADDR_TM_PRT_TILED_THIN1;
}
else
{
static const UINT_32 PrtTileBytes = 0x10000;
// First prt thick tile index in the tile mode table
static const UINT_32 PrtThickTileIndex = 22;
ADDR_TILEINFO tileInfo = {0};
HwlComputeMacroModeIndex(PrtThickTileIndex,
pInOut->flags,
pInOut->bpp,
pInOut->numSamples,
&tileInfo);
UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples *
thickness * HwlGetPipes(&tileInfo) *
tileInfo.banks * tileInfo.bankWidth *
tileInfo.bankHeight;
if (macroTileBytes <= PrtTileBytes)
{
tileMode = ADDR_TM_PRT_TILED_THICK;
}
else
{
tileMode = ADDR_TM_PRT_TILED_THIN1;
}
}
}
}
if (pInOut->maxBaseAlign != 0)
{
pInOut->flags.dccCompatible = FALSE;
}
}
if (tileMode != pInOut->tileMode)

View File

@ -67,6 +67,43 @@ Lib* SiHwlInit(const Client* pClient)
namespace V1
{
// We don't support MSAA for equation
//
// Equation support table, consulted by SiLib::IsEquationSupported when the chip family
// is ADDR_CHIP_FAMILY_SI.
//   - Row:    tile index (0 .. TileTableSize - 1); the trailing comment on each row names
//             the tile mode table entry that the row describes.
//   - Column: log2 of the element size in bytes, i.e. 8bpp, 16bpp, 32bpp, 64bpp, 128bpp
//             (MaxNumElementBytes columns).
// A TRUE cell means an equation is reported as supported for that tile index / element
// size pair; a FALSE cell means it is not.
const BOOL_32 SiLib::m_EquationSupport[SiLib::TileTableSize][SiLib::MaxNumElementBytes] =
{
{TRUE, TRUE, TRUE, FALSE, FALSE}, // 0, non-AA compressed depth or any stencil
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 1, 2xAA/4xAA compressed depth with or without stencil
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 2, 8xAA compressed depth with or without stencil
{FALSE, TRUE, FALSE, FALSE, FALSE}, // 3, 16 bpp depth PRT (non-MSAA), don't support uncompressed depth
{TRUE, TRUE, TRUE, FALSE, FALSE}, // 4, 1D depth
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 5, 16 bpp depth PRT (4xMSAA)
{FALSE, FALSE, TRUE, FALSE, FALSE}, // 6, 32 bpp depth PRT (non-MSAA)
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 7, 32 bpp depth PRT (4xMSAA)
{TRUE, TRUE, TRUE, TRUE, TRUE }, // 8, Linear
{TRUE, TRUE, TRUE, TRUE, TRUE }, // 9, 1D display
{TRUE, FALSE, FALSE, FALSE, FALSE}, // 10, 8 bpp color (displayable)
{FALSE, TRUE, FALSE, FALSE, FALSE}, // 11, 16 bpp color (displayable)
{FALSE, FALSE, TRUE, TRUE, FALSE}, // 12, 32/64 bpp color (displayable)
{TRUE, TRUE, TRUE, TRUE, TRUE }, // 13, 1D thin
{TRUE, FALSE, FALSE, FALSE, FALSE}, // 14, 8 bpp color non-displayable
{FALSE, TRUE, FALSE, FALSE, FALSE}, // 15, 16 bpp color non-displayable
{FALSE, FALSE, TRUE, FALSE, FALSE}, // 16, 32 bpp color non-displayable
{FALSE, FALSE, FALSE, TRUE, TRUE }, // 17, 64/128 bpp color non-displayable
{TRUE, TRUE, TRUE, TRUE, TRUE }, // 18, 1D THICK
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 19, 2D XTHICK
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 20, 2D THICK
{TRUE, FALSE, FALSE, FALSE, FALSE}, // 21, 8 bpp 2D PRTs (non-MSAA)
{FALSE, TRUE, FALSE, FALSE, FALSE}, // 22, 16 bpp 2D PRTs (non-MSAA)
{FALSE, FALSE, TRUE, FALSE, FALSE}, // 23, 32 bpp 2D PRTs (non-MSAA)
{FALSE, FALSE, FALSE, TRUE, FALSE}, // 24, 64 bpp 2D PRTs (non-MSAA)
{FALSE, FALSE, FALSE, FALSE, TRUE }, // 25, 128bpp 2D PRTs (non-MSAA)
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 26, none
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 27, none
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 28, none
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 29, none
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 30, 64bpp 2D PRTs (4xMSAA)
{FALSE, FALSE, FALSE, FALSE, FALSE}, // 31, none
};
/**
****************************************************************************************************
* SiLib::SiLib
@ -219,37 +256,132 @@ ADDR_E_RETURNCODE SiLib::ComputeBankEquation(
switch (pTileInfo->banks)
{
case 16:
pEquation->addr[0] = y6;
pEquation->xor1[0] = x3;
pEquation->addr[1] = y5;
pEquation->xor1[1] = y6;
pEquation->xor2[1] = x4;
pEquation->addr[2] = y4;
pEquation->xor1[2] = x5;
pEquation->addr[3] = y3;
pEquation->xor1[3] = x6;
if (pTileInfo->macroAspectRatio == 1)
{
pEquation->addr[0] = y6;
pEquation->xor1[0] = x3;
pEquation->addr[1] = y5;
pEquation->xor1[1] = y6;
pEquation->xor2[1] = x4;
pEquation->addr[2] = y4;
pEquation->xor1[2] = x5;
pEquation->addr[3] = y3;
pEquation->xor1[3] = x6;
}
else if (pTileInfo->macroAspectRatio == 2)
{
pEquation->addr[0] = x3;
pEquation->xor1[0] = y6;
pEquation->addr[1] = y5;
pEquation->xor1[1] = y6;
pEquation->xor2[1] = x4;
pEquation->addr[2] = y4;
pEquation->xor1[2] = x5;
pEquation->addr[3] = y3;
pEquation->xor1[3] = x6;
}
else if (pTileInfo->macroAspectRatio == 4)
{
pEquation->addr[0] = x3;
pEquation->xor1[0] = y6;
pEquation->addr[1] = x4;
pEquation->xor1[1] = y5;
pEquation->xor2[1] = y6;
pEquation->addr[2] = y4;
pEquation->xor1[2] = x5;
pEquation->addr[3] = y3;
pEquation->xor1[3] = x6;
}
else if (pTileInfo->macroAspectRatio == 8)
{
pEquation->addr[0] = x3;
pEquation->xor1[0] = y6;
pEquation->addr[1] = x4;
pEquation->xor1[1] = y5;
pEquation->xor2[1] = y6;
pEquation->addr[2] = x5;
pEquation->xor1[2] = y4;
pEquation->addr[3] = y3;
pEquation->xor1[3] = x6;
}
else
{
ADDR_ASSERT_ALWAYS();
}
pEquation->numBits = 4;
break;
case 8:
pEquation->addr[0] = y5;
pEquation->xor1[0] = x3;
pEquation->addr[1] = y4;
pEquation->xor1[1] = y5;
pEquation->xor2[1] = x4;
pEquation->addr[2] = y3;
pEquation->xor1[2] = x5;
if (pTileInfo->macroAspectRatio == 1)
{
pEquation->addr[0] = y5;
pEquation->xor1[0] = x3;
pEquation->addr[1] = y4;
pEquation->xor1[1] = y5;
pEquation->xor2[1] = x4;
pEquation->addr[2] = y3;
pEquation->xor1[2] = x5;
}
else if (pTileInfo->macroAspectRatio == 2)
{
pEquation->addr[0] = x3;
pEquation->xor1[0] = y5;
pEquation->addr[1] = y4;
pEquation->xor1[1] = y5;
pEquation->xor2[1] = x4;
pEquation->addr[2] = y3;
pEquation->xor1[2] = x5;
}
else if (pTileInfo->macroAspectRatio == 4)
{
pEquation->addr[0] = x3;
pEquation->xor1[0] = y5;
pEquation->addr[1] = x4;
pEquation->xor1[1] = y4;
pEquation->xor2[1] = y5;
pEquation->addr[2] = y3;
pEquation->xor1[2] = x5;
}
else
{
ADDR_ASSERT_ALWAYS();
}
pEquation->numBits = 3;
break;
case 4:
pEquation->addr[0] = y4;
pEquation->xor1[0] = x3;
pEquation->addr[1] = y3;
pEquation->xor1[1] = x4;
if (pTileInfo->macroAspectRatio == 1)
{
pEquation->addr[0] = y4;
pEquation->xor1[0] = x3;
pEquation->addr[1] = y3;
pEquation->xor1[1] = x4;
}
else if (pTileInfo->macroAspectRatio == 2)
{
pEquation->addr[0] = x3;
pEquation->xor1[0] = y4;
pEquation->addr[1] = y3;
pEquation->xor1[1] = x4;
}
else
{
pEquation->addr[0] = x3;
pEquation->xor1[0] = y4;
pEquation->addr[1] = x4;
pEquation->xor1[1] = y3;
}
pEquation->numBits = 2;
break;
case 2:
pEquation->addr[0] = y3;
pEquation->xor1[0] = x3;
if (pTileInfo->macroAspectRatio == 1)
{
pEquation->addr[0] = y3;
pEquation->xor1[0] = x3;
}
else
{
pEquation->addr[0] = x3;
pEquation->xor1[0] = y3;
}
pEquation->numBits = 1;
break;
default:
@ -2522,11 +2654,24 @@ ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo(
UINT_32 tileIndex = static_cast<UINT_32>(pOut->tileIndex);
if ((pIn->flags.needEquation == TRUE) &&
if (((pIn->flags.needEquation == TRUE) ||
(pIn->flags.preferEquation == TRUE)) &&
(pIn->numSamples <= 1) &&
(tileIndex < TileTableSize))
{
pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex];
static const UINT_32 SiUncompressDepthTileIndex = 3;
if ((pIn->flags.prt == FALSE) &&
(m_uncompressDepthEqIndex != 0) &&
(tileIndex == SiUncompressDepthTileIndex))
{
pOut->equationIndex = m_uncompressDepthEqIndex + Log2(pIn->bpp >> 3);
}
else
{
pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex];
}
if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX)
{
@ -3157,8 +3302,6 @@ VOID SiLib::HwlOptimizeTileMode(
{
UINT_32 thickness = Thickness(tileMode);
pInOut->flags.prt = TRUE;
if (thickness > 1)
{
tileMode = ADDR_TM_1D_TILED_THICK;
@ -3449,7 +3592,7 @@ VOID SiLib::InitEquationTable()
HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL);
// Check if the input is supported
if (IsEquationSupported(bpp, tileConfig, tileIndex) == TRUE)
if (IsEquationSupported(bpp, tileConfig, tileIndex, log2ElementBytes) == TRUE)
{
ADDR_EQUATION_KEY key = {{0}};
@ -3461,10 +3604,12 @@ VOID SiLib::InitEquationTable()
key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ?
ADDR_NON_DISPLAYABLE : tileConfig.type;
key.fields.pipeConfig = tileConfig.info.pipeConfig;
key.fields.numBanks = tileConfig.info.banks;
key.fields.numBanksLog2 = Log2(tileConfig.info.banks);
key.fields.bankWidth = tileConfig.info.bankWidth;
key.fields.bankHeight = tileConfig.info.bankHeight;
key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio;
key.fields.prt = ((m_chipFamily == ADDR_CHIP_FAMILY_SI) &&
((1 << tileIndex) & SiPrtTileIndexMask)) ? 1 : 0;
// Find in the table if the equation has been built based on the key
for (UINT_32 i = 0; i < m_numEquations; i++)
@ -3528,7 +3673,7 @@ VOID SiLib::InitEquationTable()
MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
pTileInfo->macroAspectRatio;
if (m_chipFamily == ADDR_CHIP_FAMILY_SI)
if (key.fields.prt)
{
UINT_32 macroTileSize =
m_blockWidth[equationIndex] * m_blockHeight[equationIndex] *
@ -3571,6 +3716,48 @@ VOID SiLib::InitEquationTable()
// fill the invalid equation index
m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex;
}
if (m_chipFamily == ADDR_CHIP_FAMILY_SI)
{
// For tile index 3 which is shared between PRT depth and uncompressed depth
m_uncompressDepthEqIndex = m_numEquations;
for (UINT_32 log2ElemBytes = 0; log2ElemBytes < MaxNumElementBytes; log2ElemBytes++)
{
TileConfig tileConfig = m_tileTable[3];
ADDR_EQUATION equation;
ADDR_E_RETURNCODE retCode;
memset(&equation, 0, sizeof(ADDR_EQUATION));
retCode = ComputeMacroTileEquation(log2ElemBytes,
tileConfig.mode,
tileConfig.type,
&tileConfig.info,
&equation);
if (retCode == ADDR_OK)
{
UINT_32 equationIndex = m_numEquations;
ADDR_ASSERT(equationIndex < EquationTableSize);
m_blockSlices[equationIndex] = 1;
const ADDR_TILEINFO* pTileInfo = &tileConfig.info;
m_blockWidth[equationIndex] =
HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth *
pTileInfo->macroAspectRatio;
m_blockHeight[equationIndex] =
MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
pTileInfo->macroAspectRatio;
m_equationTable[equationIndex] = equation;
m_numEquations++;
}
}
}
}
}
@ -3586,9 +3773,10 @@ VOID SiLib::InitEquationTable()
****************************************************************************************************
*/
BOOL_32 SiLib::IsEquationSupported(
UINT_32 bpp, ///< Bits per pixel
TileConfig tileConfig, ///< Tile config
INT_32 tileIndex ///< Tile index
UINT_32 bpp, ///< Bits per pixel
TileConfig tileConfig, ///< Tile config
INT_32 tileIndex, ///< Tile index
UINT_32 elementBytesLog2 ///< Log2 of element bytes
) const
{
BOOL_32 supported = TRUE;
@ -3624,24 +3812,7 @@ BOOL_32 SiLib::IsEquationSupported(
if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI))
{
// Please refer to SiLib::HwlSetupTileInfo for PRT tile index selecting
// Tile index 3, 6, 21-25 are for PRT single sample
if (tileIndex == 3)
{
supported = (bpp == 16);
}
else if (tileIndex == 6)
{
supported = (bpp == 32);
}
else if ((tileIndex >= 21) && (tileIndex <= 25))
{
supported = (bpp == 8u * (1u << (static_cast<UINT_32>(tileIndex) - 21u)));
}
else
{
supported = FALSE;
}
supported = m_EquationSupport[tileIndex][elementBytesLog2];
}
}

View File

@ -261,7 +261,7 @@ protected:
// Check if it is supported for given bpp and tile config to generate an equation
BOOL_32 IsEquationSupported(
UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex) const;
UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const;
// Protected non-virtual functions
VOID ComputeTileCoordFromPipeAndElemIdx(
@ -289,10 +289,19 @@ protected:
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxNumElementBytes = 5;
static const BOOL_32 m_EquationSupport[TileTableSize][MaxNumElementBytes];
// Prt tile mode index mask
static const UINT_32 SiPrtTileIndexMask = ((1 << 3) | (1 << 5) | (1 << 6) | (1 << 7) |
(1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) |
(1 << 25) | (1 << 30));
// More than half slots in tile mode table can't support equation
static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2;
static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];
UINT_32 m_numMacroBits[EquationTableSize];
UINT_32 m_blockWidth[EquationTableSize];
UINT_32 m_blockHeight[EquationTableSize];
UINT_32 m_blockSlices[EquationTableSize];
@ -301,6 +310,8 @@ protected:
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize];
UINT_32 m_uncompressDepthEqIndex;
private:
VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const;