amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call

This decreases the DCC retile map overhead from 23% to 18%.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5398>
This commit is contained in:
Marek Olšák 2020-06-09 02:40:20 -04:00 committed by Marge Bot
parent a1b9eb62f6
commit a99f4d5382
4 changed files with 93 additions and 117 deletions

View File

@ -3360,6 +3360,15 @@ typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
UINT_32 numFrags; ///< Color surface fragment number
UINT_32 pipeXor; ///< pipe Xor setting
// The fields below carry values taken from a previously computed
// ADDR2_COMPUTE_DCCINFO_OUTPUT, so the address computation does not have to
// recompute the DCC info on every call.
UINT_32 pitch; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::pitch
UINT_32 height; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::height
UINT_32 compressBlkWidth; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::compressBlkWidth
UINT_32 compressBlkHeight; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::compressBlkHeight
UINT_32 compressBlkDepth; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::compressBlkDepth
UINT_32 metaBlkWidth; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::metaBlkWidth
UINT_32 metaBlkHeight; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::metaBlkHeight
UINT_32 metaBlkDepth; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::metaBlkDepth
UINT_32 dccRamSliceSize; ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::dccRamSliceSize
} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT;
/**

View File

@ -673,88 +673,67 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
}
else
{
ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
input.size = sizeof(input);
input.dccKeyFlags = pIn->dccKeyFlags;
input.colorFlags = pIn->colorFlags;
input.swizzleMode = pIn->swizzleMode;
input.resourceType = pIn->resourceType;
input.bpp = pIn->bpp;
input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
input.numSlices = Max(pIn->numSlices, 1u);
input.numFrags = Max(pIn->numFrags, 1u);
input.numMipLevels = Max(pIn->numMipLevels, 1u);
const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
const UINT_32 numPipeLog2 = m_pipesLog2;
const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
UINT_32 index = m_dccBaseIndex + elemLog2;
const UINT_8* patIdxTable;
ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
output.size = sizeof(output);
returnCode = ComputeDccInfo(&input, &output);
if (returnCode == ADDR_OK)
if (m_settings.supportRbPlus)
{
const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
const UINT_32 numPipeLog2 = m_pipesLog2;
const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
UINT_32 index = m_dccBaseIndex + elemLog2;
const UINT_8* patIdxTable;
patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
if (m_settings.supportRbPlus)
if (pIn->dccKeyFlags.pipeAligned)
{
patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
index += MaxNumOfBpp;
if (pIn->dccKeyFlags.pipeAligned)
if (m_numPkrLog2 < 2)
{
index += MaxNumOfBpp;
if (m_numPkrLog2 < 2)
{
index += m_pipesLog2 * MaxNumOfBpp;
}
else
{
// 4 groups for "m_numPkrLog2 < 2" case
index += 4 * MaxNumOfBpp;
const UINT_32 dccPipePerPkr = 3;
index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
(m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
}
}
}
else
{
patIdxTable = DCC_64K_R_X_PATIDX;
if (pIn->dccKeyFlags.pipeAligned)
{
index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
index += m_pipesLog2 * MaxNumOfBpp;
}
else
{
index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
// 4 groups for "m_numPkrLog2 < 2" case
index += 4 * MaxNumOfBpp;
const UINT_32 dccPipePerPkr = 3;
index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
(m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
}
}
const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
blkSizeLog2 + 1, // +1 for nibble offset
pIn->x,
pIn->y,
pIn->slice,
0);
const UINT_32 xb = pIn->x / output.metaBlkWidth;
const UINT_32 yb = pIn->y / output.metaBlkHeight;
const UINT_32 pb = output.pitch / output.metaBlkWidth;
const UINT_32 blkIndex = (yb * pb) + xb;
const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
(blkIndex * (1 << blkSizeLog2)) +
((blkOffset >> 1) ^ pipeXor);
}
else
{
patIdxTable = DCC_64K_R_X_PATIDX;
if (pIn->dccKeyFlags.pipeAligned)
{
index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
}
else
{
index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
}
}
const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
blkSizeLog2 + 1, // +1 for nibble offset
pIn->x,
pIn->y,
pIn->slice,
0);
const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
const UINT_32 blkIndex = (yb * pb) + xb;
const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
(blkIndex * (1 << blkSizeLog2)) +
((blkOffset >> 1) ^ pipeXor);
}
return returnCode;

View File

@ -987,62 +987,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
}
else
{
ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
input.size = sizeof(input);
input.dccKeyFlags = pIn->dccKeyFlags;
input.colorFlags = pIn->colorFlags;
input.swizzleMode = pIn->swizzleMode;
input.resourceType = pIn->resourceType;
input.bpp = pIn->bpp;
input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
input.numSlices = Max(pIn->numSlices, 1u);
input.numFrags = Max(pIn->numFrags, 1u);
input.numMipLevels = Max(pIn->numMipLevels, 1u);
UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
output.size = sizeof(output);
MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
returnCode = ComputeDccInfo(&input, &output);
const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
if (returnCode == ADDR_OK)
{
UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
UINT_32 xb = pIn->x / pIn->metaBlkWidth;
UINT_32 yb = pIn->y / pIn->metaBlkHeight;
UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
UINT_64 address = pMetaEq->solve(coords);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
UINT_32 zb = pIn->slice / output.metaBlkDepth;
pOut->addr = address >> 1;
UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
pIn->swizzleMode);
UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
UINT_64 address = pMetaEq->solve(coords);
UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
pOut->addr = address >> 1;
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
pIn->swizzleMode);
UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
}
pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
}
return returnCode;

View File

@ -1378,6 +1378,15 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
addrin.numSlices = 1;
addrin.numMipLevels = 1;
addrin.numFrags = 1;
addrin.pitch = dout.pitch;
addrin.height = dout.height;
addrin.compressBlkWidth = dout.compressBlkWidth;
addrin.compressBlkHeight = dout.compressBlkHeight;
addrin.compressBlkDepth = dout.compressBlkDepth;
addrin.metaBlkWidth = dout.metaBlkWidth;
addrin.metaBlkHeight = dout.metaBlkHeight;
addrin.metaBlkDepth = dout.metaBlkDepth;
addrin.dccRamSliceSize = dout.dccRamSliceSize;
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
addrout.size = sizeof(addrout);