amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call
This decreases the DCC retile map overhead from 23% to 18%.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5398>
This commit is contained in:
parent
a1b9eb62f6
commit
a99f4d5382
|
@ -3360,6 +3360,15 @@ typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
|
|||
UINT_32 numFrags; ///< Color surface fragment number
|
||||
|
||||
UINT_32 pipeXor; ///< pipe Xor setting
|
||||
UINT_32 pitch; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::pitch
|
||||
UINT_32 height; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::height
|
||||
UINT_32 compressBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkWidth
|
||||
UINT_32 compressBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkHeight
|
||||
UINT_32 compressBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkDepth
|
||||
UINT_32 metaBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkWidth
|
||||
UINT_32 metaBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkHeight
|
||||
UINT_32 metaBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkDepth
|
||||
UINT_32 dccRamSliceSize; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::dccRamSliceSize
|
||||
} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT;
|
||||
|
||||
/**
|
||||
|
|
|
@ -673,88 +673,67 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
|
|||
}
|
||||
else
|
||||
{
|
||||
ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
|
||||
input.size = sizeof(input);
|
||||
input.dccKeyFlags = pIn->dccKeyFlags;
|
||||
input.colorFlags = pIn->colorFlags;
|
||||
input.swizzleMode = pIn->swizzleMode;
|
||||
input.resourceType = pIn->resourceType;
|
||||
input.bpp = pIn->bpp;
|
||||
input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
|
||||
input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
|
||||
input.numSlices = Max(pIn->numSlices, 1u);
|
||||
input.numFrags = Max(pIn->numFrags, 1u);
|
||||
input.numMipLevels = Max(pIn->numMipLevels, 1u);
|
||||
const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
|
||||
const UINT_32 numPipeLog2 = m_pipesLog2;
|
||||
const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
|
||||
UINT_32 index = m_dccBaseIndex + elemLog2;
|
||||
const UINT_8* patIdxTable;
|
||||
|
||||
ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
|
||||
output.size = sizeof(output);
|
||||
|
||||
returnCode = ComputeDccInfo(&input, &output);
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
if (m_settings.supportRbPlus)
|
||||
{
|
||||
const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
|
||||
const UINT_32 numPipeLog2 = m_pipesLog2;
|
||||
const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
|
||||
UINT_32 index = m_dccBaseIndex + elemLog2;
|
||||
const UINT_8* patIdxTable;
|
||||
patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
|
||||
|
||||
if (m_settings.supportRbPlus)
|
||||
if (pIn->dccKeyFlags.pipeAligned)
|
||||
{
|
||||
patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
|
||||
index += MaxNumOfBpp;
|
||||
|
||||
if (pIn->dccKeyFlags.pipeAligned)
|
||||
if (m_numPkrLog2 < 2)
|
||||
{
|
||||
index += MaxNumOfBpp;
|
||||
|
||||
if (m_numPkrLog2 < 2)
|
||||
{
|
||||
index += m_pipesLog2 * MaxNumOfBpp;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 4 groups for "m_numPkrLog2 < 2" case
|
||||
index += 4 * MaxNumOfBpp;
|
||||
|
||||
const UINT_32 dccPipePerPkr = 3;
|
||||
|
||||
index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
|
||||
(m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
patIdxTable = DCC_64K_R_X_PATIDX;
|
||||
|
||||
if (pIn->dccKeyFlags.pipeAligned)
|
||||
{
|
||||
index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
|
||||
index += m_pipesLog2 * MaxNumOfBpp;
|
||||
}
|
||||
else
|
||||
{
|
||||
index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
|
||||
// 4 groups for "m_numPkrLog2 < 2" case
|
||||
index += 4 * MaxNumOfBpp;
|
||||
|
||||
const UINT_32 dccPipePerPkr = 3;
|
||||
|
||||
index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
|
||||
(m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
|
||||
}
|
||||
}
|
||||
|
||||
const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
|
||||
const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
|
||||
const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
|
||||
blkSizeLog2 + 1, // +1 for nibble offset
|
||||
pIn->x,
|
||||
pIn->y,
|
||||
pIn->slice,
|
||||
0);
|
||||
const UINT_32 xb = pIn->x / output.metaBlkWidth;
|
||||
const UINT_32 yb = pIn->y / output.metaBlkHeight;
|
||||
const UINT_32 pb = output.pitch / output.metaBlkWidth;
|
||||
const UINT_32 blkIndex = (yb * pb) + xb;
|
||||
const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
|
||||
|
||||
pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
|
||||
(blkIndex * (1 << blkSizeLog2)) +
|
||||
((blkOffset >> 1) ^ pipeXor);
|
||||
}
|
||||
else
|
||||
{
|
||||
patIdxTable = DCC_64K_R_X_PATIDX;
|
||||
|
||||
if (pIn->dccKeyFlags.pipeAligned)
|
||||
{
|
||||
index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
|
||||
}
|
||||
else
|
||||
{
|
||||
index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
|
||||
}
|
||||
}
|
||||
|
||||
const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
|
||||
const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
|
||||
const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
|
||||
blkSizeLog2 + 1, // +1 for nibble offset
|
||||
pIn->x,
|
||||
pIn->y,
|
||||
pIn->slice,
|
||||
0);
|
||||
const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
|
||||
const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
|
||||
const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
|
||||
const UINT_32 blkIndex = (yb * pb) + xb;
|
||||
const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
|
||||
|
||||
pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
|
||||
(blkIndex * (1 << blkSizeLog2)) +
|
||||
((blkOffset >> 1) ^ pipeXor);
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
|
|
|
@ -987,62 +987,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
|
|||
}
|
||||
else
|
||||
{
|
||||
ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
|
||||
input.size = sizeof(input);
|
||||
input.dccKeyFlags = pIn->dccKeyFlags;
|
||||
input.colorFlags = pIn->colorFlags;
|
||||
input.swizzleMode = pIn->swizzleMode;
|
||||
input.resourceType = pIn->resourceType;
|
||||
input.bpp = pIn->bpp;
|
||||
input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
|
||||
input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
|
||||
input.numSlices = Max(pIn->numSlices, 1u);
|
||||
input.numFrags = Max(pIn->numFrags, 1u);
|
||||
input.numMipLevels = Max(pIn->numMipLevels, 1u);
|
||||
UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
|
||||
UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
|
||||
UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
|
||||
UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
|
||||
UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
|
||||
UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
|
||||
UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
|
||||
UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
|
||||
|
||||
ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
|
||||
output.size = sizeof(output);
|
||||
MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
|
||||
Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
|
||||
metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
|
||||
compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
|
||||
|
||||
returnCode = ComputeDccInfo(&input, &output);
|
||||
const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
|
||||
|
||||
if (returnCode == ADDR_OK)
|
||||
{
|
||||
UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
|
||||
UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
|
||||
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
|
||||
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
|
||||
UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
|
||||
UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
|
||||
UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
|
||||
UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
|
||||
UINT_32 xb = pIn->x / pIn->metaBlkWidth;
|
||||
UINT_32 yb = pIn->y / pIn->metaBlkHeight;
|
||||
UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
|
||||
|
||||
MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
|
||||
Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
|
||||
metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
|
||||
compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
|
||||
UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
|
||||
UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
|
||||
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
|
||||
|
||||
const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
|
||||
UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
|
||||
UINT_64 address = pMetaEq->solve(coords);
|
||||
|
||||
UINT_32 xb = pIn->x / output.metaBlkWidth;
|
||||
UINT_32 yb = pIn->y / output.metaBlkHeight;
|
||||
UINT_32 zb = pIn->slice / output.metaBlkDepth;
|
||||
pOut->addr = address >> 1;
|
||||
|
||||
UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
|
||||
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
|
||||
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
|
||||
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
|
||||
pIn->swizzleMode);
|
||||
|
||||
UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
|
||||
UINT_64 address = pMetaEq->solve(coords);
|
||||
UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
|
||||
|
||||
pOut->addr = address >> 1;
|
||||
|
||||
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
|
||||
pIn->swizzleMode);
|
||||
|
||||
UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
|
||||
|
||||
pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
|
||||
}
|
||||
pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
|
|
|
@ -1378,6 +1378,15 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
|
|||
addrin.numSlices = 1;
|
||||
addrin.numMipLevels = 1;
|
||||
addrin.numFrags = 1;
|
||||
addrin.pitch = dout.pitch;
|
||||
addrin.height = dout.height;
|
||||
addrin.compressBlkWidth = dout.compressBlkWidth;
|
||||
addrin.compressBlkHeight = dout.compressBlkHeight;
|
||||
addrin.compressBlkDepth = dout.compressBlkDepth;
|
||||
addrin.metaBlkWidth = dout.metaBlkWidth;
|
||||
addrin.metaBlkHeight = dout.metaBlkHeight;
|
||||
addrin.metaBlkDepth = dout.metaBlkDepth;
|
||||
addrin.dccRamSliceSize = dout.dccRamSliceSize;
|
||||
|
||||
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
|
||||
addrout.size = sizeof(addrout);
|
||||
|
|
Loading…
Reference in New Issue