swr: [rasterizer core] fix SIMD16 output merger
Honor the colorHottileEnable mask when accessing colorBuffer pointers.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
1a77e0c48d
commit
e62b6d2f0f
|
@ -541,7 +541,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||||
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
|
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
|
||||||
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
||||||
AR_END(BEEarlyDepthTest, 0);
|
AR_END(BEEarlyDepthTest, 0);
|
||||||
|
|
||||||
// early-exit if no pixels passed depth or earlyZ is forced on
|
// early-exit if no pixels passed depth or earlyZ is forced on
|
||||||
|
@ -574,7 +574,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||||
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
||||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||||
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
|
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
|
||||||
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
||||||
AR_END(BELateDepthTest, 0);
|
AR_END(BELateDepthTest, 0);
|
||||||
|
|
||||||
if (!_simd_movemask_ps(depthPassMask))
|
if (!_simd_movemask_ps(depthPassMask))
|
||||||
|
@ -593,7 +593,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||||
// output merger
|
// output merger
|
||||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||||
#if USE_8x2_TILE_BACKEND
|
#if USE_8x2_TILE_BACKEND
|
||||||
OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, useAlternateOffset);
|
OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||||
#else
|
#else
|
||||||
OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
|
OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
|
||||||
#endif
|
#endif
|
||||||
|
@ -752,7 +752,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||||
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
||||||
AR_END(BEEarlyDepthTest, 0);
|
AR_END(BEEarlyDepthTest, 0);
|
||||||
|
|
||||||
// early-exit if no samples passed depth or earlyZ is forced on.
|
// early-exit if no samples passed depth or earlyZ is forced on.
|
||||||
|
@ -786,7 +786,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||||
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
||||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||||
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
||||||
AR_END(BELateDepthTest, 0);
|
AR_END(BELateDepthTest, 0);
|
||||||
|
|
||||||
if (!_simd_movemask_ps(depthPassMask))
|
if (!_simd_movemask_ps(depthPassMask))
|
||||||
|
@ -807,7 +807,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||||
// output merger
|
// output merger
|
||||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||||
#if USE_8x2_TILE_BACKEND
|
#if USE_8x2_TILE_BACKEND
|
||||||
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, useAlternateOffset);
|
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||||
#else
|
#else
|
||||||
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
|
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
|
||||||
#endif
|
#endif
|
||||||
|
@ -935,7 +935,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||||
{
|
{
|
||||||
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
|
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
|
||||||
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
|
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
|
||||||
AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
|
AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we have no covered samples that passed depth at this point, go to next tile
|
// if we have no covered samples that passed depth at this point, go to next tile
|
||||||
|
@ -969,7 +969,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||||
{
|
{
|
||||||
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
|
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
|
||||||
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
|
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
|
||||||
AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
|
AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we have no covered samples that passed depth at this point, skip OM and go to next tile
|
// if we have no covered samples that passed depth at this point, skip OM and go to next tile
|
||||||
|
@ -1001,7 +1001,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||||
|
|
||||||
// broadcast the results of the PS to all passing pixels
|
// broadcast the results of the PS to all passing pixels
|
||||||
#if USE_8x2_TILE_BACKEND
|
#if USE_8x2_TILE_BACKEND
|
||||||
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, useAlternateOffset);
|
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||||
#else
|
#else
|
||||||
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
|
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
|
||||||
#endif
|
#endif
|
||||||
|
@ -1148,7 +1148,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
||||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||||
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||||
AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
|
||||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||||
AR_END(BEEarlyDepthTest, 0);
|
AR_END(BEEarlyDepthTest, 0);
|
||||||
|
|
|
@ -691,6 +691,7 @@ INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTE
|
||||||
psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample);
|
psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Merge Output to 4x2 SIMD Tile Format
|
||||||
INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
|
INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
|
||||||
const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT)
|
const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT)
|
||||||
{
|
{
|
||||||
|
@ -751,8 +752,9 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N
|
||||||
}
|
}
|
||||||
|
|
||||||
#if USE_8x2_TILE_BACKEND
|
#if USE_8x2_TILE_BACKEND
|
||||||
|
// Merge Output to 8x2 SIMD16 Tile Format
|
||||||
INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
|
INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
|
||||||
const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT, bool useAlternateOffset)
|
const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT, const uint32_t colorBufferEnableMask, bool useAlternateOffset)
|
||||||
{
|
{
|
||||||
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
|
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
|
||||||
uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
|
uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
|
||||||
|
@ -765,7 +767,8 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N
|
||||||
simdvector blendSrc;
|
simdvector blendSrc;
|
||||||
simdvector blendOut;
|
simdvector blendOut;
|
||||||
|
|
||||||
for (uint32_t rt = 0; rt < NumRT; ++rt)
|
uint32_t colorBufferBit = 1;
|
||||||
|
for (uint32_t rt = 0; rt < NumRT; rt += 1, colorBufferBit <<= 1)
|
||||||
{
|
{
|
||||||
simdscalar *pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
|
simdscalar *pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
|
||||||
|
|
||||||
|
@ -774,10 +777,13 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N
|
||||||
/// TODO: move this into the blend JIT.
|
/// TODO: move this into the blend JIT.
|
||||||
blendOut = psContext.shaded[rt];
|
blendOut = psContext.shaded[rt];
|
||||||
|
|
||||||
blendSrc[0] = pColorSample[0];
|
if (colorBufferBit & colorBufferEnableMask)
|
||||||
blendSrc[1] = pColorSample[2];
|
{
|
||||||
blendSrc[2] = pColorSample[4];
|
blendSrc[0] = pColorSample[0];
|
||||||
blendSrc[3] = pColorSample[6];
|
blendSrc[1] = pColorSample[2];
|
||||||
|
blendSrc[2] = pColorSample[4];
|
||||||
|
blendSrc[3] = pColorSample[6];
|
||||||
|
}
|
||||||
|
|
||||||
// Blend outputs and update coverage mask for alpha test
|
// Blend outputs and update coverage mask for alpha test
|
||||||
if (pfnBlendFunc[rt] != nullptr)
|
if (pfnBlendFunc[rt] != nullptr)
|
||||||
|
|
Loading…
Reference in New Issue