swr: [rasterizer core] fix SIMD16 output merger

Honor the colorHottileEnable mask when accessing colorBuffer pointers.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2016-12-21 18:01:56 -06:00
parent 1a77e0c48d
commit e62b6d2f0f
2 changed files with 22 additions and 16 deletions

View File

@ -593,7 +593,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// output merger // output merger
AR_BEGIN(BEOutputMerger, pDC->drawId); AR_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND #if USE_8x2_TILE_BACKEND
OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, useAlternateOffset); OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
#else #else
OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets); OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
#endif #endif
@ -807,7 +807,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// output merger // output merger
AR_BEGIN(BEOutputMerger, pDC->drawId); AR_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND #if USE_8x2_TILE_BACKEND
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, useAlternateOffset); OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
#else #else
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets); OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
#endif #endif
@ -1001,7 +1001,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// broadcast the results of the PS to all passing pixels // broadcast the results of the PS to all passing pixels
#if USE_8x2_TILE_BACKEND #if USE_8x2_TILE_BACKEND
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, useAlternateOffset); OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
#else #else
OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets); OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
#endif #endif

View File

@ -691,6 +691,7 @@ INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTE
psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample); psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample);
} }
// Merge Output to 4x2 SIMD Tile Format
INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT) const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT)
{ {
@ -751,8 +752,9 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N
} }
#if USE_8x2_TILE_BACKEND #if USE_8x2_TILE_BACKEND
// Merge Output to 8x2 SIMD16 Tile Format
INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT, bool useAlternateOffset) const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT, const uint32_t colorBufferEnableMask, bool useAlternateOffset)
{ {
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@ -765,7 +767,8 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N
simdvector blendSrc; simdvector blendSrc;
simdvector blendOut; simdvector blendOut;
for (uint32_t rt = 0; rt < NumRT; ++rt) uint32_t colorBufferBit = 1;
for (uint32_t rt = 0; rt < NumRT; rt += 1, colorBufferBit <<= 1)
{ {
simdscalar *pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset); simdscalar *pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
@ -774,10 +777,13 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N
/// TODO: move this into the blend JIT. /// TODO: move this into the blend JIT.
blendOut = psContext.shaded[rt]; blendOut = psContext.shaded[rt];
if (colorBufferBit & colorBufferEnableMask)
{
blendSrc[0] = pColorSample[0]; blendSrc[0] = pColorSample[0];
blendSrc[1] = pColorSample[2]; blendSrc[1] = pColorSample[2];
blendSrc[2] = pColorSample[4]; blendSrc[2] = pColorSample[4];
blendSrc[3] = pColorSample[6]; blendSrc[3] = pColorSample[6];
}
// Blend outputs and update coverage mask for alpha test // Blend outputs and update coverage mask for alpha test
if (pfnBlendFunc[rt] != nullptr) if (pfnBlendFunc[rt] != nullptr)