swr: [rasterizer core] backend refactor
Lump all template args into a bundle of traits, and add some functionality to the MSAA traits. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
43f46caf76
commit
4e1e0b3a32
|
@ -763,7 +763,6 @@ extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_IN
|
|||
extern PFN_OUTPUT_MERGER gBackendOutputMergerTable[SWR_NUM_RENDERTARGETS + 1][SWR_MULTISAMPLE_TYPE_MAX];
|
||||
extern PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable[2];
|
||||
extern PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable[2];
|
||||
extern PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2];
|
||||
void SetupPipeline(DRAW_CONTEXT *pDC)
|
||||
{
|
||||
DRAW_STATE* pState = pDC->pState;
|
||||
|
@ -827,9 +826,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
|
||||
bBarycentrics = (barycentricsMask & SWR_BARYCENTRIC_PER_SAMPLE_MASK) > 0 ? 1 : 0;
|
||||
backendFuncs.pfnCalcSampleBarycentrics = gSampleBarycentricTable[bBarycentrics];
|
||||
|
||||
bBarycentrics = (barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0 ? 1 : 0;
|
||||
backendFuncs.pfnCalcCentroidBarycentrics = gCentroidBarycentricTable[rastState.sampleCount][bBarycentrics][rastState.samplePattern][forcedSampleCount];
|
||||
}
|
||||
|
||||
PFN_PROCESS_PRIMS pfnBinner;
|
||||
|
|
|
@ -459,10 +459,10 @@ simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscala
|
|||
return _simd_movemask_ps(vClipMask);
|
||||
}
|
||||
|
||||
template<bool perspMask>
|
||||
template<bool bGenerateBarycentrics>
|
||||
INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
|
||||
{
|
||||
if(perspMask)
|
||||
if(bGenerateBarycentrics)
|
||||
{
|
||||
// evaluate I,J
|
||||
psContext.vI.center = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center);
|
||||
|
@ -475,10 +475,10 @@ INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEX
|
|||
}
|
||||
}
|
||||
|
||||
template<bool perspMask>
|
||||
template<bool bGenerateBarycentrics>
|
||||
INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
|
||||
{
|
||||
if(perspMask)
|
||||
if(bGenerateBarycentrics)
|
||||
{
|
||||
// evaluate I,J
|
||||
psContext.vI.sample = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample);
|
||||
|
@ -502,13 +502,12 @@ INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTE
|
|||
// evaluated as follows : If the SampleMask Rasterizer state is a subset of the samples in the pixel, then the first sample covered by the
|
||||
// SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<SWR_MULTISAMPLE_COUNT sampleCount, bool bForcedSampleCount>
|
||||
template<typename T>
|
||||
INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask,
|
||||
const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
|
||||
{
|
||||
uint32_t inputMask[KNOB_SIMD_WIDTH];
|
||||
|
||||
generateInputCoverage<sampleCount, 1, bForcedSampleCount>(coverageMask, inputMask, sampleMask);
|
||||
generateInputCoverage<T>(coverageMask, inputMask, sampleMask);
|
||||
|
||||
// Case (2) - partially covered pixel
|
||||
|
||||
|
@ -524,29 +523,29 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
|
|||
(inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
|
||||
|
||||
// look up and set the sample offsets from UL pixel corner for first covered sample
|
||||
__m256 vXSample = _mm256_set_ps(MultisampleTraits<sampleCount>::X(sampleNum[7]),
|
||||
MultisampleTraits<sampleCount>::X(sampleNum[6]),
|
||||
MultisampleTraits<sampleCount>::X(sampleNum[5]),
|
||||
MultisampleTraits<sampleCount>::X(sampleNum[4]),
|
||||
MultisampleTraits<sampleCount>::X(sampleNum[3]),
|
||||
MultisampleTraits<sampleCount>::X(sampleNum[2]),
|
||||
MultisampleTraits<sampleCount>::X(sampleNum[1]),
|
||||
MultisampleTraits<sampleCount>::X(sampleNum[0]));
|
||||
__m256 vXSample = _mm256_set_ps(T::MultisampleT::X(sampleNum[7]),
|
||||
T::MultisampleT::X(sampleNum[6]),
|
||||
T::MultisampleT::X(sampleNum[5]),
|
||||
T::MultisampleT::X(sampleNum[4]),
|
||||
T::MultisampleT::X(sampleNum[3]),
|
||||
T::MultisampleT::X(sampleNum[2]),
|
||||
T::MultisampleT::X(sampleNum[1]),
|
||||
T::MultisampleT::X(sampleNum[0]));
|
||||
|
||||
__m256 vYSample = _mm256_set_ps(MultisampleTraits<sampleCount>::Y(sampleNum[7]),
|
||||
MultisampleTraits<sampleCount>::Y(sampleNum[6]),
|
||||
MultisampleTraits<sampleCount>::Y(sampleNum[5]),
|
||||
MultisampleTraits<sampleCount>::Y(sampleNum[4]),
|
||||
MultisampleTraits<sampleCount>::Y(sampleNum[3]),
|
||||
MultisampleTraits<sampleCount>::Y(sampleNum[2]),
|
||||
MultisampleTraits<sampleCount>::Y(sampleNum[1]),
|
||||
MultisampleTraits<sampleCount>::Y(sampleNum[0]));
|
||||
__m256 vYSample = _mm256_set_ps(T::MultisampleT::Y(sampleNum[7]),
|
||||
T::MultisampleT::Y(sampleNum[6]),
|
||||
T::MultisampleT::Y(sampleNum[5]),
|
||||
T::MultisampleT::Y(sampleNum[4]),
|
||||
T::MultisampleT::Y(sampleNum[3]),
|
||||
T::MultisampleT::Y(sampleNum[2]),
|
||||
T::MultisampleT::Y(sampleNum[1]),
|
||||
T::MultisampleT::Y(sampleNum[0]));
|
||||
// add sample offset to UL pixel corner
|
||||
vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
|
||||
vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
|
||||
|
||||
// Case (1) and case (3b) - All samples covered or not covered with full SampleMask
|
||||
static const __m256i vFullyCoveredMask = MultisampleTraits<sampleCount>::FullSampleMask();
|
||||
static const __m256i vFullyCoveredMask = T::MultisampleT::FullSampleMask();
|
||||
__m256i vInputCoveragei = _mm256_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]);
|
||||
__m256i vAllSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vFullyCoveredMask);
|
||||
|
||||
|
@ -570,46 +569,38 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
|
|||
|
||||
__m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
|
||||
|
||||
vXSample = _simd_set1_ps(MultisampleTraits<sampleCount>::X(firstCoveredSampleMaskSample));
|
||||
vYSample = _simd_set1_ps(MultisampleTraits<sampleCount>::Y(firstCoveredSampleMaskSample));
|
||||
vXSample = _simd_set1_ps(T::MultisampleT::X(firstCoveredSampleMaskSample));
|
||||
vYSample = _simd_set1_ps(T::MultisampleT::Y(firstCoveredSampleMaskSample));
|
||||
|
||||
// blend in case 3a pixel locations
|
||||
psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
|
||||
psContext.vY.centroid = _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a));
|
||||
}
|
||||
|
||||
template<uint32_t sampleCount, uint32_t persp, uint32_t standardPattern, uint32_t forcedMultisampleCount>
|
||||
template<typename T>
|
||||
INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext,
|
||||
const uint64_t *const coverageMask, const uint32_t sampleMask,
|
||||
const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
|
||||
{
|
||||
static const bool bPersp = (bool)persp;
|
||||
static const bool bIsStandardPattern = (bool)standardPattern;
|
||||
static const bool bForcedMultisampleCount = (bool)forcedMultisampleCount;
|
||||
|
||||
// calculate centroid positions
|
||||
if(bPersp)
|
||||
if(T::bIsStandardPattern)
|
||||
{
|
||||
if(bIsStandardPattern)
|
||||
{
|
||||
///@ todo: don't need to generate input coverage 2x if input coverage and centroid
|
||||
CalcCentroidPos<(SWR_MULTISAMPLE_COUNT)sampleCount, bForcedMultisampleCount>(psContext, coverageMask, sampleMask, vXSamplePosUL, vYSamplePosUL);
|
||||
}
|
||||
else
|
||||
{
|
||||
static const __m256 pixelCenter = _simd_set1_ps(0.5f);
|
||||
psContext.vX.centroid = _simd_add_ps(vXSamplePosUL, pixelCenter);
|
||||
psContext.vY.centroid = _simd_add_ps(vYSamplePosUL, pixelCenter);
|
||||
}
|
||||
// evaluate I,J
|
||||
psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
|
||||
psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
|
||||
psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet);
|
||||
psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet);
|
||||
|
||||
// interpolate 1/w
|
||||
psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
|
||||
///@ todo: don't need to generate input coverage 2x if input coverage and centroid
|
||||
CalcCentroidPos<T>(psContext, coverageMask, sampleMask, vXSamplePosUL, vYSamplePosUL);
|
||||
}
|
||||
else
|
||||
{
|
||||
static const __m256 pixelCenter = _simd_set1_ps(0.5f);
|
||||
psContext.vX.centroid = _simd_add_ps(vXSamplePosUL, pixelCenter);
|
||||
psContext.vY.centroid = _simd_add_ps(vYSamplePosUL, pixelCenter);
|
||||
}
|
||||
// evaluate I,J
|
||||
psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
|
||||
psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
|
||||
psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet);
|
||||
psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet);
|
||||
|
||||
// interpolate 1/w
|
||||
psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
|
||||
}
|
||||
|
||||
template<uint32_t NumRT, uint32_t sampleCountT>
|
||||
|
@ -680,13 +671,10 @@ void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_REND
|
|||
}
|
||||
}
|
||||
|
||||
template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
|
||||
template<typename T>
|
||||
void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
RDTSC_START(BESetup);
|
||||
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
|
||||
static const bool bInputCoverage = (bool)inputCoverage;
|
||||
static const bool bCentroidPos = (bool)centroidPos;
|
||||
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
|
@ -736,8 +724,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
|||
psContext.J = work.J;
|
||||
psContext.recipDet = work.recipDet;
|
||||
psContext.pRecipW = work.pRecipW;
|
||||
psContext.pSamplePosX = (const float*)&MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosX;
|
||||
psContext.pSamplePosY = (const float*)&MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosY;
|
||||
psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
|
||||
psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
|
||||
|
||||
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
|
@ -748,9 +736,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
|||
|
||||
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
|
||||
{
|
||||
if(bInputCoverage)
|
||||
if(T::bInputCoverage)
|
||||
{
|
||||
generateInputCoverage<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
|
||||
generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
|
||||
}
|
||||
|
||||
if(coverageMask & MASK)
|
||||
|
@ -762,7 +750,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
|||
|
||||
backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
if(bCentroidPos)
|
||||
if(T::bCentroidPos)
|
||||
{
|
||||
// for 1x case, centroid is pixel center
|
||||
psContext.vX.centroid = psContext.vX.center;
|
||||
|
@ -873,14 +861,9 @@ Endtile:
|
|||
}
|
||||
}
|
||||
|
||||
template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
|
||||
template<typename T>
|
||||
void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
|
||||
static const bool bInputCoverage = (bool)inputCoverage;
|
||||
static const bool bCentroidPos = (bool)centroidPos;
|
||||
|
||||
RDTSC_START(BESetup);
|
||||
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
@ -930,9 +913,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
psContext.I = work.I;
|
||||
psContext.J = work.J;
|
||||
psContext.recipDet = work.recipDet;
|
||||
psContext.pSamplePosX = (const float*)&MultisampleTraits<sampleCount>::samplePosX;
|
||||
psContext.pSamplePosY = (const float*)&MultisampleTraits<sampleCount>::samplePosY;
|
||||
const uint32_t numSamples = MultisampleTraits<sampleCount>::numSamples;
|
||||
psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
|
||||
psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
|
||||
const uint32_t numSamples = T::MultisampleT::numSamples;
|
||||
|
||||
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
|
@ -951,16 +934,16 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
|
||||
if(bInputCoverage)
|
||||
if(T::bInputCoverage)
|
||||
{
|
||||
generateInputCoverage<sampleCount, SWR_MSAA_STANDARD_PATTERN, false>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
|
||||
generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
|
||||
}
|
||||
|
||||
if(bCentroidPos)
|
||||
if(T::bCentroidPos)
|
||||
{
|
||||
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
|
||||
RDTSC_START(BEBarycentric);
|
||||
backendFuncs.pfnCalcCentroidBarycentrics(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
|
||||
CalcCentroidBarycentrics<T>(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
}
|
||||
|
||||
|
@ -971,8 +954,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
RDTSC_START(BEBarycentric);
|
||||
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, MultisampleTraits<sampleCount>::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, MultisampleTraits<sampleCount>::vY(sample));
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
|
||||
|
||||
simdmask coverageMask = work.coverageMask[sample] & MASK;
|
||||
simdscalar vCoverageMask = vMask(coverageMask);
|
||||
|
@ -996,8 +979,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
simdscalar stencilPassMask = vCoverageMask;
|
||||
|
||||
// offset depth/stencil buffers current sample
|
||||
uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
|
||||
uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
|
||||
uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample);
|
||||
uint8_t *pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample);
|
||||
|
||||
// Early-Z?
|
||||
if (CanEarlyZ(pPSState))
|
||||
|
@ -1032,7 +1015,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
|
||||
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
|
||||
|
||||
//// late-Z
|
||||
// late-Z
|
||||
if (!CanEarlyZ(pPSState))
|
||||
{
|
||||
RDTSC_START(BELateDepthTest);
|
||||
|
@ -1083,16 +1066,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
}
|
||||
}
|
||||
|
||||
template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
|
||||
template<typename T>
|
||||
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
|
||||
static const bool bIsStandardPattern = (bool)samplePattern;
|
||||
static const bool bInputCoverage = (bool)inputCoverage;
|
||||
static const bool bCentroidPos = (bool)centroidPos;
|
||||
static const bool bForcedSampleCount = (bool)forcedSampleCount;
|
||||
|
||||
RDTSC_START(BESetup);
|
||||
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
@ -1141,35 +1117,25 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
psContext.I = work.I;
|
||||
psContext.J = work.J;
|
||||
psContext.recipDet = work.recipDet;
|
||||
psContext.pSamplePosX = (const float*)&MultisampleTraits<sampleCount>::samplePosX;
|
||||
psContext.pSamplePosY = (const float*)&MultisampleTraits<sampleCount>::samplePosY;
|
||||
psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
|
||||
psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
|
||||
psContext.sampleIndex = 0;
|
||||
|
||||
uint32_t numCoverageSamples;
|
||||
if(bIsStandardPattern)
|
||||
{
|
||||
numCoverageSamples = MultisampleTraits<sampleCount>::numSamples;
|
||||
}
|
||||
else
|
||||
{
|
||||
numCoverageSamples = 1;
|
||||
}
|
||||
|
||||
uint32_t numOMSamples;
|
||||
// RT has to be single sample if we're in forcedMSAA mode
|
||||
if(bForcedSampleCount && (sampleCount > SWR_MULTISAMPLE_1X))
|
||||
if(T::bForcedSampleCount && (T::MultisampleT::sampleCount > SWR_MULTISAMPLE_1X))
|
||||
{
|
||||
numOMSamples = 1;
|
||||
}
|
||||
// unless we're forced to single sample, in which case we run the OM at the sample count of the RT
|
||||
else if(bForcedSampleCount && (sampleCount == SWR_MULTISAMPLE_1X))
|
||||
else if(T::bForcedSampleCount && (T::MultisampleT::sampleCount == SWR_MULTISAMPLE_1X))
|
||||
{
|
||||
numOMSamples = GetNumSamples(pBlendState->sampleCount);
|
||||
}
|
||||
// else we're in normal MSAA mode and rasterizer and OM are running at the same sample count
|
||||
else
|
||||
{
|
||||
numOMSamples = MultisampleTraits<sampleCount>::numSamples;
|
||||
numOMSamples = T::MultisampleT::numSamples;
|
||||
}
|
||||
|
||||
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
|
@ -1178,21 +1144,21 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps((float)yy));
|
||||
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
|
||||
{
|
||||
simdscalar vZ[MultisampleTraits<sampleCount>::numSamples]{ 0 };
|
||||
simdscalar vZ[T::MultisampleT::numSamples]{ 0 };
|
||||
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps((float)xx));
|
||||
// set pixel center positions
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
|
||||
|
||||
if (bInputCoverage)
|
||||
if (T::bInputCoverage)
|
||||
{
|
||||
generateInputCoverage<sampleCount, bIsStandardPattern, bForcedSampleCount>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
|
||||
generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
|
||||
}
|
||||
|
||||
if(bCentroidPos)
|
||||
if(T::bCentroidPos)
|
||||
{
|
||||
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
|
||||
RDTSC_START(BEBarycentric);
|
||||
backendFuncs.pfnCalcCentroidBarycentrics(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
|
||||
CalcCentroidBarycentrics<T>(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
}
|
||||
|
||||
|
@ -1219,12 +1185,12 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
}
|
||||
|
||||
// need to declare enough space for all samples
|
||||
simdscalar vCoverageMask[MultisampleTraits<sampleCount>::numSamples];
|
||||
simdscalar depthPassMask[MultisampleTraits<sampleCount>::numSamples];
|
||||
simdscalar stencilPassMask[MultisampleTraits<sampleCount>::numSamples];
|
||||
simdscalar vCoverageMask[T::MultisampleT::numSamples];
|
||||
simdscalar depthPassMask[T::MultisampleT::numSamples];
|
||||
simdscalar stencilPassMask[T::MultisampleT::numSamples];
|
||||
simdscalar anyDepthSamplePassed = _simd_setzero_ps();
|
||||
simdscalar anyStencilSamplePassed = _simd_setzero_ps();
|
||||
for(uint32_t sample = 0; sample < numCoverageSamples; sample++)
|
||||
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
|
||||
{
|
||||
vCoverageMask[sample] = vMask(work.coverageMask[sample] & MASK);
|
||||
|
||||
|
@ -1237,7 +1203,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
continue;
|
||||
}
|
||||
|
||||
if(bForcedSampleCount)
|
||||
if(T::bForcedSampleCount)
|
||||
{
|
||||
// candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
|
||||
const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(pBlendState->sampleMask), _simd_setzero_si()));
|
||||
|
@ -1252,11 +1218,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
if(!pPSState->writesODepth || rastState.clipDistanceMask)
|
||||
{
|
||||
RDTSC_START(BEBarycentric);
|
||||
if(bIsStandardPattern)
|
||||
if(T::bIsStandardPattern)
|
||||
{
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, MultisampleTraits<sampleCount>::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, MultisampleTraits<sampleCount>::vY(sample));
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1291,8 +1257,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
}
|
||||
|
||||
// offset depth/stencil buffers current sample
|
||||
uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
|
||||
uint8_t * pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
|
||||
uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample);
|
||||
uint8_t * pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample);
|
||||
|
||||
// ZTest for this sample
|
||||
RDTSC_START(BEEarlyDepthTest);
|
||||
|
@ -1332,8 +1298,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
// loop over all samples, broadcasting the results of the PS to all passing pixels
|
||||
for(uint32_t sample = 0; sample < numOMSamples; sample++)
|
||||
{
|
||||
uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
|
||||
uint8_t * pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
|
||||
uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample);
|
||||
uint8_t * pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample);
|
||||
|
||||
// output merger
|
||||
RDTSC_START(BEOutputMerger);
|
||||
|
@ -1346,12 +1312,12 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
|
||||
// forcedSampleCount outputs to any pixels with covered samples not masked off by SampleMask
|
||||
// depth test is disabled, so just set the z val to 0.
|
||||
if(bForcedSampleCount)
|
||||
if(T::bForcedSampleCount)
|
||||
{
|
||||
coverageMaskSample = depthMaskSample = anyDepthSamplePassed;
|
||||
vInterpolatedZ = _simd_setzero_ps();
|
||||
}
|
||||
else if(bIsStandardPattern)
|
||||
else if(T::bIsStandardPattern)
|
||||
{
|
||||
if(!_simd_movemask_ps(depthPassMask[sample]))
|
||||
{
|
||||
|
@ -1393,7 +1359,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
|
||||
Endtile:
|
||||
RDTSC_START(BEEndTile);
|
||||
for(uint32_t sample = 0; sample < numCoverageSamples; sample++)
|
||||
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
|
||||
{
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
@ -1413,10 +1379,10 @@ Endtile:
|
|||
template<uint32_t sampleCountT>
|
||||
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
///@todo: handle center multisample pattern
|
||||
typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
|
||||
RDTSC_START(BESetup);
|
||||
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
|
||||
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
|
||||
|
@ -1464,8 +1430,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
|||
{
|
||||
RDTSC_START(BEBarycentric);
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, MultisampleTraits<sampleCount>::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, MultisampleTraits<sampleCount>::vY(sample));
|
||||
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
|
||||
|
||||
backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
|
||||
|
||||
|
@ -1486,8 +1452,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
|||
simdscalar stencilPassMask = vCoverageMask;
|
||||
|
||||
// offset depth/stencil buffers current sample
|
||||
uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
|
||||
uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
|
||||
uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample);
|
||||
uint8_t *pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample);
|
||||
|
||||
RDTSC_START(BEEarlyDepthTest);
|
||||
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
|
||||
|
@ -1526,7 +1492,6 @@ PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COV
|
|||
PFN_OUTPUT_MERGER gBackendOutputMergerTable[SWR_NUM_RENDERTARGETS+1][SWR_MULTISAMPLE_TYPE_MAX] = {};
|
||||
PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable[2] = {};
|
||||
PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable[2] = {};
|
||||
PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2] = {};
|
||||
|
||||
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
|
||||
// arguments to static template arguments.
|
||||
|
@ -1573,55 +1538,6 @@ struct OMChooser
|
|||
}
|
||||
};
|
||||
|
||||
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
|
||||
// arguments to static template arguments.
|
||||
template <uint32_t... ArgsT>
|
||||
struct BECentroidBarycentricChooser
|
||||
{
|
||||
|
||||
// Last Arg Terminator
|
||||
template <typename... TArgsT>
|
||||
static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(uint32_t tArg)
|
||||
{
|
||||
if(tArg > 0)
|
||||
{
|
||||
return CalcCentroidBarycentrics<ArgsT..., 1>;
|
||||
}
|
||||
|
||||
return CalcCentroidBarycentrics<ArgsT..., 0>;
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_MULTISAMPLE_1X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_2X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_4X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_8X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_16X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample count\n");
|
||||
return nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(uint32_t tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
if(tArg > 0)
|
||||
{
|
||||
return BECentroidBarycentricChooser<ArgsT..., 1>::GetFunc(remainingArgs...);
|
||||
}
|
||||
|
||||
return BECentroidBarycentricChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
|
||||
}
|
||||
};
|
||||
|
||||
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
|
||||
// arguments to static template arguments.
|
||||
template <uint32_t... ArgsT>
|
||||
|
@ -1632,9 +1548,9 @@ struct BEChooser
|
|||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<ArgsT...>; break;
|
||||
case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<ArgsT...>; break;
|
||||
case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<ArgsT...>; break;
|
||||
case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<SwrBackendTraits<ArgsT...>>; break;
|
||||
case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<SwrBackendTraits<ArgsT...>>; break;
|
||||
case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid backend func\n");
|
||||
return nullptr;
|
||||
|
@ -1642,6 +1558,20 @@ struct BEChooser
|
|||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample pattern\n");
|
||||
return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
|
@ -1655,22 +1585,22 @@ struct BEChooser
|
|||
case SWR_MULTISAMPLE_8X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_16X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample count\n");
|
||||
return nullptr;
|
||||
break;
|
||||
SWR_ASSERT(0 && "Invalid sample count\n");
|
||||
return BEChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(uint32_t tArg, TArgsT... remainingArgs)
|
||||
static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
if(tArg > 0)
|
||||
if(tArg == true)
|
||||
{
|
||||
return BEChooser<ArgsT..., 1>::GetFunc(remainingArgs...);
|
||||
}
|
||||
|
||||
return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
|
||||
return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1689,37 +1619,21 @@ void InitBackendOMFuncTable(PFN_OUTPUT_MERGER (&table)[numRenderTargets][numSamp
|
|||
|
||||
template <SWR_MULTISAMPLE_COUNT numSampleRates>
|
||||
void InitBackendBarycentricsTables(PFN_CALC_PIXEL_BARYCENTRICS (&pixelTable)[2],
|
||||
PFN_CALC_SAMPLE_BARYCENTRICS (&sampleTable)[2],
|
||||
PFN_CALC_CENTROID_BARYCENTRICS (¢roidTable)[numSampleRates][2][2][2])
|
||||
PFN_CALC_SAMPLE_BARYCENTRICS (&sampleTable)[2])
|
||||
{
|
||||
pixelTable[0] = CalcPixelBarycentrics<0>;
|
||||
pixelTable[1] = CalcPixelBarycentrics<1>;
|
||||
|
||||
sampleTable[0] = CalcSampleBarycentrics<0>;
|
||||
sampleTable[1] = CalcSampleBarycentrics<1>;
|
||||
|
||||
for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < numSampleRates; sampleCount++)
|
||||
{
|
||||
for(uint32_t baryMask = 0; baryMask < 2; baryMask++)
|
||||
{
|
||||
for(uint32_t patternNum = 0; patternNum < 2; patternNum++)
|
||||
{
|
||||
for(uint32_t forcedSampleEnable = 0; forcedSampleEnable < 2; forcedSampleEnable++)
|
||||
{
|
||||
centroidTable[sampleCount][baryMask][patternNum][forcedSampleEnable]=
|
||||
BECentroidBarycentricChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, baryMask, patternNum, forcedSampleEnable);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[2][2])
|
||||
{
|
||||
gBackendSingleSample[0][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NONE, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
|
||||
gBackendSingleSample[0][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NONE, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
|
||||
gBackendSingleSample[1][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NORMAL, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
|
||||
gBackendSingleSample[1][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NORMAL, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
|
||||
gBackendSingleSample[0][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false, false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
|
||||
gBackendSingleSample[0][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false, true, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
|
||||
gBackendSingleSample[1][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, true, false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
|
||||
gBackendSingleSample[1][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, true, true, false, false,(SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
|
||||
}
|
||||
|
||||
template <SWR_MULTISAMPLE_COUNT numSampleRates, SWR_MSAA_SAMPLE_PATTERN numSamplePatterns, SWR_INPUT_COVERAGE numCoverageModes>
|
||||
|
@ -1734,9 +1648,11 @@ void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[numSampleRates][numSamp
|
|||
for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
|
||||
{
|
||||
table[sampleCount][samplePattern][inputCoverage][isCentroid][0] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, samplePattern, inputCoverage, isCentroid, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_PIXEL_RATE);
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), (isCentroid > 0),
|
||||
false, false, SWR_BACKEND_MSAA_PIXEL_RATE);
|
||||
table[sampleCount][samplePattern][inputCoverage][isCentroid][1] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, samplePattern, inputCoverage, isCentroid, 1, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_PIXEL_RATE);
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), (isCentroid > 0),
|
||||
true, false, SWR_BACKEND_MSAA_PIXEL_RATE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1751,9 +1667,9 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[numSampleRates][numCov
|
|||
for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < numCoverageModes; inputCoverage++)
|
||||
{
|
||||
table[sampleCount][inputCoverage][0] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, inputCoverage, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
|
||||
table[sampleCount][inputCoverage][1] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, inputCoverage, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), true, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1764,7 +1680,7 @@ void InitBackendFuncTables()
|
|||
InitBackendPixelFuncTable<(SWR_MULTISAMPLE_COUNT)SWR_MULTISAMPLE_TYPE_MAX, SWR_MSAA_SAMPLE_PATTERN_MAX, SWR_INPUT_COVERAGE_MAX>(gBackendPixelRateTable);
|
||||
InitBackendSampleFuncTable<SWR_MULTISAMPLE_TYPE_MAX, SWR_INPUT_COVERAGE_MAX>(gBackendSampleRateTable);
|
||||
InitBackendOMFuncTable<SWR_NUM_RENDERTARGETS+1, SWR_MULTISAMPLE_TYPE_MAX>(gBackendOutputMergerTable);
|
||||
InitBackendBarycentricsTables<(SWR_MULTISAMPLE_COUNT)(SWR_MULTISAMPLE_TYPE_MAX)>(gPixelBarycentricTable, gSampleBarycentricTable, gCentroidBarycentricTable);
|
||||
InitBackendBarycentricsTables<(SWR_MULTISAMPLE_COUNT)(SWR_MULTISAMPLE_TYPE_MAX)>(gPixelBarycentricTable, gSampleBarycentricTable);
|
||||
|
||||
gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ;
|
||||
gBackendNullPs[SWR_MULTISAMPLE_2X] = &BackendNullPS < SWR_MULTISAMPLE_2X > ;
|
||||
|
|
|
@ -60,7 +60,7 @@ extern const __m256 vULOffsetsY;
|
|||
#define MASK 0xff
|
||||
#endif
|
||||
|
||||
template<SWR_MULTISAMPLE_COUNT sampleCountT, bool bIsStandardPattern, bool bForcedSampleCount>
|
||||
template<typename T>
|
||||
INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (&inputMask)[KNOB_SIMD_WIDTH], const uint32_t sampleMask)
|
||||
{
|
||||
|
||||
|
@ -69,28 +69,28 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
|
|||
|
||||
__m256i mask[2];
|
||||
__m256i sampleCoverage[2];
|
||||
if(bIsStandardPattern)
|
||||
if(T::bIsStandardPattern)
|
||||
{
|
||||
__m256i src = _mm256_set1_epi32(0);
|
||||
__m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
|
||||
|
||||
if(MultisampleTraits<sampleCountT>::numSamples == 1)
|
||||
if(T::MultisampleT::numSamples == 1)
|
||||
{
|
||||
mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
|
||||
}
|
||||
else if(MultisampleTraits<sampleCountT>::numSamples == 2)
|
||||
else if(T::MultisampleT::numSamples == 2)
|
||||
{
|
||||
mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
|
||||
}
|
||||
else if(MultisampleTraits<sampleCountT>::numSamples == 4)
|
||||
else if(T::MultisampleT::numSamples == 4)
|
||||
{
|
||||
mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
|
||||
}
|
||||
else if(MultisampleTraits<sampleCountT>::numSamples == 8)
|
||||
else if(T::MultisampleT::numSamples == 8)
|
||||
{
|
||||
mask[0] = _mm256_set1_epi32(-1);
|
||||
}
|
||||
else if(MultisampleTraits<sampleCountT>::numSamples == 16)
|
||||
else if(T::MultisampleT::numSamples == 16)
|
||||
{
|
||||
mask[0] = _mm256_set1_epi32(-1);
|
||||
mask[1] = _mm256_set1_epi32(-1);
|
||||
|
@ -99,7 +99,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
|
|||
|
||||
// gather coverage for samples 0-7
|
||||
sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
|
||||
if(MultisampleTraits<sampleCountT>::numSamples > 8)
|
||||
if(T::MultisampleT::numSamples > 8)
|
||||
{
|
||||
// gather coverage for samples 8-15
|
||||
sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
|
||||
|
@ -109,23 +109,23 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
|
|||
{
|
||||
// center coverage is the same for all samples; just broadcast to the sample slots
|
||||
uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
|
||||
if(MultisampleTraits<sampleCountT>::numSamples == 1)
|
||||
if(T::MultisampleT::numSamples == 1)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
|
||||
}
|
||||
else if(MultisampleTraits<sampleCountT>::numSamples == 2)
|
||||
else if(T::MultisampleT::numSamples == 2)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
|
||||
}
|
||||
else if(MultisampleTraits<sampleCountT>::numSamples == 4)
|
||||
else if(T::MultisampleT::numSamples == 4)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
|
||||
}
|
||||
else if(MultisampleTraits<sampleCountT>::numSamples == 8)
|
||||
else if(T::MultisampleT::numSamples == 8)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
|
||||
}
|
||||
else if(MultisampleTraits<sampleCountT>::numSamples == 16)
|
||||
else if(T::MultisampleT::numSamples == 16)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
|
||||
sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
|
||||
|
@ -138,7 +138,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
|
|||
__m256i packedCoverage0 = _simd_shuffle_epi8(sampleCoverage[0], mask[0]);
|
||||
|
||||
__m256i packedCoverage1;
|
||||
if(MultisampleTraits<sampleCountT>::numSamples > 8)
|
||||
if(T::MultisampleT::numSamples > 8)
|
||||
{
|
||||
// pull out the the 8bit 4x2 coverage for samples 8-15 into the lower 32 bits of each 128bit lane
|
||||
packedCoverage1 = _simd_shuffle_epi8(sampleCoverage[1], mask[0]);
|
||||
|
@ -151,7 +151,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
|
|||
packedCoverage0 = _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), shufRes, 0xFE));
|
||||
|
||||
__m256i packedSampleCoverage;
|
||||
if(MultisampleTraits<sampleCountT>::numSamples > 8)
|
||||
if(T::MultisampleT::numSamples > 8)
|
||||
{
|
||||
// pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
|
||||
hiToLow = _mm256_permute2f128_si256(packedCoverage1, packedCoverage1, 0x83);
|
||||
|
@ -170,7 +170,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
|
|||
packedCoverage0 = _mm256_permutevar8x32_epi32(packedCoverage0, permMask);
|
||||
|
||||
__m256i packedSampleCoverage;
|
||||
if(MultisampleTraits<sampleCountT>::numSamples > 8)
|
||||
if(T::MultisampleT::numSamples > 8)
|
||||
{
|
||||
permMask = _mm256_set_epi32(0x7, 0x7, 0x7, 0x7, 0x4, 0x0, 0x7, 0x7);
|
||||
// pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
|
||||
|
@ -190,7 +190,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
|
|||
// convert packed sample coverage masks into single coverage masks for all samples for each pixel in the 4x2
|
||||
inputMask[i] = _simd_movemask_epi8(packedSampleCoverage);
|
||||
|
||||
if(!bForcedSampleCount)
|
||||
if(!T::bForcedSampleCount)
|
||||
{
|
||||
// input coverage has to be anded with sample mask if MSAA isn't forced on
|
||||
inputMask[i] &= sampleMask;
|
||||
|
@ -201,10 +201,22 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
|
|||
}
|
||||
}
|
||||
|
||||
template<SWR_MULTISAMPLE_COUNT sampleCountT, bool bIsStandardPattern, bool bForcedSampleCount>
|
||||
template<typename T>
|
||||
INLINE void generateInputCoverage(const uint64_t *const coverageMask, __m256 &inputCoverage, const uint32_t sampleMask)
|
||||
{
|
||||
uint32_t inputMask[KNOB_SIMD_WIDTH];
|
||||
generateInputCoverage<sampleCountT, bIsStandardPattern, bForcedSampleCount>(coverageMask, inputMask, sampleMask);
|
||||
generateInputCoverage<T>(coverageMask, inputMask, sampleMask);
|
||||
inputCoverage = _simd_castsi_ps(_mm256_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]));
|
||||
}
|
||||
|
||||
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
|
||||
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t odepth = 0>
|
||||
struct SwrBackendTraits
|
||||
{
|
||||
static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
|
||||
static const bool bInputCoverage = (coverage == 1);
|
||||
static const bool bCentroidPos = (centroid == 1);
|
||||
static const bool bForcedSampleCount = (forced == 1);
|
||||
static const bool bWritesODepth = (odepth == 1);
|
||||
typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, (bIsStandardPattern) ? SWR_MSAA_STANDARD_PATTERN : SWR_MSAA_CENTER_PATTERN> MultisampleT;
|
||||
};
|
|
@ -49,3 +49,16 @@ const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosX[16]
|
|||
{0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16]
|
||||
{0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000};
|
||||
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosX{ 0.5f };
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosY{ 0.5f };
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosX[2]{ 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosY[2]{ 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosX[4]{ 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosY[4]{ 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosX[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosY[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosX[16]
|
||||
{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosY[16]
|
||||
{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
|
@ -54,7 +54,7 @@ SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
|
|||
// hardcoded offsets based on Direct3d standard multisample positions
|
||||
// 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
|
||||
// coords are 0.8 fixed point offsets from (0, 0)
|
||||
template<SWR_MULTISAMPLE_COUNT sampleCount>
|
||||
template<SWR_MULTISAMPLE_COUNT sampleCount, SWR_MSAA_SAMPLE_PATTERN samplePattern = SWR_MSAA_STANDARD_PATTERN>
|
||||
struct MultisampleTraits
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum) = delete;
|
||||
|
@ -74,7 +74,7 @@ struct MultisampleTraits
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_1X>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
|
@ -143,10 +143,74 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X>
|
|||
static const float samplePosX;
|
||||
static const float samplePosY;
|
||||
static const uint32_t numSamples = 1;
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
|
||||
static const uint32_t numCoverageSamples = 1;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_2X>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
|
||||
|
||||
static const uint32_t numSamples = 1;
|
||||
static const float samplePosX;
|
||||
static const float samplePosY;
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
|
||||
static const uint32_t numCoverageSamples = 1;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
|
@ -238,10 +302,92 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X>
|
|||
static const float samplePosX[2];
|
||||
static const float samplePosY[2];
|
||||
static const uint32_t numSamples = 2;
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
|
||||
static const uint32_t numCoverageSamples = 2;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_4X>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileColorOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileColorOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileDepthOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileDepthOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileStencilOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileStencilOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask =_simd_set1_epi32(0x3);
|
||||
return mask;
|
||||
}
|
||||
static const uint32_t numSamples = 2;
|
||||
static const float samplePosX[2];
|
||||
static const float samplePosY[2];
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
|
||||
static const uint32_t numCoverageSamples = 1;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
|
@ -343,10 +489,98 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X>
|
|||
static const float samplePosX[4];
|
||||
static const float samplePosY[4];
|
||||
static const uint32_t numSamples = 4;
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
|
||||
static const uint32_t numCoverageSamples = 4;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_8X>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileColorOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileColorOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileDepthOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileDepthOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileStencilOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileStencilOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xF);
|
||||
return mask;
|
||||
}
|
||||
static const uint32_t numSamples = 4;
|
||||
static const float samplePosX[4];
|
||||
static const float samplePosY[4];
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
|
||||
static const uint32_t numCoverageSamples = 1;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
|
@ -464,10 +698,110 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X>
|
|||
static const float samplePosX[8];
|
||||
static const float samplePosY[8];
|
||||
static const uint32_t numSamples = 8;
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
|
||||
static const uint32_t numCoverageSamples = 8;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_16X>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileColorOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileColorOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileDepthOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileDepthOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileStencilOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileStencilOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xFF);
|
||||
return mask;
|
||||
}
|
||||
static const uint32_t numSamples = 8;
|
||||
static const float samplePosX[8];
|
||||
static const float samplePosY[8];
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
|
||||
static const uint32_t numCoverageSamples = 1;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
|
@ -617,4 +951,128 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X>
|
|||
static const float samplePosX[16];
|
||||
static const float samplePosY[16];
|
||||
static const uint32_t numSamples = 16;
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
|
||||
static const uint32_t numCoverageSamples = 16;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileColorOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 10,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 11,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 12,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 13,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 14,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 15,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileColorOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileDepthOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 10,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 11,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 12,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 13,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 14,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 15,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileDepthOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileStencilOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 8,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 9,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 10,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 11,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 12,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 13,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 14,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 15,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileStencilOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
|
||||
return mask;
|
||||
}
|
||||
static const uint32_t numSamples = 16;
|
||||
static const float samplePosX[16];
|
||||
static const float samplePosY[16];
|
||||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
|
||||
static const uint32_t numCoverageSamples = 1;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue