swr: [rasterizer core] Programmable sample position support

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2017-03-28 15:32:04 -05:00
parent 3c52a7316a
commit 117fc582f8
12 changed files with 267 additions and 596 deletions

View File

@ -60,6 +60,8 @@ def gen_llvm_type(type, name, is_pointer, is_pointer_pointer, is_array, is_array
llvm_type = 'VectorType::get(Type::getFloatTy(ctx), pJitMgr->mVWidth)'
elif type == 'simdscalari':
llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), pJitMgr->mVWidth)'
elif type == '__m128i':
llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), 4)'
elif type == 'SIMD8::vector_t':
llvm_type = 'VectorType::get(Type::getFloatTy(ctx), 8)'
elif type == 'SIMD8::vectori_t':
@ -145,6 +147,26 @@ def gen_llvm_types(input_file, output_file):
else:
is_llvm_struct = False
###########################################
# Is field the start of a function? Tells script to ignore it
is_llvm_func_start = re.search(r'@llvm_func_start', line)
if is_llvm_func_start is not None:
while not end_of_struct and idx < len(lines)-1:
idx += 1
line = lines[idx].rstrip()
is_llvm_func_end = re.search(r'@llvm_func_end', line)
if is_llvm_func_end is not None:
break;
continue
###########################################
# Is field a function? Tells script to ignore it
is_llvm_func = re.search(r'@llvm_func', line)
if is_llvm_func is not None:
continue
###########################################
# Is field a llvm enum? Tells script to treat type as an enum and replaced with uint32 type.
is_llvm_enum = re.search(r'@llvm_enum', line)

View File

@ -648,6 +648,13 @@ simdscalari _simd_blendv_epi32(simdscalari a, simdscalari b, simdscalari mask)
return _simd_castps_si(_simd_blendv_ps(_simd_castsi_ps(a), _simd_castsi_ps(b), _simd_castsi_ps(mask)));
}
template<int mask>
INLINE
__m128i _simd_blend4_epi32(__m128i a, __m128i b)
{
return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), mask));
}
// convert bitmask to vector mask
INLINE
simdscalar vMask(int32_t mask)

View File

@ -793,7 +793,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
const SWR_RASTSTATE &rastState = pState->state.rastState;
const SWR_PS_STATE &psState = pState->state.psState;
BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
// setup backend
if (psState.pfnPixelShader == nullptr)
@ -802,7 +801,8 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
}
else
{
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
@ -815,7 +815,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
{
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage]
backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern][psState.inputCoverage]
[centroid][forcedSampleCount][canEarlyZ]
;
}
@ -827,7 +827,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
}
break;
case SWR_SHADING_RATE_SAMPLE:
SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
SWR_ASSERT(rastState.bIsCenterPattern != true);
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];

View File

@ -468,7 +468,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
SWR_PS_CONTEXT psContext;
SetupPixelShaderContext<T>(&psContext, work);
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
SetupPixelShaderContext<T>(&psContext, samplePos, work);
AR_END(BESetup, 1);
@ -517,7 +518,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, true>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
@ -663,7 +664,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
SWR_PS_CONTEXT psContext;
SetupPixelShaderContext<T>(&psContext, work);
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
SetupPixelShaderContext<T>(&psContext, samplePos, work);
AR_END(BESetup, 0);
@ -696,7 +698,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
AR_END(BEBarycentric, 0);
@ -725,8 +727,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
CalcSampleBarycentrics(coeffs, psContext);
@ -870,7 +872,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
AR_BEGIN(BENullBackend, pDC->drawId);
///@todo: handle center multisample pattern
typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
typedef SwrBackendTraits<sampleCountT, false> T;
AR_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
@ -889,7 +891,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
simdscalar vXSamplePosUL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
@ -928,8 +930,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, samplePos.vY(sample));
CalcSampleBarycentrics(coeffs, psContext);
@ -995,7 +997,7 @@ PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
[2] // canEarlyZ
= {};
PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
[SWR_MSAA_SAMPLE_PATTERN_COUNT]
[2] // isCenterPattern
[SWR_INPUT_COVERAGE_COUNT]
[2] // centroid
[2] // forcedSampleCount
@ -1027,21 +1029,6 @@ struct BEChooser
}
}
// Recursively parse args
template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs)
{
switch(tArg)
{
case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break;
case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break;
default:
SWR_ASSERT(0 && "Invalid sample pattern\n");
return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...);
break;
}
}
// Recursively parse args
template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
@ -1098,7 +1085,7 @@ void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COU
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
table[inputCoverage][isCentroid][canEarlyZ] =
BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
(isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
}
}
@ -1116,7 +1103,7 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_C
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
table[sampleCount][inputCoverage][centroid][canEarlyZ] =
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
(centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
}
}

View File

@ -48,7 +48,7 @@ void InitCPSFuncTables();
void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext);
extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
[SWR_MSAA_SAMPLE_PATTERN_COUNT]
[2] // isCenterPattern
[SWR_INPUT_COVERAGE_COUNT]
[2] // centroid
[2] // forcedSampleCount
@ -153,7 +153,34 @@ struct generateInputCoverage
__m256i mask[2];
__m256i sampleCoverage[2];
if(T::bIsStandardPattern)
if(T::bIsCenterPattern)
{
// center coverage is the same for all samples; just broadcast to the sample slots
uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
if(T::MultisampleT::numSamples == 1)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
}
else if(T::MultisampleT::numSamples == 2)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
}
else if(T::MultisampleT::numSamples == 4)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
}
else if(T::MultisampleT::numSamples == 8)
{
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
}
else if(T::MultisampleT::numSamples == 16)
{
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
}
}
else
{
__m256i src = _mm256_set1_epi32(0);
__m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
@ -189,32 +216,6 @@ struct generateInputCoverage
sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
}
}
else
{
// center coverage is the same for all samples; just broadcast to the sample slots
uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
if(T::MultisampleT::numSamples == 1)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
}
else if(T::MultisampleT::numSamples == 2)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
}
else if(T::MultisampleT::numSamples == 4)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
}
else if(T::MultisampleT::numSamples == 8)
{
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
}
else if(T::MultisampleT::numSamples == 16)
{
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
}
}
mask[0] = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xC, 0x8, 0x4, 0x0,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xC, 0x8, 0x4, 0x0);
@ -332,7 +333,8 @@ struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
// SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<typename T>
INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask,
INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS& samplePos,
const uint64_t *const coverageMask, const uint32_t sampleMask,
const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
{
uint32_t inputMask[KNOB_SIMD_WIDTH];
@ -352,23 +354,23 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
(inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
// look up and set the sample offsets from UL pixel corner for first covered sample
__m256 vXSample = _mm256_set_ps(T::MultisampleT::X(sampleNum[7]),
T::MultisampleT::X(sampleNum[6]),
T::MultisampleT::X(sampleNum[5]),
T::MultisampleT::X(sampleNum[4]),
T::MultisampleT::X(sampleNum[3]),
T::MultisampleT::X(sampleNum[2]),
T::MultisampleT::X(sampleNum[1]),
T::MultisampleT::X(sampleNum[0]));
__m256 vXSample = _mm256_set_ps(samplePos.X(sampleNum[7]),
samplePos.X(sampleNum[6]),
samplePos.X(sampleNum[5]),
samplePos.X(sampleNum[4]),
samplePos.X(sampleNum[3]),
samplePos.X(sampleNum[2]),
samplePos.X(sampleNum[1]),
samplePos.X(sampleNum[0]));
__m256 vYSample = _mm256_set_ps(T::MultisampleT::Y(sampleNum[7]),
T::MultisampleT::Y(sampleNum[6]),
T::MultisampleT::Y(sampleNum[5]),
T::MultisampleT::Y(sampleNum[4]),
T::MultisampleT::Y(sampleNum[3]),
T::MultisampleT::Y(sampleNum[2]),
T::MultisampleT::Y(sampleNum[1]),
T::MultisampleT::Y(sampleNum[0]));
__m256 vYSample = _mm256_set_ps(samplePos.Y(sampleNum[7]),
samplePos.Y(sampleNum[6]),
samplePos.Y(sampleNum[5]),
samplePos.Y(sampleNum[4]),
samplePos.Y(sampleNum[3]),
samplePos.Y(sampleNum[2]),
samplePos.Y(sampleNum[1]),
samplePos.Y(sampleNum[0]));
// add sample offset to UL pixel corner
vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
@ -398,8 +400,8 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
__m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
vXSample = _simd_set1_ps(T::MultisampleT::X(firstCoveredSampleMaskSample));
vYSample = _simd_set1_ps(T::MultisampleT::Y(firstCoveredSampleMaskSample));
vXSample = _simd_set1_ps(samplePos.X(firstCoveredSampleMaskSample));
vYSample = _simd_set1_ps(samplePos.Y(firstCoveredSampleMaskSample));
// blend in case 3a pixel locations
psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
@ -494,7 +496,7 @@ inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uin
}
template<typename T>
void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC &work)
void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos, SWR_TRIANGLE_DESC &work)
{
psContext->pAttribs = work.pAttribs;
psContext->pPerspAttribs = work.pPerspAttribs;
@ -507,14 +509,15 @@ void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC
psContext->recipDet = work.recipDet;
psContext->pRecipW = work.pRecipW;
psContext->pSamplePosX = reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
psContext->pSamplePosY = reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
psContext->pSamplePosX = samplePos.X();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
psContext->pSamplePosY = samplePos.Y();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
psContext->rasterizerSampleCount = T::MultisampleT::numSamples;
psContext->sampleIndex = 0;
}
template<typename T, bool IsSingleSample>
void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos,
const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
{
if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid positions are still different
{
@ -530,16 +533,16 @@ void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, co
if (T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
if (T::bIsStandardPattern)
{
// add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
CalcCentroidPos<T>(*psContext, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
}
else
if (T::bIsCenterPattern)
{
psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
}
else
{
// add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
CalcCentroidPos<T>(*psContext, samplePos, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
}
CalcCentroidBarycentrics(coeffs, *psContext, psContext->vX.UL, psContext->vY.UL);
}
@ -557,8 +560,9 @@ struct PixelRateZTestLoop
PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
uint8_t*& depthBuffer, uint8_t*& stencilBuffer, const uint8_t ClipDistanceMask) :
pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer) {};
samplePos(state.rastState.samplePositions),
clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
INLINE
uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
@ -597,8 +601,8 @@ struct PixelRateZTestLoop
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
// calc I & J per sample
CalcSampleBarycentrics(coeffs, psContext);
@ -673,6 +677,7 @@ private:
const BarycentricCoeffs& coeffs;
const API_STATE& state;
const SWR_PS_STATE& psState;
const SWR_MULTISAMPLE_POS& samplePos;
const uint8_t clipDistanceMask;
uint8_t*& pDepthBuffer;
uint8_t*& pStencilBuffer;
@ -862,7 +867,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
SetupBarycentricCoeffs(&coeffs, work);
SWR_PS_CONTEXT psContext;
SetupPixelShaderContext<T>(&psContext, work);
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
@ -887,7 +893,6 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
{
#if USE_8x2_TILE_BACKEND
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
#endif
simdscalar activeLanes;
if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
@ -904,7 +909,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
AR_END(BEBarycentric, 0);
@ -966,7 +971,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
{
AR_BEGIN(BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
simdscalar coverageMask, depthMask;
if(T::bForcedSampleCount)
{
@ -1045,15 +1050,15 @@ Endtile:
AR_END(BEPixelRateBackend, 0);
}
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0,
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
>
struct SwrBackendTraits
{
static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
static const bool bIsCenterPattern = (isCenter == 1);
static const uint32_t InputCoverage = coverage;
static const bool bCentroidPos = (centroid == 1);
static const bool bForcedSampleCount = (forced == 1);
static const bool bCanEarlyZ = (canEarlyZ == 1);
typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, (bIsStandardPattern) ? SWR_MSAA_STANDARD_PATTERN : SWR_MSAA_CENTER_PATTERN> MultisampleT;
typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, bIsCenterPattern> MultisampleT;
};

View File

@ -640,9 +640,8 @@ void BinTriangles(
else
{
// degenerate triangles won't be sent to rasterizer; just enable all edges
pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
(rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID,
(state.scissorsTileAligned == false));
pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID, (state.scissorsTileAligned == false));
}
if (!triMask)
@ -658,7 +657,7 @@ void BinTriangles(
// only discard for non-MSAA case and when conservative rast is disabled
// (xmin + 127) & ~255
// (xmax + 128) & ~255
if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.samplePattern == SWR_MSAA_CENTER_PATTERN) &&
if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.bIsCenterPattern) &&
(!CT::IsConservativeT::value))
{
origTriMask = triMask;
@ -787,9 +786,8 @@ endBinTriangles:
{
// only rasterize valid edges if we have a degenerate primitive
int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
work.pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
(rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable,
(state.scissorsTileAligned == false));
work.pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable, (state.scissorsTileAligned == false));
// Degenerate triangles are required to be constant interpolated
isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;

View File

@ -50,16 +50,3 @@ const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosX[16]
{0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625};
const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16]
{0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000};
const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosX{ 0.5f };
const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosY{ 0.5f };
const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosX[2]{ 0.5f, 0.5f};
const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosY[2]{ 0.5f, 0.5f};
const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosX[4]{ 0.5f, 0.5f, 0.5f, 0.5f};
const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosY[4]{ 0.5f, 0.5f, 0.5f, 0.5f };
const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosX[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosY[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosX[16]
{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosY[16]
{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };

View File

@ -58,70 +58,21 @@ SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
// hardcoded offsets based on Direct3d standard multisample positions
// 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
// coords are 0.8 fixed point offsets from (0, 0)
template<SWR_MULTISAMPLE_COUNT sampleCount, SWR_MSAA_SAMPLE_PATTERN samplePattern = SWR_MSAA_STANDARD_PATTERN>
template<SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
struct MultisampleTraits
{
INLINE static __m128i vXi(uint32_t sampleNum) = delete;
INLINE static __m128i vYi(uint32_t sampleNum) = delete;
INLINE static simdscalar vX(uint32_t sampleNum) = delete;
INLINE static simdscalar vY(uint32_t sampleNum) = delete;
INLINE static float X(uint32_t sampleNum) = delete;
INLINE static float Y(uint32_t sampleNum) = delete;
INLINE static __m128i TileSampleOffsetsX() = delete;
INLINE static __m128i TileSampleOffsetsY() = delete;
INLINE static simdscalari FullSampleMask() = delete;
static const uint32_t numSamples = 0;
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_1X, false>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
static const __m128i X = _mm_set1_epi32(samplePosXi);
return X;
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
static const __m128i Y = _mm_set1_epi32(samplePosYi);
return Y;
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
static const simdscalar X = _simd_set1_ps(0.5f);
return X;
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
static const simdscalar Y = _simd_set1_ps(0.5f);
return Y;
}
INLINE static float X(uint32_t sampleNum) {return samplePosX;};
INLINE static float Y(uint32_t sampleNum) {return samplePosY;};
INLINE static __m128i TileSampleOffsetsX()
{
static const uint32_t bboxLeftEdge = 0x80;
static const uint32_t bboxRightEdge = 0x80;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
return tileSampleOffsetX;
}
INLINE static __m128i TileSampleOffsetsY()
{
static const uint32_t bboxTopEdge = 0x80;
static const uint32_t bboxBottomEdge = 0x80;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
return tileSampleOffsetY;
}
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
static const uint32_t samplePosXi;
@ -134,43 +85,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_1X, true>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
INLINE static __m128i TileSampleOffsetsX()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static __m128i TileSampleOffsetsY()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
static const uint32_t numSamples = 1;
@ -181,57 +99,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_2X, false>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
SWR_ASSERT(sampleNum < numSamples);
static const __m128i X[numSamples] {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1])};
return X[sampleNum];
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
SWR_ASSERT(sampleNum < numSamples);
static const __m128i Y[numSamples] {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1])};
return Y[sampleNum];
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
static const simdscalar X[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
assert(sampleNum < numSamples);
return X[sampleNum];
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
static const simdscalar Y[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
assert(sampleNum < numSamples);
return Y[sampleNum];
}
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
INLINE static __m128i TileSampleOffsetsX()
{
static const uint32_t bboxLeftEdge = 0x40;
static const uint32_t bboxRightEdge = 0xC0;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
return tileSampleOffsetX;
}
INLINE static __m128i TileSampleOffsetsY()
{
static const uint32_t bboxTopEdge = 0x40;
static const uint32_t bboxBottomEdge = 0xC0;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
return tileSampleOffsetY;
}
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask =_simd_set1_epi32(0x3);
@ -248,43 +119,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_2X, true>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
INLINE static __m128i TileSampleOffsetsX()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static __m128i TileSampleOffsetsY()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask =_simd_set1_epi32(0x3);
@ -298,61 +136,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_4X, false>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
static const __m128i X[numSamples]
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3])};
SWR_ASSERT(sampleNum < numSamples);
return X[sampleNum];
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
static const __m128i Y[numSamples]
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3])};
SWR_ASSERT(sampleNum < numSamples);
return Y[sampleNum];
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
static const simdscalar X[numSamples]
{_simd_set1_ps(0.375f), _simd_set1_ps(0.875), _simd_set1_ps(0.125), _simd_set1_ps(0.625)};
assert(sampleNum < numSamples);
return X[sampleNum];
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
static const simdscalar Y[numSamples]
{_simd_set1_ps(0.125), _simd_set1_ps(0.375f), _simd_set1_ps(0.625), _simd_set1_ps(0.875)};
assert(sampleNum < numSamples);
return Y[sampleNum];
}
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
INLINE static __m128i TileSampleOffsetsX()
{
static const uint32_t bboxLeftEdge = 0x20;
static const uint32_t bboxRightEdge = 0xE0;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
return tileSampleOffsetX;
}
INLINE static __m128i TileSampleOffsetsY()
{
static const uint32_t bboxTopEdge = 0x20;
static const uint32_t bboxBottomEdge = 0xE0;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
return tileSampleOffsetY;
}
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xF);
@ -369,48 +156,16 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_4X, true>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
INLINE static __m128i TileSampleOffsetsX()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static __m128i TileSampleOffsetsY()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xF);
return mask;
}
static const uint32_t numSamples = 4;
static const float samplePosX[4];
static const float samplePosY[4];
@ -419,65 +174,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_8X, false>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
static const __m128i X[numSamples]
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
_mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7])};
SWR_ASSERT(sampleNum < numSamples);
return X[sampleNum];
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
static const __m128i Y[numSamples]
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
_mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7])};
SWR_ASSERT(sampleNum < numSamples);
return Y[sampleNum];
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
static const simdscalar X[numSamples]
{_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.8125), _simd_set1_ps(0.3125),
_simd_set1_ps(0.1875), _simd_set1_ps(0.0625), _simd_set1_ps(0.6875), _simd_set1_ps(0.9375)};
assert(sampleNum < numSamples);
return X[sampleNum];
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
static const simdscalar Y[numSamples]
{_simd_set1_ps(0.3125), _simd_set1_ps(0.6875), _simd_set1_ps(0.5625), _simd_set1_ps(0.1875),
_simd_set1_ps(0.8125), _simd_set1_ps(0.4375), _simd_set1_ps(0.9375), _simd_set1_ps(0.0625)};
assert(sampleNum < numSamples);
return Y[sampleNum];
}
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
INLINE static __m128i TileSampleOffsetsX()
{
static const uint32_t bboxLeftEdge = 0x10;
static const uint32_t bboxRightEdge = 0xF0;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
return tileSampleOffsetX;
}
INLINE static __m128i TileSampleOffsetsY()
{
static const uint32_t bboxTopEdge = 0x10;
static const uint32_t bboxBottomEdge = 0xF0;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
return tileSampleOffsetY;
}
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFF);
@ -494,43 +194,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_8X, true>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
INLINE static __m128i TileSampleOffsetsX()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static __m128i TileSampleOffsetsY()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFF);
@ -544,73 +211,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_16X, false>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
static const __m128i X[numSamples]
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
_mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7]),
_mm_set1_epi32(samplePosXi[8]), _mm_set1_epi32(samplePosXi[9]), _mm_set1_epi32(samplePosXi[10]), _mm_set1_epi32(samplePosXi[11]),
_mm_set1_epi32(samplePosXi[12]), _mm_set1_epi32(samplePosXi[13]), _mm_set1_epi32(samplePosXi[14]), _mm_set1_epi32(samplePosXi[15])};
SWR_ASSERT(sampleNum < numSamples);
return X[sampleNum];
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
static const __m128i Y[numSamples]
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
_mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7]),
_mm_set1_epi32(samplePosYi[8]), _mm_set1_epi32(samplePosYi[9]), _mm_set1_epi32(samplePosYi[10]), _mm_set1_epi32(samplePosYi[11]),
_mm_set1_epi32(samplePosYi[12]), _mm_set1_epi32(samplePosYi[13]), _mm_set1_epi32(samplePosYi[14]), _mm_set1_epi32(samplePosYi[15])};
SWR_ASSERT(sampleNum < numSamples);
return Y[sampleNum];
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
static const simdscalar X[numSamples]
{_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.3125), _simd_set1_ps(0.7500),
_simd_set1_ps(0.1875), _simd_set1_ps(0.6250), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875),
_simd_set1_ps(0.3750), _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.1250),
_simd_set1_ps(0.0000), _simd_set1_ps(0.9375), _simd_set1_ps(0.8750), _simd_set1_ps(0.0625)};
assert(sampleNum < numSamples);
return X[sampleNum];
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
static const simdscalar Y[numSamples]
{_simd_set1_ps(0.5625), _simd_set1_ps(0.3125), _simd_set1_ps(0.6250), _simd_set1_ps(0.4375),
_simd_set1_ps(0.3750), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875), _simd_set1_ps(0.1875),
_simd_set1_ps(0.8750), _simd_set1_ps(0.0625), _simd_set1_ps(0.1250), _simd_set1_ps(0.7500),
_simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.9375), _simd_set1_ps(0.0000)};
assert(sampleNum < numSamples);
return Y[sampleNum];
}
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
INLINE static __m128i TileSampleOffsetsX()
{
static const uint32_t bboxLeftEdge = 0x00;
static const uint32_t bboxRightEdge = 0xF0;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
return tileSampleOffsetX;
}
INLINE static __m128i TileSampleOffsetsY()
{
static const uint32_t bboxTopEdge = 0x00;
static const uint32_t bboxBottomEdge = 0xF0;
// BR, BL, UR, UL
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
return tileSampleOffsetY;
}
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
@ -627,43 +231,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
struct MultisampleTraits<SWR_MULTISAMPLE_16X, true>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static __m128i vYi(uint32_t sampleNum)
{
return _mm_set1_epi32(0x80);
}
INLINE static simdscalar vX(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static simdscalar vY(uint32_t sampleNum)
{
return _simd_set1_ps(0.5f);
}
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
INLINE static __m128i TileSampleOffsetsX()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static __m128i TileSampleOffsetsY()
{
// BR, BL, UR, UL
return _mm_set1_epi32(0x80);
}
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
@ -675,3 +246,50 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
static const uint32_t numCoverageSamples = 1;
};
INLINE
bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount, const SWR_MULTISAMPLE_POS& samplePos)
{
// detect if we're using standard or center sample patterns
const uint32_t *standardPosX, *standardPosY;
switch(sampleCount)
{
case SWR_MULTISAMPLE_1X:
standardPosX = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosXi;
standardPosY = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosYi;
break;
case SWR_MULTISAMPLE_2X:
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosXi;
standardPosY = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosYi;
break;
case SWR_MULTISAMPLE_4X:
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosXi;
standardPosY = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosYi;
break;
case SWR_MULTISAMPLE_8X:
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosXi;
standardPosY = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosYi;
break;
case SWR_MULTISAMPLE_16X:
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosXi;
standardPosY = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosYi;
break;
default:
break;
}
// scan sample pattern for standard or center
uint32_t numSamples = GetNumSamples(sampleCount);
bool bIsStandard = true;
if(numSamples > 1)
{
for(uint32_t i = 0; i < numSamples; i++)
{
bIsStandard = (standardPosX[i] == samplePos.Xi(i)) ||
(standardPosY[i] == samplePos.Yi(i));
if(!bIsStandard)
break;
}
}
return !bIsStandard;
}

View File

@ -1118,8 +1118,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
__m256d vEdgeTileBbox[3];
if (NumCoverageSamplesT::value > 1)
{
__m128i vTileSampleBBoxXh = RT::MT::TileSampleOffsetsX();
__m128i vTileSampleBBoxYh = RT::MT::TileSampleOffsetsY();
const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
__m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
__m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
@ -1206,8 +1207,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
}
else
{
__m128i vSampleOffsetXh = RT::MT::vXi(sampleNum);
__m128i vSampleOffsetYh = RT::MT::vYi(sampleNum);
const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
__m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
__m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
__m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
__m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
@ -1340,7 +1342,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
// setup triangle rasterizer function
PFN_WORK_FUNC pfnTriRast;
// conservative rast not supported for points/lines
pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false,
pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
// overwrite texcoords for point sprites
@ -1673,7 +1675,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
// setup triangle rasterizer function
PFN_WORK_FUNC pfnTriRast;
// conservative rast not supported for points/lines
pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false,
pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
// make sure this macrotile intersects the triangle

View File

@ -115,8 +115,7 @@ template <typename NumSamplesT, typename CenterPatternT, typename ConservativeT,
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>>
{
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value),
(CenterPatternT::value ? SWR_MSAA_CENTER_PATTERN : SWR_MSAA_STANDARD_PATTERN)> MT;
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value), CenterPatternT::value> MT;
/// Fixed point precision the rasterizer is using
typedef FixedPointTraits<Fixed_16_8> PrecisionT;

View File

@ -29,6 +29,8 @@
#include "common/formats.h"
#include "common/simdintrin.h"
#include <functional>
#include <algorithm>
//////////////////////////////////////////////////////////////////////////
/// PRIMITIVE_TOPOLOGY.
@ -333,8 +335,7 @@ struct SWR_PS_CONTEXT
uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS];
// IN: Pointers to render target hottiles
uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
};
//////////////////////////////////////////////////////////////////////////
@ -909,13 +910,6 @@ enum SWR_FRONTWINDING
};
enum SWR_MSAA_SAMPLE_PATTERN
{
SWR_MSAA_CENTER_PATTERN,
SWR_MSAA_STANDARD_PATTERN,
SWR_MSAA_SAMPLE_PATTERN_COUNT
};
enum SWR_PIXEL_LOCATION
{
SWR_PIXEL_LOCATION_CENTER,
@ -925,16 +919,75 @@ enum SWR_PIXEL_LOCATION
// fixed point screen space sample locations within a pixel
struct SWR_MULTISAMPLE_POS
{
uint32_t x;
uint32_t y;
};
public:
INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
INLINE sampleArrayT X() const { return _x; }; // @llvm_func
INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
INLINE void PrecalcSampleData(int numSamples) // @llvm_func_start
{
for(int i = 0; i < numSamples; i++)
{
_vXi[i] = _mm_set1_epi32(_xi[i]);
_vYi[i] = _mm_set1_epi32(_yi[i]);
_vX[i] = _simd_set1_ps(_x[i]);
_vY[i] = _simd_set1_ps(_y[i]);
}
// precalculate the raster tile BB for the rasterizer.
CalcTileSampleOffsets(numSamples);
} // @llvm_func_end
private:
INLINE void CalcTileSampleOffsets(int numSamples) // @llvm_func_start
{
auto expandThenBlend4 = [](uint32_t* min, uint32_t* max, auto mask)
{
__m128i vMin = _mm_set1_epi32(*min);
__m128i vMax = _mm_set1_epi32(*max);
return _simd_blend4_epi32<decltype(mask)::value>(vMin, vMax);
};
auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
std::integral_constant<int, 0xA> xMask;
// BR(max), BL(min), UR(max), UL(min)
tileSampleOffsetsX = expandThenBlend4(minXi, maxXi, xMask);
auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
std::integral_constant<int, 0xC> yMask;
// BR(max), BL(min), UR(max), UL(min)
tileSampleOffsetsY = expandThenBlend4(minYi, maxYi, yMask);
}; // @llvm_func_end
// scalar sample values
uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
float _x[SWR_MAX_NUM_MULTISAMPLES];
float _y[SWR_MAX_NUM_MULTISAMPLES];
// precalc'd / vectorized samples
__m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
__m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
__m128i tileSampleOffsetsX;
__m128i tileSampleOffsetsY;
enum SWR_MSAA_RASTMODE
{
SWR_MSAA_RASTMODE_OFF_PIXEL,
SWR_MSAA_RASTMODE_OFF_PATTERN,
SWR_MSAA_RASTMODE_ON_PIXEL,
SWR_MSAA_RASTMODE_ON_PATTERN
};
//////////////////////////////////////////////////////////////////////////
@ -951,7 +1004,6 @@ struct SWR_RASTSTATE
uint32_t pointParam : 1;
uint32_t pointSpriteEnable : 1;
uint32_t pointSpriteTopOrigin : 1;
uint32_t msaaRastEnable : 1;
uint32_t forcedSampleCount : 1;
uint32_t pixelOffset : 1;
uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
@ -965,15 +1017,11 @@ struct SWR_RASTSTATE
float depthBiasClamp;
SWR_FORMAT depthFormat; // @llvm_enum
///@todo: MSAA lines
// multisample state for MSAA lines
SWR_MSAA_RASTMODE rastMode; // @llvm_enum
// sample count the rasterizer is running at
SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
uint32_t pixelLocation; // UL or Center
SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES];
SWR_MSAA_SAMPLE_PATTERN samplePattern; // @llvm_enum
SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
bool bIsCenterPattern; // @llvm_enum
// user clip/cull distance enables
uint8_t cullDistanceMask;

View File

@ -1061,8 +1061,6 @@ swr_update_derived(struct pipe_context *pipe,
rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
/* XXX TODO: Add multisample */
rastState->msaaRastEnable = false;
rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
rastState->sampleCount = SWR_MULTISAMPLE_1X;
rastState->forcedSampleCount = false;