swr: [rasterizer core] Programmable sample position support
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
3c52a7316a
commit
117fc582f8
|
@ -60,6 +60,8 @@ def gen_llvm_type(type, name, is_pointer, is_pointer_pointer, is_array, is_array
|
|||
llvm_type = 'VectorType::get(Type::getFloatTy(ctx), pJitMgr->mVWidth)'
|
||||
elif type == 'simdscalari':
|
||||
llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), pJitMgr->mVWidth)'
|
||||
elif type == '__m128i':
|
||||
llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), 4)'
|
||||
elif type == 'SIMD8::vector_t':
|
||||
llvm_type = 'VectorType::get(Type::getFloatTy(ctx), 8)'
|
||||
elif type == 'SIMD8::vectori_t':
|
||||
|
@ -145,6 +147,26 @@ def gen_llvm_types(input_file, output_file):
|
|||
else:
|
||||
is_llvm_struct = False
|
||||
|
||||
###########################################
|
||||
# Is field the start of a function? Tells script to ignore it
|
||||
is_llvm_func_start = re.search(r'@llvm_func_start', line)
|
||||
|
||||
if is_llvm_func_start is not None:
|
||||
while not end_of_struct and idx < len(lines)-1:
|
||||
idx += 1
|
||||
line = lines[idx].rstrip()
|
||||
is_llvm_func_end = re.search(r'@llvm_func_end', line)
|
||||
if is_llvm_func_end is not None:
|
||||
break;
|
||||
continue
|
||||
|
||||
###########################################
|
||||
# Is field a function? Tells script to ignore it
|
||||
is_llvm_func = re.search(r'@llvm_func', line)
|
||||
|
||||
if is_llvm_func is not None:
|
||||
continue
|
||||
|
||||
###########################################
|
||||
# Is field a llvm enum? Tells script to treat type as an enum and replaced with uint32 type.
|
||||
is_llvm_enum = re.search(r'@llvm_enum', line)
|
||||
|
|
|
@ -648,6 +648,13 @@ simdscalari _simd_blendv_epi32(simdscalari a, simdscalari b, simdscalari mask)
|
|||
return _simd_castps_si(_simd_blendv_ps(_simd_castsi_ps(a), _simd_castsi_ps(b), _simd_castsi_ps(mask)));
|
||||
}
|
||||
|
||||
template<int mask>
|
||||
INLINE
|
||||
__m128i _simd_blend4_epi32(__m128i a, __m128i b)
|
||||
{
|
||||
return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), mask));
|
||||
}
|
||||
|
||||
// convert bitmask to vector mask
|
||||
INLINE
|
||||
simdscalar vMask(int32_t mask)
|
||||
|
|
|
@ -793,7 +793,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
const SWR_RASTSTATE &rastState = pState->state.rastState;
|
||||
const SWR_PS_STATE &psState = pState->state.psState;
|
||||
BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
|
||||
const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
|
||||
|
||||
// setup backend
|
||||
if (psState.pfnPixelShader == nullptr)
|
||||
|
@ -802,7 +801,8 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
}
|
||||
else
|
||||
{
|
||||
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
|
||||
const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
|
||||
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
|
||||
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
|
||||
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
|
||||
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
|
||||
|
@ -815,7 +815,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
{
|
||||
// always need to generate I & J per sample for Z interpolation
|
||||
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
|
||||
backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage]
|
||||
backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern][psState.inputCoverage]
|
||||
[centroid][forcedSampleCount][canEarlyZ]
|
||||
;
|
||||
}
|
||||
|
@ -827,7 +827,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
}
|
||||
break;
|
||||
case SWR_SHADING_RATE_SAMPLE:
|
||||
SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
|
||||
SWR_ASSERT(rastState.bIsCenterPattern != true);
|
||||
// always need to generate I & J per sample for Z interpolation
|
||||
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
|
||||
backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
|
||||
|
|
|
@ -468,7 +468,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
|||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
SetupPixelShaderContext<T>(&psContext, work);
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
SetupPixelShaderContext<T>(&psContext, samplePos, work);
|
||||
|
||||
AR_END(BESetup, 1);
|
||||
|
||||
|
@ -517,7 +518,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
|||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, true>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
|
||||
|
@ -663,7 +664,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
SetupPixelShaderContext<T>(&psContext, work);
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
SetupPixelShaderContext<T>(&psContext, samplePos, work);
|
||||
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
|
@ -696,7 +698,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
|
@ -725,8 +727,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
|||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
|
||||
|
||||
CalcSampleBarycentrics(coeffs, psContext);
|
||||
|
||||
|
@ -870,7 +872,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
|||
|
||||
AR_BEGIN(BENullBackend, pDC->drawId);
|
||||
///@todo: handle center multisample pattern
|
||||
typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
|
||||
typedef SwrBackendTraits<sampleCountT, false> T;
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
@ -889,7 +891,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
|||
simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
|
||||
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
|
||||
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
simdscalar vXSamplePosUL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
|
@ -928,8 +930,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
|||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
|
||||
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, samplePos.vY(sample));
|
||||
|
||||
CalcSampleBarycentrics(coeffs, psContext);
|
||||
|
||||
|
@ -995,7 +997,7 @@ PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
|
|||
[2] // canEarlyZ
|
||||
= {};
|
||||
PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
|
||||
[SWR_MSAA_SAMPLE_PATTERN_COUNT]
|
||||
[2] // isCenterPattern
|
||||
[SWR_INPUT_COVERAGE_COUNT]
|
||||
[2] // centroid
|
||||
[2] // forcedSampleCount
|
||||
|
@ -1027,21 +1029,6 @@ struct BEChooser
|
|||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample pattern\n");
|
||||
return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
|
||||
|
@ -1098,7 +1085,7 @@ void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COU
|
|||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[inputCoverage][isCentroid][canEarlyZ] =
|
||||
BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
(isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
|
||||
}
|
||||
}
|
||||
|
@ -1116,7 +1103,7 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_C
|
|||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[sampleCount][inputCoverage][centroid][canEarlyZ] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
(centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,7 @@ void InitCPSFuncTables();
|
|||
void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext);
|
||||
|
||||
extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
|
||||
[SWR_MSAA_SAMPLE_PATTERN_COUNT]
|
||||
[2] // isCenterPattern
|
||||
[SWR_INPUT_COVERAGE_COUNT]
|
||||
[2] // centroid
|
||||
[2] // forcedSampleCount
|
||||
|
@ -153,7 +153,34 @@ struct generateInputCoverage
|
|||
|
||||
__m256i mask[2];
|
||||
__m256i sampleCoverage[2];
|
||||
if(T::bIsStandardPattern)
|
||||
|
||||
if(T::bIsCenterPattern)
|
||||
{
|
||||
// center coverage is the same for all samples; just broadcast to the sample slots
|
||||
uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
|
||||
if(T::MultisampleT::numSamples == 1)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
|
||||
}
|
||||
else if(T::MultisampleT::numSamples == 2)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
|
||||
}
|
||||
else if(T::MultisampleT::numSamples == 4)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
|
||||
}
|
||||
else if(T::MultisampleT::numSamples == 8)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
|
||||
}
|
||||
else if(T::MultisampleT::numSamples == 16)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
|
||||
sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__m256i src = _mm256_set1_epi32(0);
|
||||
__m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
|
||||
|
@ -189,32 +216,6 @@ struct generateInputCoverage
|
|||
sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// center coverage is the same for all samples; just broadcast to the sample slots
|
||||
uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
|
||||
if(T::MultisampleT::numSamples == 1)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
|
||||
}
|
||||
else if(T::MultisampleT::numSamples == 2)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
|
||||
}
|
||||
else if(T::MultisampleT::numSamples == 4)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
|
||||
}
|
||||
else if(T::MultisampleT::numSamples == 8)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
|
||||
}
|
||||
else if(T::MultisampleT::numSamples == 16)
|
||||
{
|
||||
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
|
||||
sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
|
||||
}
|
||||
}
|
||||
|
||||
mask[0] = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xC, 0x8, 0x4, 0x0,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xC, 0x8, 0x4, 0x0);
|
||||
|
@ -332,7 +333,8 @@ struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
|
|||
// SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename T>
|
||||
INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask,
|
||||
INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS& samplePos,
|
||||
const uint64_t *const coverageMask, const uint32_t sampleMask,
|
||||
const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
|
||||
{
|
||||
uint32_t inputMask[KNOB_SIMD_WIDTH];
|
||||
|
@ -352,23 +354,23 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
|
|||
(inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
|
||||
|
||||
// look up and set the sample offsets from UL pixel corner for first covered sample
|
||||
__m256 vXSample = _mm256_set_ps(T::MultisampleT::X(sampleNum[7]),
|
||||
T::MultisampleT::X(sampleNum[6]),
|
||||
T::MultisampleT::X(sampleNum[5]),
|
||||
T::MultisampleT::X(sampleNum[4]),
|
||||
T::MultisampleT::X(sampleNum[3]),
|
||||
T::MultisampleT::X(sampleNum[2]),
|
||||
T::MultisampleT::X(sampleNum[1]),
|
||||
T::MultisampleT::X(sampleNum[0]));
|
||||
__m256 vXSample = _mm256_set_ps(samplePos.X(sampleNum[7]),
|
||||
samplePos.X(sampleNum[6]),
|
||||
samplePos.X(sampleNum[5]),
|
||||
samplePos.X(sampleNum[4]),
|
||||
samplePos.X(sampleNum[3]),
|
||||
samplePos.X(sampleNum[2]),
|
||||
samplePos.X(sampleNum[1]),
|
||||
samplePos.X(sampleNum[0]));
|
||||
|
||||
__m256 vYSample = _mm256_set_ps(T::MultisampleT::Y(sampleNum[7]),
|
||||
T::MultisampleT::Y(sampleNum[6]),
|
||||
T::MultisampleT::Y(sampleNum[5]),
|
||||
T::MultisampleT::Y(sampleNum[4]),
|
||||
T::MultisampleT::Y(sampleNum[3]),
|
||||
T::MultisampleT::Y(sampleNum[2]),
|
||||
T::MultisampleT::Y(sampleNum[1]),
|
||||
T::MultisampleT::Y(sampleNum[0]));
|
||||
__m256 vYSample = _mm256_set_ps(samplePos.Y(sampleNum[7]),
|
||||
samplePos.Y(sampleNum[6]),
|
||||
samplePos.Y(sampleNum[5]),
|
||||
samplePos.Y(sampleNum[4]),
|
||||
samplePos.Y(sampleNum[3]),
|
||||
samplePos.Y(sampleNum[2]),
|
||||
samplePos.Y(sampleNum[1]),
|
||||
samplePos.Y(sampleNum[0]));
|
||||
// add sample offset to UL pixel corner
|
||||
vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
|
||||
vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
|
||||
|
@ -398,8 +400,8 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
|
|||
|
||||
__m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
|
||||
|
||||
vXSample = _simd_set1_ps(T::MultisampleT::X(firstCoveredSampleMaskSample));
|
||||
vYSample = _simd_set1_ps(T::MultisampleT::Y(firstCoveredSampleMaskSample));
|
||||
vXSample = _simd_set1_ps(samplePos.X(firstCoveredSampleMaskSample));
|
||||
vYSample = _simd_set1_ps(samplePos.Y(firstCoveredSampleMaskSample));
|
||||
|
||||
// blend in case 3a pixel locations
|
||||
psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
|
||||
|
@ -494,7 +496,7 @@ inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uin
|
|||
}
|
||||
|
||||
template<typename T>
|
||||
void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC &work)
|
||||
void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos, SWR_TRIANGLE_DESC &work)
|
||||
{
|
||||
psContext->pAttribs = work.pAttribs;
|
||||
psContext->pPerspAttribs = work.pPerspAttribs;
|
||||
|
@ -507,14 +509,15 @@ void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC
|
|||
|
||||
psContext->recipDet = work.recipDet;
|
||||
psContext->pRecipW = work.pRecipW;
|
||||
psContext->pSamplePosX = reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
|
||||
psContext->pSamplePosY = reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
|
||||
psContext->pSamplePosX = samplePos.X();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
|
||||
psContext->pSamplePosY = samplePos.Y();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
|
||||
psContext->rasterizerSampleCount = T::MultisampleT::numSamples;
|
||||
psContext->sampleIndex = 0;
|
||||
}
|
||||
|
||||
template<typename T, bool IsSingleSample>
|
||||
void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
|
||||
void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos,
|
||||
const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
|
||||
{
|
||||
if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid positions are still different
|
||||
{
|
||||
|
@ -530,16 +533,16 @@ void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, co
|
|||
if (T::bCentroidPos)
|
||||
{
|
||||
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
|
||||
if (T::bIsStandardPattern)
|
||||
{
|
||||
// add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
|
||||
CalcCentroidPos<T>(*psContext, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
|
||||
}
|
||||
else
|
||||
if (T::bIsCenterPattern)
|
||||
{
|
||||
psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
|
||||
psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
|
||||
}
|
||||
else
|
||||
{
|
||||
// add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
|
||||
CalcCentroidPos<T>(*psContext, samplePos, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
|
||||
}
|
||||
|
||||
CalcCentroidBarycentrics(coeffs, *psContext, psContext->vX.UL, psContext->vY.UL);
|
||||
}
|
||||
|
@ -557,8 +560,9 @@ struct PixelRateZTestLoop
|
|||
PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
|
||||
uint8_t*& depthBuffer, uint8_t*& stencilBuffer, const uint8_t ClipDistanceMask) :
|
||||
pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
|
||||
clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer) {};
|
||||
|
||||
samplePos(state.rastState.samplePositions),
|
||||
clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
|
||||
|
||||
INLINE
|
||||
uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
|
||||
const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
|
||||
|
@ -597,8 +601,8 @@ struct PixelRateZTestLoop
|
|||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
|
||||
|
||||
// calc I & J per sample
|
||||
CalcSampleBarycentrics(coeffs, psContext);
|
||||
|
@ -673,6 +677,7 @@ private:
|
|||
const BarycentricCoeffs& coeffs;
|
||||
const API_STATE& state;
|
||||
const SWR_PS_STATE& psState;
|
||||
const SWR_MULTISAMPLE_POS& samplePos;
|
||||
const uint8_t clipDistanceMask;
|
||||
uint8_t*& pDepthBuffer;
|
||||
uint8_t*& pStencilBuffer;
|
||||
|
@ -862,7 +867,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
SetupPixelShaderContext<T>(&psContext, work);
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
SetupPixelShaderContext<T>(&psContext, samplePos, work);
|
||||
|
||||
uint8_t *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
@ -887,7 +893,6 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
{
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
|
||||
|
||||
#endif
|
||||
simdscalar activeLanes;
|
||||
if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
|
||||
|
@ -904,7 +909,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
|
@ -966,7 +971,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
|||
{
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
|
||||
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
|
||||
uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
|
||||
simdscalar coverageMask, depthMask;
|
||||
if(T::bForcedSampleCount)
|
||||
{
|
||||
|
@ -1045,15 +1050,15 @@ Endtile:
|
|||
AR_END(BEPixelRateBackend, 0);
|
||||
}
|
||||
|
||||
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
|
||||
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0,
|
||||
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
|
||||
>
|
||||
struct SwrBackendTraits
|
||||
{
|
||||
static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
|
||||
static const bool bIsCenterPattern = (isCenter == 1);
|
||||
static const uint32_t InputCoverage = coverage;
|
||||
static const bool bCentroidPos = (centroid == 1);
|
||||
static const bool bForcedSampleCount = (forced == 1);
|
||||
static const bool bCanEarlyZ = (canEarlyZ == 1);
|
||||
typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, (bIsStandardPattern) ? SWR_MSAA_STANDARD_PATTERN : SWR_MSAA_CENTER_PATTERN> MultisampleT;
|
||||
typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, bIsCenterPattern> MultisampleT;
|
||||
};
|
||||
|
|
|
@ -640,9 +640,8 @@ void BinTriangles(
|
|||
else
|
||||
{
|
||||
// degenerate triangles won't be sent to rasterizer; just enable all edges
|
||||
pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
|
||||
(rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID,
|
||||
(state.scissorsTileAligned == false));
|
||||
pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
|
||||
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID, (state.scissorsTileAligned == false));
|
||||
}
|
||||
|
||||
if (!triMask)
|
||||
|
@ -658,7 +657,7 @@ void BinTriangles(
|
|||
// only discard for non-MSAA case and when conservative rast is disabled
|
||||
// (xmin + 127) & ~255
|
||||
// (xmax + 128) & ~255
|
||||
if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.samplePattern == SWR_MSAA_CENTER_PATTERN) &&
|
||||
if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.bIsCenterPattern) &&
|
||||
(!CT::IsConservativeT::value))
|
||||
{
|
||||
origTriMask = triMask;
|
||||
|
@ -787,9 +786,8 @@ endBinTriangles:
|
|||
{
|
||||
// only rasterize valid edges if we have a degenerate primitive
|
||||
int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
|
||||
work.pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
|
||||
(rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable,
|
||||
(state.scissorsTileAligned == false));
|
||||
work.pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
|
||||
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable, (state.scissorsTileAligned == false));
|
||||
|
||||
// Degenerate triangles are required to be constant interpolated
|
||||
isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
|
||||
|
|
|
@ -50,16 +50,3 @@ const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosX[16]
|
|||
{0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16]
|
||||
{0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000};
|
||||
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosX{ 0.5f };
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosY{ 0.5f };
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosX[2]{ 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosY[2]{ 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosX[4]{ 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosY[4]{ 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosX[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosY[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosX[16]
|
||||
{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosY[16]
|
||||
{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
|
|
|
@ -58,70 +58,21 @@ SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
|
|||
// hardcoded offsets based on Direct3d standard multisample positions
|
||||
// 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
|
||||
// coords are 0.8 fixed point offsets from (0, 0)
|
||||
template<SWR_MULTISAMPLE_COUNT sampleCount, SWR_MSAA_SAMPLE_PATTERN samplePattern = SWR_MSAA_STANDARD_PATTERN>
|
||||
template<SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
|
||||
struct MultisampleTraits
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum) = delete;
|
||||
INLINE static __m128i vYi(uint32_t sampleNum) = delete;
|
||||
INLINE static simdscalar vX(uint32_t sampleNum) = delete;
|
||||
INLINE static simdscalar vY(uint32_t sampleNum) = delete;
|
||||
INLINE static float X(uint32_t sampleNum) = delete;
|
||||
INLINE static float Y(uint32_t sampleNum) = delete;
|
||||
INLINE static __m128i TileSampleOffsetsX() = delete;
|
||||
INLINE static __m128i TileSampleOffsetsY() = delete;
|
||||
INLINE static simdscalari FullSampleMask() = delete;
|
||||
|
||||
static const uint32_t numSamples = 0;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_1X, false>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i X = _mm_set1_epi32(samplePosXi);
|
||||
return X;
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i Y = _mm_set1_epi32(samplePosYi);
|
||||
return Y;
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X = _simd_set1_ps(0.5f);
|
||||
return X;
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y = _simd_set1_ps(0.5f);
|
||||
return Y;
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return samplePosX;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return samplePosY;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x80;
|
||||
static const uint32_t bboxRightEdge = 0x80;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x80;
|
||||
static const uint32_t bboxBottomEdge = 0x80;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
|
||||
|
||||
static const uint32_t samplePosXi;
|
||||
|
@ -134,43 +85,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_1X, true>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
|
||||
|
||||
static const uint32_t numSamples = 1;
|
||||
|
@ -181,57 +99,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_2X, false>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
static const __m128i X[numSamples] {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1])};
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
static const __m128i Y[numSamples] {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1])};
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
|
||||
assert(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
|
||||
assert(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
|
||||
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x40;
|
||||
static const uint32_t bboxRightEdge = 0xC0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x40;
|
||||
static const uint32_t bboxBottomEdge = 0xC0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask =_simd_set1_epi32(0x3);
|
||||
|
@ -248,43 +119,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_2X, true>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask =_simd_set1_epi32(0x3);
|
||||
|
@ -298,61 +136,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_4X, false>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i X[numSamples]
|
||||
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i Y[numSamples]
|
||||
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X[numSamples]
|
||||
{_simd_set1_ps(0.375f), _simd_set1_ps(0.875), _simd_set1_ps(0.125), _simd_set1_ps(0.625)};
|
||||
assert(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y[numSamples]
|
||||
{_simd_set1_ps(0.125), _simd_set1_ps(0.375f), _simd_set1_ps(0.625), _simd_set1_ps(0.875)};
|
||||
assert(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
|
||||
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x20;
|
||||
static const uint32_t bboxRightEdge = 0xE0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x20;
|
||||
static const uint32_t bboxBottomEdge = 0xE0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xF);
|
||||
|
@ -369,48 +156,16 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_4X, true>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xF);
|
||||
return mask;
|
||||
}
|
||||
|
||||
static const uint32_t numSamples = 4;
|
||||
static const float samplePosX[4];
|
||||
static const float samplePosY[4];
|
||||
|
@ -419,65 +174,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_8X, false>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i X[numSamples]
|
||||
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
|
||||
_mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i Y[numSamples]
|
||||
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
|
||||
_mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X[numSamples]
|
||||
{_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.8125), _simd_set1_ps(0.3125),
|
||||
_simd_set1_ps(0.1875), _simd_set1_ps(0.0625), _simd_set1_ps(0.6875), _simd_set1_ps(0.9375)};
|
||||
assert(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y[numSamples]
|
||||
{_simd_set1_ps(0.3125), _simd_set1_ps(0.6875), _simd_set1_ps(0.5625), _simd_set1_ps(0.1875),
|
||||
_simd_set1_ps(0.8125), _simd_set1_ps(0.4375), _simd_set1_ps(0.9375), _simd_set1_ps(0.0625)};
|
||||
assert(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
|
||||
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x10;
|
||||
static const uint32_t bboxRightEdge = 0xF0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x10;
|
||||
static const uint32_t bboxBottomEdge = 0xF0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xFF);
|
||||
|
@ -494,43 +194,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_8X, true>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xFF);
|
||||
|
@ -544,73 +211,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_16X, false>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i X[numSamples]
|
||||
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
|
||||
_mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7]),
|
||||
_mm_set1_epi32(samplePosXi[8]), _mm_set1_epi32(samplePosXi[9]), _mm_set1_epi32(samplePosXi[10]), _mm_set1_epi32(samplePosXi[11]),
|
||||
_mm_set1_epi32(samplePosXi[12]), _mm_set1_epi32(samplePosXi[13]), _mm_set1_epi32(samplePosXi[14]), _mm_set1_epi32(samplePosXi[15])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i Y[numSamples]
|
||||
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
|
||||
_mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7]),
|
||||
_mm_set1_epi32(samplePosYi[8]), _mm_set1_epi32(samplePosYi[9]), _mm_set1_epi32(samplePosYi[10]), _mm_set1_epi32(samplePosYi[11]),
|
||||
_mm_set1_epi32(samplePosYi[12]), _mm_set1_epi32(samplePosYi[13]), _mm_set1_epi32(samplePosYi[14]), _mm_set1_epi32(samplePosYi[15])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X[numSamples]
|
||||
{_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.3125), _simd_set1_ps(0.7500),
|
||||
_simd_set1_ps(0.1875), _simd_set1_ps(0.6250), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875),
|
||||
_simd_set1_ps(0.3750), _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.1250),
|
||||
_simd_set1_ps(0.0000), _simd_set1_ps(0.9375), _simd_set1_ps(0.8750), _simd_set1_ps(0.0625)};
|
||||
assert(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y[numSamples]
|
||||
{_simd_set1_ps(0.5625), _simd_set1_ps(0.3125), _simd_set1_ps(0.6250), _simd_set1_ps(0.4375),
|
||||
_simd_set1_ps(0.3750), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875), _simd_set1_ps(0.1875),
|
||||
_simd_set1_ps(0.8750), _simd_set1_ps(0.0625), _simd_set1_ps(0.1250), _simd_set1_ps(0.7500),
|
||||
_simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.9375), _simd_set1_ps(0.0000)};
|
||||
assert(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
|
||||
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x00;
|
||||
static const uint32_t bboxRightEdge = 0xF0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x00;
|
||||
static const uint32_t bboxBottomEdge = 0xF0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
|
||||
|
@ -627,43 +231,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
|
|||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_16X, true>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
return _simd_set1_ps(0.5f);
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
// BR, BL, UR, UL
|
||||
return _mm_set1_epi32(0x80);
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
|
||||
|
@ -675,3 +246,50 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
|
|||
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
|
||||
static const uint32_t numCoverageSamples = 1;
|
||||
};
|
||||
|
||||
INLINE
|
||||
bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount, const SWR_MULTISAMPLE_POS& samplePos)
|
||||
{
|
||||
// detect if we're using standard or center sample patterns
|
||||
const uint32_t *standardPosX, *standardPosY;
|
||||
switch(sampleCount)
|
||||
{
|
||||
case SWR_MULTISAMPLE_1X:
|
||||
standardPosX = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosXi;
|
||||
standardPosY = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosYi;
|
||||
break;
|
||||
case SWR_MULTISAMPLE_2X:
|
||||
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosXi;
|
||||
standardPosY = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosYi;
|
||||
break;
|
||||
case SWR_MULTISAMPLE_4X:
|
||||
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosXi;
|
||||
standardPosY = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosYi;
|
||||
break;
|
||||
case SWR_MULTISAMPLE_8X:
|
||||
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosXi;
|
||||
standardPosY = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosYi;
|
||||
break;
|
||||
case SWR_MULTISAMPLE_16X:
|
||||
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosXi;
|
||||
standardPosY = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosYi;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// scan sample pattern for standard or center
|
||||
uint32_t numSamples = GetNumSamples(sampleCount);
|
||||
bool bIsStandard = true;
|
||||
if(numSamples > 1)
|
||||
{
|
||||
for(uint32_t i = 0; i < numSamples; i++)
|
||||
{
|
||||
bIsStandard = (standardPosX[i] == samplePos.Xi(i)) ||
|
||||
(standardPosY[i] == samplePos.Yi(i));
|
||||
if(!bIsStandard)
|
||||
break;
|
||||
}
|
||||
}
|
||||
return !bIsStandard;
|
||||
}
|
|
@ -1118,8 +1118,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
__m256d vEdgeTileBbox[3];
|
||||
if (NumCoverageSamplesT::value > 1)
|
||||
{
|
||||
__m128i vTileSampleBBoxXh = RT::MT::TileSampleOffsetsX();
|
||||
__m128i vTileSampleBBoxYh = RT::MT::TileSampleOffsetsY();
|
||||
const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
|
||||
const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
|
||||
const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
|
||||
|
||||
__m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
|
||||
__m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
|
||||
|
@ -1206,8 +1207,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
|||
}
|
||||
else
|
||||
{
|
||||
__m128i vSampleOffsetXh = RT::MT::vXi(sampleNum);
|
||||
__m128i vSampleOffsetYh = RT::MT::vYi(sampleNum);
|
||||
const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
|
||||
__m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
|
||||
__m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
|
||||
__m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
|
||||
__m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
|
||||
|
||||
|
@ -1340,7 +1342,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
|
|||
// setup triangle rasterizer function
|
||||
PFN_WORK_FUNC pfnTriRast;
|
||||
// conservative rast not supported for points/lines
|
||||
pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false,
|
||||
pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
|
||||
SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
|
||||
|
||||
// overwrite texcoords for point sprites
|
||||
|
@ -1673,7 +1675,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
|||
// setup triangle rasterizer function
|
||||
PFN_WORK_FUNC pfnTriRast;
|
||||
// conservative rast not supported for points/lines
|
||||
pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false,
|
||||
pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
|
||||
SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
|
||||
|
||||
// make sure this macrotile intersects the triangle
|
||||
|
|
|
@ -115,8 +115,7 @@ template <typename NumSamplesT, typename CenterPatternT, typename ConservativeT,
|
|||
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
|
||||
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>>
|
||||
{
|
||||
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value),
|
||||
(CenterPatternT::value ? SWR_MSAA_CENTER_PATTERN : SWR_MSAA_STANDARD_PATTERN)> MT;
|
||||
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value), CenterPatternT::value> MT;
|
||||
|
||||
/// Fixed point precision the rasterizer is using
|
||||
typedef FixedPointTraits<Fixed_16_8> PrecisionT;
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
|
||||
#include "common/formats.h"
|
||||
#include "common/simdintrin.h"
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// PRIMITIVE_TOPOLOGY.
|
||||
|
@ -333,8 +335,7 @@ struct SWR_PS_CONTEXT
|
|||
|
||||
uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
|
||||
|
||||
uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS];
|
||||
// IN: Pointers to render target hottiles
|
||||
uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -909,13 +910,6 @@ enum SWR_FRONTWINDING
|
|||
};
|
||||
|
||||
|
||||
enum SWR_MSAA_SAMPLE_PATTERN
|
||||
{
|
||||
SWR_MSAA_CENTER_PATTERN,
|
||||
SWR_MSAA_STANDARD_PATTERN,
|
||||
SWR_MSAA_SAMPLE_PATTERN_COUNT
|
||||
};
|
||||
|
||||
enum SWR_PIXEL_LOCATION
|
||||
{
|
||||
SWR_PIXEL_LOCATION_CENTER,
|
||||
|
@ -925,16 +919,75 @@ enum SWR_PIXEL_LOCATION
|
|||
// fixed point screen space sample locations within a pixel
|
||||
struct SWR_MULTISAMPLE_POS
|
||||
{
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
};
|
||||
public:
|
||||
INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
|
||||
INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
|
||||
INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
|
||||
INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
|
||||
INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
|
||||
INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
|
||||
INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
|
||||
INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
|
||||
typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
|
||||
INLINE sampleArrayT X() const { return _x; }; // @llvm_func
|
||||
INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
|
||||
INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
|
||||
INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
|
||||
INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
|
||||
INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
|
||||
INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
|
||||
INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
|
||||
|
||||
INLINE void PrecalcSampleData(int numSamples) // @llvm_func_start
|
||||
{
|
||||
for(int i = 0; i < numSamples; i++)
|
||||
{
|
||||
_vXi[i] = _mm_set1_epi32(_xi[i]);
|
||||
_vYi[i] = _mm_set1_epi32(_yi[i]);
|
||||
_vX[i] = _simd_set1_ps(_x[i]);
|
||||
_vY[i] = _simd_set1_ps(_y[i]);
|
||||
}
|
||||
// precalculate the raster tile BB for the rasterizer.
|
||||
CalcTileSampleOffsets(numSamples);
|
||||
} // @llvm_func_end
|
||||
|
||||
|
||||
private:
|
||||
INLINE void CalcTileSampleOffsets(int numSamples) // @llvm_func_start
|
||||
{
|
||||
auto expandThenBlend4 = [](uint32_t* min, uint32_t* max, auto mask)
|
||||
{
|
||||
__m128i vMin = _mm_set1_epi32(*min);
|
||||
__m128i vMax = _mm_set1_epi32(*max);
|
||||
return _simd_blend4_epi32<decltype(mask)::value>(vMin, vMax);
|
||||
};
|
||||
|
||||
auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
|
||||
auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
|
||||
std::integral_constant<int, 0xA> xMask;
|
||||
// BR(max), BL(min), UR(max), UL(min)
|
||||
tileSampleOffsetsX = expandThenBlend4(minXi, maxXi, xMask);
|
||||
|
||||
auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
|
||||
auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
|
||||
std::integral_constant<int, 0xC> yMask;
|
||||
// BR(max), BL(min), UR(max), UL(min)
|
||||
tileSampleOffsetsY = expandThenBlend4(minYi, maxYi, yMask);
|
||||
}; // @llvm_func_end
|
||||
// scalar sample values
|
||||
uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
|
||||
uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
|
||||
float _x[SWR_MAX_NUM_MULTISAMPLES];
|
||||
float _y[SWR_MAX_NUM_MULTISAMPLES];
|
||||
|
||||
// precalc'd / vectorized samples
|
||||
__m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
|
||||
__m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
|
||||
simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
|
||||
simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
|
||||
__m128i tileSampleOffsetsX;
|
||||
__m128i tileSampleOffsetsY;
|
||||
|
||||
enum SWR_MSAA_RASTMODE
|
||||
{
|
||||
SWR_MSAA_RASTMODE_OFF_PIXEL,
|
||||
SWR_MSAA_RASTMODE_OFF_PATTERN,
|
||||
SWR_MSAA_RASTMODE_ON_PIXEL,
|
||||
SWR_MSAA_RASTMODE_ON_PATTERN
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -951,7 +1004,6 @@ struct SWR_RASTSTATE
|
|||
uint32_t pointParam : 1;
|
||||
uint32_t pointSpriteEnable : 1;
|
||||
uint32_t pointSpriteTopOrigin : 1;
|
||||
uint32_t msaaRastEnable : 1;
|
||||
uint32_t forcedSampleCount : 1;
|
||||
uint32_t pixelOffset : 1;
|
||||
uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
|
||||
|
@ -965,15 +1017,11 @@ struct SWR_RASTSTATE
|
|||
float depthBiasClamp;
|
||||
SWR_FORMAT depthFormat; // @llvm_enum
|
||||
|
||||
///@todo: MSAA lines
|
||||
// multisample state for MSAA lines
|
||||
SWR_MSAA_RASTMODE rastMode; // @llvm_enum
|
||||
|
||||
// sample count the rasterizer is running at
|
||||
SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
|
||||
uint32_t pixelLocation; // UL or Center
|
||||
SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES];
|
||||
SWR_MSAA_SAMPLE_PATTERN samplePattern; // @llvm_enum
|
||||
SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
|
||||
bool bIsCenterPattern; // @llvm_enum
|
||||
|
||||
// user clip/cull distance enables
|
||||
uint8_t cullDistanceMask;
|
||||
|
|
|
@ -1061,8 +1061,6 @@ swr_update_derived(struct pipe_context *pipe,
|
|||
rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
|
||||
|
||||
/* XXX TODO: Add multisample */
|
||||
rastState->msaaRastEnable = false;
|
||||
rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
|
||||
rastState->sampleCount = SWR_MULTISAMPLE_1X;
|
||||
rastState->forcedSampleCount = false;
|
||||
|
||||
|
|
Loading…
Reference in New Issue