swr/rast: Pull most of the VPAI manipulation out of the binner/clipper
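Move the viewport array index (VPAI) gather and out-of-bounds clamping out of the binner/clipper; compute the viewport indices in the frontend code and hand them down as an extra argument instead. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>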
This commit is contained in:
parent
f882891684
commit
8b06920796
|
@ -307,7 +307,8 @@ void SIMDCALL BinTrianglesImpl(
|
|||
uint32_t workerId,
|
||||
typename SIMD_T::Vec4 tri[3],
|
||||
uint32_t triMask,
|
||||
typename SIMD_T::Integer const &primID)
|
||||
typename SIMD_T::Integer const &primID,
|
||||
typename SIMD_T::Integer const &viewportIdx)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
|
@ -323,31 +324,6 @@ void SIMDCALL BinTrianglesImpl(
|
|||
typename SIMD_T::Float vRecipW1 = SIMD_T::set1_ps(1.0f);
|
||||
typename SIMD_T::Float vRecipW2 = SIMD_T::set1_ps(1.0f);
|
||||
|
||||
typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
|
||||
typename SIMD_T::Vec4 vpiAttrib[3];
|
||||
typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
|
||||
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
|
||||
vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
}
|
||||
|
||||
|
||||
if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0
|
||||
{
|
||||
// OOB indices => forced to zero.
|
||||
vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
|
||||
typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
|
||||
viewportIdx = SIMD_T::and_si(vClearMask, vpai);
|
||||
}
|
||||
else
|
||||
{
|
||||
viewportIdx = vpai;
|
||||
}
|
||||
|
||||
if (feState.vpTransformDisable)
|
||||
{
|
||||
// RHW is passed in directly when VP transform is disabled
|
||||
|
@ -375,7 +351,7 @@ void SIMDCALL BinTrianglesImpl(
|
|||
tri[2].v[2] = SIMD_T::mul_ps(tri[2].v[2], vRecipW2);
|
||||
|
||||
// Viewport transform to screen space coords
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
if (pa.viewportArrayActive)
|
||||
{
|
||||
viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
|
||||
}
|
||||
|
@ -568,8 +544,8 @@ void SIMDCALL BinTrianglesImpl(
|
|||
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
|
||||
{
|
||||
typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax;
|
||||
if (pa.viewportArrayActive)
|
||||
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
|
||||
}
|
||||
|
@ -786,9 +762,10 @@ void BinTriangles(
|
|||
uint32_t workerId,
|
||||
simdvector tri[3],
|
||||
uint32_t triMask,
|
||||
simdscalari const &primID)
|
||||
simdscalari const &primID,
|
||||
simdscalari const &viewportIdx)
|
||||
{
|
||||
BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID);
|
||||
BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
|
@ -799,9 +776,10 @@ void SIMDCALL BinTriangles_simd16(
|
|||
uint32_t workerId,
|
||||
simd16vector tri[3],
|
||||
uint32_t triMask,
|
||||
simd16scalari const &primID)
|
||||
simd16scalari const &primID,
|
||||
simd16scalari const &viewportIdx)
|
||||
{
|
||||
BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID);
|
||||
BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1026,7 +1004,7 @@ void BinPostSetupPointsImpl(
|
|||
{
|
||||
typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax;
|
||||
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
if (pa.viewportArrayActive)
|
||||
{
|
||||
GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
|
||||
}
|
||||
|
@ -1176,38 +1154,13 @@ void BinPointsImpl(
|
|||
uint32_t workerId,
|
||||
typename SIMD_T::Vec4 prim[3],
|
||||
uint32_t primMask,
|
||||
typename SIMD_T::Integer const &primID)
|
||||
typename SIMD_T::Integer const &primID,
|
||||
typename SIMD_T::Integer const &viewportIdx)
|
||||
{
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_FRONTEND_STATE& feState = state.frontendState;
|
||||
const SWR_RASTSTATE& rastState = state.rastState;
|
||||
|
||||
// Read back viewport index if required
|
||||
typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
|
||||
typename SIMD_T::Vec4 vpiAttrib[1];
|
||||
typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
|
||||
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
|
||||
vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
}
|
||||
|
||||
|
||||
if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0
|
||||
{
|
||||
// OOB indices => forced to zero.
|
||||
vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
|
||||
typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
|
||||
viewportIdx = SIMD_T::and_si(vClearMask, vpai);
|
||||
}
|
||||
else
|
||||
{
|
||||
viewportIdx = vpai;
|
||||
}
|
||||
|
||||
if (!feState.vpTransformDisable)
|
||||
{
|
||||
// perspective divide
|
||||
|
@ -1218,7 +1171,7 @@ void BinPointsImpl(
|
|||
prim[0].z = SIMD_T::mul_ps(prim[0].z, vRecipW0);
|
||||
|
||||
// viewport transform to screen coords
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
if (pa.viewportArrayActive)
|
||||
{
|
||||
viewportTransform<1>(prim, state.vpMatrices, viewportIdx);
|
||||
}
|
||||
|
@ -1249,7 +1202,8 @@ void BinPoints(
|
|||
uint32_t workerId,
|
||||
simdvector prim[3],
|
||||
uint32_t primMask,
|
||||
simdscalari const &primID)
|
||||
simdscalari const &primID,
|
||||
simdscalari const &viewportIdx)
|
||||
{
|
||||
BinPointsImpl<SIMD256, KNOB_SIMD_WIDTH>(
|
||||
pDC,
|
||||
|
@ -1257,7 +1211,8 @@ void BinPoints(
|
|||
workerId,
|
||||
prim,
|
||||
primMask,
|
||||
primID);
|
||||
primID,
|
||||
viewportIdx);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
|
@ -1267,7 +1222,8 @@ void SIMDCALL BinPoints_simd16(
|
|||
uint32_t workerId,
|
||||
simd16vector prim[3],
|
||||
uint32_t primMask,
|
||||
simd16scalari const &primID)
|
||||
simd16scalari const &primID,
|
||||
simd16scalari const &viewportIdx)
|
||||
{
|
||||
BinPointsImpl<SIMD512, KNOB_SIMD16_WIDTH>(
|
||||
pDC,
|
||||
|
@ -1275,7 +1231,8 @@ void SIMDCALL BinPoints_simd16(
|
|||
workerId,
|
||||
prim,
|
||||
primMask,
|
||||
primID);
|
||||
primID,
|
||||
viewportIdx);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1362,7 +1319,7 @@ void BinPostSetupLinesImpl(
|
|||
{
|
||||
typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax;
|
||||
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
if (pa.viewportArrayActive)
|
||||
{
|
||||
GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
|
||||
}
|
||||
|
@ -1513,7 +1470,8 @@ void SIMDCALL BinLinesImpl(
|
|||
uint32_t workerId,
|
||||
typename SIMD_T::Vec4 prim[3],
|
||||
uint32_t primMask,
|
||||
typename SIMD_T::Integer const &primID)
|
||||
typename SIMD_T::Integer const &primID,
|
||||
typename SIMD_T::Integer const &viewportIdx)
|
||||
{
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE& rastState = state.rastState;
|
||||
|
@ -1521,26 +1479,6 @@ void SIMDCALL BinLinesImpl(
|
|||
|
||||
typename SIMD_T::Float vRecipW[2] = { SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f) };
|
||||
|
||||
typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
|
||||
typename SIMD_T::Vec4 vpiAttrib[2];
|
||||
typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
|
||||
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
}
|
||||
|
||||
|
||||
if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0
|
||||
{
|
||||
// OOB indices => forced to zero.
|
||||
vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
|
||||
typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
|
||||
viewportIdx = SIMD_T::and_si(vClearMask, vpai);
|
||||
}
|
||||
|
||||
if (!feState.vpTransformDisable)
|
||||
{
|
||||
// perspective divide
|
||||
|
@ -1557,7 +1495,7 @@ void SIMDCALL BinLinesImpl(
|
|||
prim[1].v[2] = SIMD_T::mul_ps(prim[1].v[2], vRecipW[1]);
|
||||
|
||||
// viewport transform to screen coords
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
if (pa.viewportArrayActive)
|
||||
{
|
||||
viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
|
||||
}
|
||||
|
@ -1593,9 +1531,10 @@ void BinLines(
|
|||
uint32_t workerId,
|
||||
simdvector prim[],
|
||||
uint32_t primMask,
|
||||
simdscalari const &primID)
|
||||
simdscalari const &primID,
|
||||
simdscalari const &viewportIdx)
|
||||
{
|
||||
BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID);
|
||||
BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
|
@ -1605,9 +1544,10 @@ void SIMDCALL BinLines_simd16(
|
|||
uint32_t workerId,
|
||||
simd16vector prim[3],
|
||||
uint32_t primMask,
|
||||
simd16scalari const &primID)
|
||||
simd16scalari const &primID,
|
||||
simd16scalari const &viewportIdx)
|
||||
{
|
||||
BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID);
|
||||
BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -160,35 +160,35 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
|
|||
return i;
|
||||
}
|
||||
|
||||
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId)
|
||||
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipTriangles, pDC->drawId);
|
||||
Clipper<SIMD256, 3> clipper(workerId, pDC);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
AR_END(FEClipTriangles, 1);
|
||||
}
|
||||
|
||||
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId)
|
||||
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipLines, pDC->drawId);
|
||||
Clipper<SIMD256, 2> clipper(workerId, pDC);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
AR_END(FEClipLines, 1);
|
||||
}
|
||||
|
||||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId)
|
||||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipPoints, pDC->drawId);
|
||||
Clipper<SIMD256, 1> clipper(workerId, pDC);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
AR_END(FEClipPoints, 1);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId)
|
||||
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipTriangles, pDC->drawId);
|
||||
|
@ -198,12 +198,12 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor
|
|||
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
|
||||
|
||||
pa.useAlternateOffset = false;
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
|
||||
AR_END(FEClipTriangles, 1);
|
||||
}
|
||||
|
||||
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId)
|
||||
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipLines, pDC->drawId);
|
||||
|
@ -213,12 +213,12 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI
|
|||
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
|
||||
|
||||
pa.useAlternateOffset = false;
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
|
||||
AR_END(FEClipLines, 1);
|
||||
}
|
||||
|
||||
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId)
|
||||
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipPoints, pDC->drawId);
|
||||
|
@ -228,10 +228,9 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t worker
|
|||
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
|
||||
|
||||
pa.useAlternateOffset = false;
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
|
||||
AR_END(FEClipPoints, 1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -178,11 +178,11 @@ struct BinnerChooser<SIMD256>
|
|||
};
|
||||
}
|
||||
|
||||
void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID)
|
||||
void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx)
|
||||
{
|
||||
SWR_ASSERT(pfnBinFunc != nullptr);
|
||||
|
||||
pfnBinFunc(pDC, pa, workerId, prims, primMask, primID);
|
||||
pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -231,11 +231,11 @@ struct BinnerChooser<SIMD512>
|
|||
};
|
||||
}
|
||||
|
||||
void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID)
|
||||
void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx)
|
||||
{
|
||||
SWR_ASSERT(pfnBinFunc != nullptr);
|
||||
|
||||
pfnBinFunc(pDC, pa, workerId, prims, primMask, primID);
|
||||
pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -437,7 +437,7 @@ public:
|
|||
return SIMD_T::movemask_ps(vClipCullMask);
|
||||
}
|
||||
|
||||
void ClipSimd(const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId)
|
||||
void ClipSimd(const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId, const typename SIMD_T::Integer &vViewportIdx)
|
||||
{
|
||||
// input/output vertex store for clipper
|
||||
SIMDVERTEX_T<SIMD_T> vertices[7]; // maximum 7 verts generated per triangle
|
||||
|
@ -538,6 +538,7 @@ public:
|
|||
|
||||
const uint32_t *pVertexCount = reinterpret_cast<const uint32_t *>(&vNumClippedVerts);
|
||||
const uint32_t *pPrimitiveId = reinterpret_cast<const uint32_t *>(&vPrimId);
|
||||
const uint32_t *pViewportIdx = reinterpret_cast<const uint32_t *>(&vViewportIdx);
|
||||
|
||||
const SIMD256::Integer vOffsets = SIMD256::set_epi32(
|
||||
0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
|
||||
|
@ -642,12 +643,14 @@ public:
|
|||
}
|
||||
|
||||
PA_STATE_OPT clipPA(pDC, numEmittedPrims, reinterpret_cast<uint8_t *>(&transposedPrims[0]), numEmittedVerts, SWR_VTX_NUM_SLOTS, true, NumVertsPerPrim, clipTopology);
|
||||
clipPA.viewportArrayActive = pa.viewportArrayActive;
|
||||
|
||||
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f };
|
||||
|
||||
const uint32_t primMask = primMaskMap[numEmittedPrims];
|
||||
|
||||
const typename SIMD_T::Integer primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
|
||||
const typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]);
|
||||
|
||||
while (clipPA.GetNextStreamOutput())
|
||||
{
|
||||
|
@ -659,7 +662,7 @@ public:
|
|||
|
||||
if (assemble)
|
||||
{
|
||||
binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID);
|
||||
binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx);
|
||||
}
|
||||
|
||||
} while (clipPA.NextPrim());
|
||||
|
@ -674,7 +677,7 @@ public:
|
|||
UPDATE_STAT_FE(CPrimitives, numClippedPrims);
|
||||
}
|
||||
|
||||
void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId)
|
||||
void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx)
|
||||
{
|
||||
SWR_ASSERT(pa.pDC != nullptr);
|
||||
|
||||
|
@ -686,31 +689,6 @@ public:
|
|||
uint32_t numInvoc = _mm_popcnt_u32(primMask);
|
||||
UPDATE_STAT_FE(CInvocations, numInvoc);
|
||||
|
||||
// Read back viewport index if required
|
||||
typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si();
|
||||
typename SIMD_T::Vec4 vpiAttrib[NumVertsPerPrim];
|
||||
typename SIMD_T::Integer vpai = SIMD_T::setzero_si();
|
||||
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
}
|
||||
|
||||
|
||||
if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0
|
||||
{
|
||||
// OOB indices => forced to zero.
|
||||
vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si());
|
||||
typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports);
|
||||
viewportIdx = SIMD_T::and_si(vClearMask, vpai);
|
||||
}
|
||||
else
|
||||
{
|
||||
viewportIdx = vpai;
|
||||
}
|
||||
|
||||
ComputeClipCodes(prim, viewportIdx);
|
||||
|
||||
// cull prims with NAN coords
|
||||
|
@ -738,7 +716,7 @@ public:
|
|||
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
|
||||
// we have to clip tris, execute the clipper, which will also
|
||||
// call the binner
|
||||
ClipSimd(SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId);
|
||||
ClipSimd(SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx);
|
||||
AR_END(FEGuardbandClip, 1);
|
||||
}
|
||||
else if (validMask)
|
||||
|
@ -747,7 +725,7 @@ public:
|
|||
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
|
||||
|
||||
// forward valid prims directly to binner
|
||||
binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId);
|
||||
binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1157,12 +1135,12 @@ private:
|
|||
|
||||
|
||||
// pipeline stage functions
|
||||
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId);
|
||||
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId);
|
||||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId);
|
||||
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
|
||||
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
|
||||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
|
||||
#if USE_SIMD16_FRONTEND
|
||||
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId);
|
||||
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId);
|
||||
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId);
|
||||
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
|
||||
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
|
||||
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -214,12 +214,12 @@ struct PA_STATE;
|
|||
|
||||
// function signature for pipeline stages that execute after primitive assembly
|
||||
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
|
||||
uint32_t primMask, simdscalari const &primID);
|
||||
uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
|
||||
|
||||
#if ENABLE_AVX512_SIMD16
|
||||
// function signature for pipeline stages that execute after primitive assembly
|
||||
typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
|
||||
uint32_t primMask, simd16scalari const &primID);
|
||||
uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
|
||||
|
||||
#endif
|
||||
OSALIGNLINE(struct) API_STATE
|
||||
|
|
|
@ -988,13 +988,48 @@ static void GeometryShaderStage(
|
|||
{
|
||||
#if USE_SIMD16_FRONTEND
|
||||
simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
|
||||
|
||||
// Gather the VPAI from the SGV if provided.
|
||||
SIMD16::Vec4 vpiAttrib[3];
|
||||
SIMD16::Integer vViewportIdx = SIMD16::setzero_si();
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
vViewportIdx = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
gsPa.viewportArrayActive = true;
|
||||
}
|
||||
|
||||
{
|
||||
// OOB VPAI indices => forced to zero.
|
||||
vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
|
||||
simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
|
||||
vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
|
||||
|
||||
gsPa.useAlternateOffset = false;
|
||||
pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId);
|
||||
pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx);
|
||||
}
|
||||
#else
|
||||
simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
|
||||
pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
|
||||
|
||||
// Gather the VPAI from the SGV if provided.
|
||||
SIMD8::Vec4 vpiAttrib[3];
|
||||
SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
|
||||
// OOB VPAI indices => forced to zero.
|
||||
vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
|
||||
simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
|
||||
vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
|
||||
|
||||
gsPa.viewportArrayActive = true;
|
||||
}
|
||||
|
||||
pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -1337,14 +1372,46 @@ static void TessellationStages(
|
|||
|
||||
SWR_ASSERT(pfnClipFunc);
|
||||
#if USE_SIMD16_FRONTEND
|
||||
// Gather the VPAI from the SGV if provided.
|
||||
simd16scalari vpai = SIMD16::setzero_si();
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
simd16vector vpiAttrib[4];
|
||||
tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
tessPa.viewportArrayActive = true;
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
// OOB VPAI indices => forced to zero.
|
||||
vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
|
||||
simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports);
|
||||
vpai = SIMD16::and_si(vClearMask, vpai);
|
||||
|
||||
tessPa.useAlternateOffset = false;
|
||||
pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID);
|
||||
pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vpai);
|
||||
}
|
||||
#else
|
||||
// Gather the VPAI from the SGV if provided.
|
||||
SIMD8::Vec4 vpiAttrib[3];
|
||||
SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
|
||||
// OOB VPAI indices => forced to zero.
|
||||
vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
|
||||
simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
|
||||
vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
|
||||
|
||||
tessPa.viewportArrayActive = true;
|
||||
}
|
||||
pfnClipFunc(pDC, tessPa, workerId, prim,
|
||||
GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID));
|
||||
GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -1736,9 +1803,25 @@ void ProcessDraw(
|
|||
if (HasRastT::value)
|
||||
{
|
||||
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
|
||||
// Gather the VPAI from the SGV if provided.
|
||||
simd16scalari vpai = SIMD16::setzero_si();
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
simd16vector vpiAttrib[4];
|
||||
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
pa.viewportArrayActive = true;
|
||||
}
|
||||
|
||||
{
|
||||
// OOB VPAI indices => forced to zero.
|
||||
vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
|
||||
simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports);
|
||||
vpai = SIMD16::and_si(vClearMask, vpai);
|
||||
|
||||
pa.useAlternateOffset = false;
|
||||
pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID);
|
||||
pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1900,8 +1983,25 @@ void ProcessDraw(
|
|||
{
|
||||
SWR_ASSERT(pDC->pState->pfnProcessPrims);
|
||||
|
||||
// Gather the VPAI from the SGV if provided.
|
||||
SIMD8::Vec4 vpiAttrib[3];
|
||||
SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
|
||||
if (state.backendState.readViewportArrayIndex)
|
||||
{
|
||||
pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
|
||||
vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
|
||||
|
||||
// OOB VPAI indices => forced to zero.
|
||||
vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
|
||||
simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
|
||||
simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
|
||||
vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
|
||||
|
||||
pa.viewportArrayActive = true;
|
||||
}
|
||||
|
||||
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
|
||||
GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID));
|
||||
GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -389,10 +389,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
|
|||
#endif
|
||||
|
||||
struct PA_STATE_BASE; // forward decl
|
||||
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID);
|
||||
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID);
|
||||
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
|
||||
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
|
||||
#if USE_SIMD16_FRONTEND
|
||||
void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID);
|
||||
void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID);
|
||||
void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
|
||||
void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -77,9 +77,11 @@ struct PA_STATE
|
|||
|
||||
#if ENABLE_AVX512_SIMD16
|
||||
bool useAlternateOffset{ false };
|
||||
#endif
|
||||
|
||||
bool viewportArrayActive{ false };
|
||||
uint32_t numVertsPerPrim{ 0 };
|
||||
|
||||
#endif
|
||||
PA_STATE(){}
|
||||
PA_STATE(DRAW_CONTEXT *in_pDC, uint8_t* in_pStreamBase, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, uint32_t in_numVertsPerPrim) :
|
||||
pDC(in_pDC), pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts), vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim) {}
|
||||
|
|
Loading…
Reference in New Issue