swr: [rasterizer core] SIMD16 Frontend WIP
Implement widened clipper and binner interfaces for SIMD16. Reviewed-by: George Kyriazis <george.kyriazis@intel.com>
This commit is contained in:
parent
aea737e12e
commit
aee5276375
|
@ -839,11 +839,18 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
}
|
||||
|
||||
PFN_PROCESS_PRIMS pfnBinner;
|
||||
#if USE_SIMD16_FRONTEND
|
||||
PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16;
|
||||
#endif
|
||||
switch (pState->state.topology)
|
||||
{
|
||||
case TOP_POINT_LIST:
|
||||
pState->pfnProcessPrims = ClipPoints;
|
||||
pfnBinner = BinPoints;
|
||||
#if USE_SIMD16_FRONTEND
|
||||
pState->pfnProcessPrims_simd16 = ClipPoints_simd16;
|
||||
pfnBinner_simd16 = BinPoints_simd16;
|
||||
#endif
|
||||
break;
|
||||
case TOP_LINE_LIST:
|
||||
case TOP_LINE_STRIP:
|
||||
|
@ -852,10 +859,18 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
case TOP_LISTSTRIP_ADJ:
|
||||
pState->pfnProcessPrims = ClipLines;
|
||||
pfnBinner = BinLines;
|
||||
#if USE_SIMD16_FRONTEND
|
||||
pState->pfnProcessPrims_simd16 = ClipLines_simd16;
|
||||
pfnBinner_simd16 = BinLines_simd16;
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
pState->pfnProcessPrims = ClipTriangles;
|
||||
pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
|
||||
#if USE_SIMD16_FRONTEND
|
||||
pState->pfnProcessPrims_simd16 = ClipTriangles_simd16;
|
||||
pfnBinner_simd16 = GetBinTrianglesFunc_simd16((rastState.conservativeRast > 0));
|
||||
#endif
|
||||
break;
|
||||
};
|
||||
|
||||
|
@ -864,6 +879,9 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
if (pState->state.frontendState.vpTransformDisable)
|
||||
{
|
||||
pState->pfnProcessPrims = pfnBinner;
|
||||
#if USE_SIMD16_FRONTEND
|
||||
pState->pfnProcessPrims_simd16 = pfnBinner_simd16;
|
||||
#endif
|
||||
}
|
||||
|
||||
if ((pState->state.psState.pfnPixelShader == nullptr) &&
|
||||
|
@ -874,11 +892,17 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
(pState->state.backendState.numAttributes == 0))
|
||||
{
|
||||
pState->pfnProcessPrims = nullptr;
|
||||
#if USE_SIMD16_FRONTEND
|
||||
pState->pfnProcessPrims_simd16 = nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (pState->state.soState.rasterizerDisable == true)
|
||||
{
|
||||
pState->pfnProcessPrims = nullptr;
|
||||
#if USE_SIMD16_FRONTEND
|
||||
pState->pfnProcessPrims_simd16 = nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -856,6 +856,58 @@ endBinTriangles:
|
|||
AR_END(FEBinTriangles, 1);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Isolate bits [7:0] of a primitive mask.
/// @param primMask - combined primitive mask.
/// @return the low eight bits of primMask; all higher bits are cleared.
inline uint32_t GetPrimMaskLo(uint32_t primMask)
{
    const uint32_t lowerBits = primMask & 0xFFu;
    return lowerBits;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Isolate bits [15:8] of a primitive mask and shift them down.
/// @param primMask - combined primitive mask.
/// @return bits [15:8] of primMask, moved into bits [7:0].
inline uint32_t GetPrimMaskHi(uint32_t primMask)
{
    const uint32_t shifted = primMask >> 8;
    return shifted & 0xFFu;
}
|
||||
|
||||
template <typename CT>
|
||||
void BinTriangles_simd16(
|
||||
DRAW_CONTEXT *pDC,
|
||||
PA_STATE& pa,
|
||||
uint32_t workerId,
|
||||
simd16vector tri[3],
|
||||
uint32_t triMask,
|
||||
simd16scalari primID,
|
||||
simd16scalari viewportIdx)
|
||||
{
|
||||
enum { VERTS_PER_PRIM = 3 };
|
||||
|
||||
simdvector verts[VERTS_PER_PRIM];
|
||||
|
||||
for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1)
|
||||
{
|
||||
for (uint32_t j = 0; j < 4; j += 1)
|
||||
{
|
||||
verts[i][j] = _simd16_extract_ps(tri[i][j], 0);
|
||||
}
|
||||
}
|
||||
|
||||
pa.useAlternateOffset = false;
|
||||
BinTriangles<CT>(pDC, pa, workerId, verts, GetPrimMaskLo(triMask), _simd16_extract_si(primID, 0), _simd16_extract_si(viewportIdx, 0));
|
||||
|
||||
if (GetPrimMaskHi(triMask))
|
||||
{
|
||||
for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1)
|
||||
{
|
||||
for (uint32_t j = 0; j < 4; j += 1)
|
||||
{
|
||||
verts[i][j] = _simd16_extract_ps(tri[i][j], 1);
|
||||
}
|
||||
}
|
||||
|
||||
pa.useAlternateOffset = true;
|
||||
BinTriangles<CT>(pDC, pa, workerId, verts, GetPrimMaskHi(triMask), _simd16_extract_si(primID, 1), _simd16_extract_si(viewportIdx, 1));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
struct FEBinTrianglesChooser
|
||||
{
|
||||
typedef PFN_PROCESS_PRIMS FuncType;
|
||||
|
@ -873,6 +925,25 @@ PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative)
|
|||
return TemplateArgUnroller<FEBinTrianglesChooser>::GetFunc(IsConservative);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
struct FEBinTrianglesChooser_simd16
|
||||
{
|
||||
typedef PFN_PROCESS_PRIMS_SIMD16 FuncType;
|
||||
|
||||
template <typename... ArgsB>
|
||||
static FuncType GetFunc()
|
||||
{
|
||||
return BinTriangles_simd16<ConservativeRastFETraits<ArgsB...>>;
|
||||
}
|
||||
};
|
||||
|
||||
// Selector for correct templated BinTrinagles function
|
||||
PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative)
|
||||
{
|
||||
return TemplateArgUnroller<FEBinTrianglesChooser_simd16>::GetFunc(IsConservative);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Bin SIMD points to the backend. Only supports point size of 1
|
||||
|
@ -1217,6 +1288,47 @@ void BinPoints(
|
|||
AR_END(FEBinPoints, 1);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 entry point for point binning. Forwards the work to
///        BinPoints in up to two passes: extract index 0 first, then
///        extract index 1 when the upper mask bits are non-zero.
/// @param pDC - pointer to draw context.
/// @param pa - primitive assembly state; useAlternateOffset is written
///        (false for the first pass, true for the second).
/// @param workerId - forwarded unchanged to BinPoints.
/// @param prim - point vertex data, 1 vert x 4 components are read.
/// @param primMask - primitive mask; bits [7:0] drive the first pass,
///        bits [15:8] the second.
/// @param primID - primitive IDs, split per pass.
/// @param viewportIdx - viewport indices, split per pass.
void BinPoints_simd16(
    DRAW_CONTEXT *pDC,
    PA_STATE& pa,
    uint32_t workerId,
    simd16vector prim[3],
    uint32_t primMask,
    simd16scalari primID,
    simd16scalari viewportIdx)
{
    const uint32_t kNumVerts = 1;
    const uint32_t kNumComps = 4;

    simdvector halfPrim[kNumVerts];

    // First pass: always issued, even if the low mask bits are all clear.
    for (uint32_t v = 0; v < kNumVerts; ++v)
    {
        for (uint32_t c = 0; c < kNumComps; ++c)
        {
            halfPrim[v][c] = _simd16_extract_ps(prim[v][c], 0);
        }
    }

    pa.useAlternateOffset = false;
    BinPoints(pDC, pa, workerId, halfPrim, GetPrimMaskLo(primMask),
              _simd16_extract_si(primID, 0), _simd16_extract_si(viewportIdx, 0));

    // Second pass: skipped entirely when no upper mask bit is set.
    const uint32_t upperMask = GetPrimMaskHi(primMask);
    if (upperMask == 0)
    {
        return;
    }

    for (uint32_t v = 0; v < kNumVerts; ++v)
    {
        for (uint32_t c = 0; c < kNumComps; ++c)
        {
            halfPrim[v][c] = _simd16_extract_ps(prim[v][c], 1);
        }
    }

    pa.useAlternateOffset = true;
    BinPoints(pDC, pa, workerId, halfPrim, upperMask,
              _simd16_extract_si(primID, 1), _simd16_extract_si(viewportIdx, 1));
}
|
||||
|
||||
#endif
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Bin SIMD lines to the backend.
|
||||
/// @param pDC - pointer to draw context.
|
||||
|
@ -1503,3 +1615,45 @@ void BinLines(
|
|||
primID,
|
||||
viewportIdx);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 entry point for line binning. Forwards the work to
///        BinLines in up to two passes: extract index 0 first, then
///        extract index 1 when the upper mask bits are non-zero.
/// @param pDC - pointer to draw context.
/// @param pa - primitive assembly state; useAlternateOffset is written
///        (false for the first pass, true for the second).
/// @param workerId - forwarded unchanged to BinLines.
/// @param prim - line vertex data, 2 verts x 4 components are read.
/// @param primMask - primitive mask; bits [7:0] drive the first pass,
///        bits [15:8] the second.
/// @param primID - primitive IDs, split per pass.
/// @param viewportIdx - viewport indices, split per pass.
void BinLines_simd16(
    DRAW_CONTEXT *pDC,
    PA_STATE& pa,
    uint32_t workerId,
    simd16vector prim[3],
    uint32_t primMask,
    simd16scalari primID,
    simd16scalari viewportIdx)
{
    const uint32_t kNumVerts = 2;
    const uint32_t kNumComps = 4;

    simdvector halfPrim[kNumVerts];

    // First pass: always issued, even if the low mask bits are all clear.
    for (uint32_t v = 0; v < kNumVerts; ++v)
    {
        for (uint32_t c = 0; c < kNumComps; ++c)
        {
            halfPrim[v][c] = _simd16_extract_ps(prim[v][c], 0);
        }
    }

    pa.useAlternateOffset = false;
    BinLines(pDC, pa, workerId, halfPrim, GetPrimMaskLo(primMask),
             _simd16_extract_si(primID, 0), _simd16_extract_si(viewportIdx, 0));

    // Second pass: skipped entirely when no upper mask bit is set.
    const uint32_t upperMask = GetPrimMaskHi(primMask);
    if (upperMask == 0)
    {
        return;
    }

    for (uint32_t v = 0; v < kNumVerts; ++v)
    {
        for (uint32_t c = 0; c < kNumComps; ++c)
        {
            halfPrim[v][c] = _simd16_extract_ps(prim[v][c], 1);
        }
    }

    pa.useAlternateOffset = true;
    BinLines(pDC, pa, workerId, halfPrim, upperMask,
             _simd16_extract_si(primID, 1), _simd16_extract_si(viewportIdx, 1));
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -174,6 +174,7 @@ void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector pr
|
|||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
AR_END(FEClipLines, 1);
|
||||
}
|
||||
|
||||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
@ -183,3 +184,133 @@ void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector p
|
|||
AR_END(FEClipPoints, 1);
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Isolate bits [7:0] of a primitive mask.
/// @param primMask - combined primitive mask.
/// @return the low eight bits of primMask; all higher bits are cleared.
inline uint32_t GetPrimMaskLo(uint32_t primMask)
{
    const uint32_t lowerBits = primMask & 0xFFu;
    return lowerBits;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Isolate bits [15:8] of a primitive mask and shift them down.
/// @param primMask - combined primitive mask.
/// @return bits [15:8] of primMask, moved into bits [7:0].
inline uint32_t GetPrimMaskHi(uint32_t primMask)
{
    const uint32_t shifted = primMask >> 8;
    return shifted & 0xFFu;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 entry point for triangle clipping. Runs a
///        Clipper<3>::ExecuteStage in up to two passes: extract index 0
///        first, then extract index 1 when the upper mask bits are
///        non-zero. The whole call is bracketed by the FEClipTriangles
///        AR_BEGIN / AR_END pair.
/// @param pDC - pointer to draw context.
/// @param pa - primitive assembly state; useAlternateOffset is written
///        (false for the first pass, true for the second).
/// @param workerId - passed to the Clipper constructor.
/// @param prims - triangle vertex data, 3 verts x 4 components are read.
/// @param primMask - primitive mask; bits [7:0] drive the first pass,
///        bits [15:8] the second.
/// @param primId - primitive IDs, split per pass.
/// @param viewportIdx - viewport indices, split per pass.
void ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
{
    // NOTE(review): pContext is not used directly below; presumably the
    // AR_BEGIN / AR_END macros reference it by name - keep the name as is.
    SWR_CONTEXT *pContext = pDC->pContext;
    AR_BEGIN(FEClipTriangles, pDC->drawId);

    const uint32_t kNumVerts = 3;
    const uint32_t kNumComps = 4;

    Clipper<kNumVerts> clipper(workerId, pDC);

    simdvector halfPrims[kNumVerts];

    // First pass: always issued, even if the low mask bits are all clear.
    for (uint32_t v = 0; v < kNumVerts; ++v)
    {
        for (uint32_t c = 0; c < kNumComps; ++c)
        {
            halfPrims[v][c] = _simd16_extract_ps(prims[v][c], 0);
        }
    }

    pa.useAlternateOffset = false;
    clipper.ExecuteStage(pa, halfPrims, GetPrimMaskLo(primMask),
                         _simd16_extract_si(primId, 0), _simd16_extract_si(viewportIdx, 0));

    // Second pass: only when at least one upper mask bit is set.
    const uint32_t upperMask = GetPrimMaskHi(primMask);
    if (upperMask != 0)
    {
        for (uint32_t v = 0; v < kNumVerts; ++v)
        {
            for (uint32_t c = 0; c < kNumComps; ++c)
            {
                halfPrims[v][c] = _simd16_extract_ps(prims[v][c], 1);
            }
        }

        pa.useAlternateOffset = true;
        clipper.ExecuteStage(pa, halfPrims, upperMask,
                             _simd16_extract_si(primId, 1), _simd16_extract_si(viewportIdx, 1));
    }

    AR_END(FEClipTriangles, 1);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 entry point for line clipping. Runs a
///        Clipper<2>::ExecuteStage in up to two passes: extract index 0
///        first, then extract index 1 when the upper mask bits are
///        non-zero. The whole call is bracketed by the FEClipLines
///        AR_BEGIN / AR_END pair.
/// @param pDC - pointer to draw context.
/// @param pa - primitive assembly state; useAlternateOffset is written
///        (false for the first pass, true for the second).
/// @param workerId - passed to the Clipper constructor.
/// @param prims - line vertex data, 2 verts x 4 components are read.
/// @param primMask - primitive mask; bits [7:0] drive the first pass,
///        bits [15:8] the second.
/// @param primId - primitive IDs, split per pass.
/// @param viewportIdx - viewport indices, split per pass.
void ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
{
    // NOTE(review): pContext is not used directly below; presumably the
    // AR_BEGIN / AR_END macros reference it by name - keep the name as is.
    SWR_CONTEXT *pContext = pDC->pContext;
    AR_BEGIN(FEClipLines, pDC->drawId);

    const uint32_t kNumVerts = 2;
    const uint32_t kNumComps = 4;

    Clipper<kNumVerts> clipper(workerId, pDC);

    simdvector halfPrims[kNumVerts];

    // First pass: always issued, even if the low mask bits are all clear.
    for (uint32_t v = 0; v < kNumVerts; ++v)
    {
        for (uint32_t c = 0; c < kNumComps; ++c)
        {
            halfPrims[v][c] = _simd16_extract_ps(prims[v][c], 0);
        }
    }

    pa.useAlternateOffset = false;
    clipper.ExecuteStage(pa, halfPrims, GetPrimMaskLo(primMask),
                         _simd16_extract_si(primId, 0), _simd16_extract_si(viewportIdx, 0));

    // Second pass: only when at least one upper mask bit is set.
    const uint32_t upperMask = GetPrimMaskHi(primMask);
    if (upperMask != 0)
    {
        for (uint32_t v = 0; v < kNumVerts; ++v)
        {
            for (uint32_t c = 0; c < kNumComps; ++c)
            {
                halfPrims[v][c] = _simd16_extract_ps(prims[v][c], 1);
            }
        }

        pa.useAlternateOffset = true;
        clipper.ExecuteStage(pa, halfPrims, upperMask,
                             _simd16_extract_si(primId, 1), _simd16_extract_si(viewportIdx, 1));
    }

    AR_END(FEClipLines, 1);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 entry point for point clipping. Runs a
///        Clipper<1>::ExecuteStage in up to two passes: extract index 0
///        first, then extract index 1 when the upper mask bits are
///        non-zero. The whole call is bracketed by the FEClipPoints
///        AR_BEGIN / AR_END pair.
/// @param pDC - pointer to draw context.
/// @param pa - primitive assembly state; useAlternateOffset is written
///        (false for the first pass, true for the second).
/// @param workerId - passed to the Clipper constructor.
/// @param prims - point vertex data, 1 vert x 4 components are read.
/// @param primMask - primitive mask; bits [7:0] drive the first pass,
///        bits [15:8] the second.
/// @param primId - primitive IDs, split per pass.
/// @param viewportIdx - viewport indices, split per pass.
void ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
{
    // NOTE(review): pContext is not used directly below; presumably the
    // AR_BEGIN / AR_END macros reference it by name - keep the name as is.
    SWR_CONTEXT *pContext = pDC->pContext;
    AR_BEGIN(FEClipPoints, pDC->drawId);

    const uint32_t kNumVerts = 1;
    const uint32_t kNumComps = 4;

    Clipper<kNumVerts> clipper(workerId, pDC);

    simdvector halfPrims[kNumVerts];

    // First pass: always issued, even if the low mask bits are all clear.
    for (uint32_t v = 0; v < kNumVerts; ++v)
    {
        for (uint32_t c = 0; c < kNumComps; ++c)
        {
            halfPrims[v][c] = _simd16_extract_ps(prims[v][c], 0);
        }
    }

    pa.useAlternateOffset = false;
    clipper.ExecuteStage(pa, halfPrims, GetPrimMaskLo(primMask),
                         _simd16_extract_si(primId, 0), _simd16_extract_si(viewportIdx, 0));

    // Second pass: only when at least one upper mask bit is set.
    const uint32_t upperMask = GetPrimMaskHi(primMask);
    if (upperMask != 0)
    {
        for (uint32_t v = 0; v < kNumVerts; ++v)
        {
            for (uint32_t c = 0; c < kNumComps; ++c)
            {
                halfPrims[v][c] = _simd16_extract_ps(prims[v][c], 1);
            }
        }

        pa.useAlternateOffset = true;
        clipper.ExecuteStage(pa, halfPrims, upperMask,
                             _simd16_extract_si(primId, 1), _simd16_extract_si(viewportIdx, 1));
    }

    AR_END(FEClipPoints, 1);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -969,3 +969,9 @@ private:
|
|||
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
|
||||
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
|
||||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
|
||||
#if USE_SIMD16_FRONTEND
|
||||
void ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
|
||||
void ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
|
||||
void ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -363,6 +363,9 @@ struct DRAW_STATE
|
|||
// pipeline function pointers, filled in by API thread when setting up the draw
|
||||
BACKEND_FUNCS backendFuncs;
|
||||
PFN_PROCESS_PRIMS pfnProcessPrims;
|
||||
#if USE_SIMD16_FRONTEND
|
||||
PFN_PROCESS_PRIMS_SIMD16 pfnProcessPrims_simd16;
|
||||
#endif
|
||||
|
||||
CachingArena* pArena; // This should only be used by API thread.
|
||||
};
|
||||
|
|
|
@ -841,6 +841,20 @@ static void GeometryShaderStage(
|
|||
}
|
||||
|
||||
// set up new binner and state for the GS output topology
|
||||
#if USE_SIMD16_FRONTEND
|
||||
PFN_PROCESS_PRIMS_SIMD16 pfnClipFunc = nullptr;
|
||||
if (HasRastT::value)
|
||||
{
|
||||
switch (pState->outputTopology)
|
||||
{
|
||||
case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles_simd16; break;
|
||||
case TOP_LINE_STRIP: pfnClipFunc = ClipLines_simd16; break;
|
||||
case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break;
|
||||
default: SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
|
||||
if (HasRastT::value)
|
||||
{
|
||||
|
@ -853,6 +867,7 @@ static void GeometryShaderStage(
|
|||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
// foreach input prim:
|
||||
// - setup a new PA based on the emitted verts for that prim
|
||||
// - loop over the new verts, calling PA to assemble each prim
|
||||
|
@ -997,39 +1012,8 @@ static void GeometryShaderStage(
|
|||
vViewPortIdx = _simd16_set1_epi32(0);
|
||||
}
|
||||
|
||||
const uint32_t primMask = GenMask(gsPa.NumPrims());
|
||||
const uint32_t primMask_lo = primMask & 255;
|
||||
const uint32_t primMask_hi = (primMask >> 8) & 255;
|
||||
|
||||
const simd16scalari primID = vPrimId;
|
||||
const simdscalari primID_lo = _simd16_extract_si(primID, 0);
|
||||
const simdscalari primID_hi = _simd16_extract_si(primID, 1);
|
||||
|
||||
for (uint32_t i = 0; i < 3; i += 1)
|
||||
{
|
||||
for (uint32_t j = 0; j < 4; j += 1)
|
||||
{
|
||||
attrib[i][j] = _simd16_extract_ps(attrib_simd16[i][j], 0);
|
||||
}
|
||||
}
|
||||
|
||||
gsPa.useAlternateOffset = false;
|
||||
pfnClipFunc(pDC, gsPa, workerId, attrib, primMask_lo, primID_lo, _simd16_extract_si(vViewPortIdx, 0));
|
||||
|
||||
if (primMask_hi)
|
||||
{
|
||||
for (uint32_t i = 0; i < 3; i += 1)
|
||||
{
|
||||
for (uint32_t j = 0; j < 4; j += 1)
|
||||
{
|
||||
attrib[i][j] = _simd16_extract_ps(attrib_simd16[i][j], 1);
|
||||
}
|
||||
}
|
||||
|
||||
gsPa.useAlternateOffset = true;
|
||||
pfnClipFunc(pDC, gsPa, workerId, attrib, primMask_hi, primID_hi, _simd16_extract_si(vViewPortIdx, 1));
|
||||
}
|
||||
|
||||
pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
|
||||
#else
|
||||
simdscalari vPrimId;
|
||||
// pull primitiveID from the GS output if available
|
||||
|
@ -1202,6 +1186,20 @@ static void TessellationStages(
|
|||
}
|
||||
SWR_ASSERT(tsCtx);
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
PFN_PROCESS_PRIMS_SIMD16 pfnClipFunc = nullptr;
|
||||
if (HasRastT::value)
|
||||
{
|
||||
switch (tsState.postDSTopology)
|
||||
{
|
||||
case TOP_TRIANGLE_LIST: pfnClipFunc = ClipTriangles_simd16; break;
|
||||
case TOP_LINE_LIST: pfnClipFunc = ClipLines_simd16; break;
|
||||
case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break;
|
||||
default: SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
|
||||
if (HasRastT::value)
|
||||
{
|
||||
|
@ -1214,6 +1212,7 @@ static void TessellationStages(
|
|||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext;
|
||||
hsContext.pCPout = gt_pTessellationThreadData->patchData;
|
||||
hsContext.PrimitiveID = primID;
|
||||
|
@ -1408,30 +1407,8 @@ static void TessellationStages(
|
|||
|
||||
SWR_ASSERT(pfnClipFunc);
|
||||
#if USE_SIMD16_FRONTEND
|
||||
for (uint32_t i = 0; i < 3; i += 1)
|
||||
{
|
||||
for (uint32_t j = 0; j < 4; j += 1)
|
||||
{
|
||||
prim[i][j] = _simd16_extract_ps(prim_simd16[i][j], 0);
|
||||
}
|
||||
}
|
||||
|
||||
tessPa.useAlternateOffset = false;
|
||||
pfnClipFunc(pDC, tessPa, workerId, prim, primMask_lo, primID_lo, _simd_set1_epi32(0));
|
||||
|
||||
if (primMask_hi)
|
||||
{
|
||||
for (uint32_t i = 0; i < 3; i += 1)
|
||||
{
|
||||
for (uint32_t j = 0; j < 4; j += 1)
|
||||
{
|
||||
prim[i][j] = _simd16_extract_ps(prim_simd16[i][j], 1);
|
||||
}
|
||||
}
|
||||
|
||||
tessPa.useAlternateOffset = true;
|
||||
pfnClipFunc(pDC, tessPa, workerId, prim, primMask_hi, primID_hi, _simd_set1_epi32(0));
|
||||
}
|
||||
pfnClipFunc(pDC, tessPa, workerId, prim_simd16, primMask, primID, _simd16_set1_epi32(0));
|
||||
#else
|
||||
pfnClipFunc(pDC, tessPa, workerId, prim,
|
||||
GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0));
|
||||
|
@ -1791,34 +1768,10 @@ void ProcessDraw(
|
|||
|
||||
if (HasRastT::value)
|
||||
{
|
||||
SWR_ASSERT(pDC->pState->pfnProcessPrims);
|
||||
|
||||
simdvector prim[MAX_NUM_VERTS_PER_PRIM];
|
||||
|
||||
for (uint32_t i = 0; i < 3; i += 1)
|
||||
{
|
||||
for (uint32_t j = 0; j < 4; j += 1)
|
||||
{
|
||||
prim[i][j] = _simd16_extract_ps(prim_simd16[i][j], 0);
|
||||
}
|
||||
}
|
||||
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
|
||||
|
||||
pa.useAlternateOffset = false;
|
||||
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim, primMask_lo, primID_lo, _simd_setzero_si());
|
||||
|
||||
if (primMask_hi)
|
||||
{
|
||||
for (uint32_t i = 0; i < 3; i += 1)
|
||||
{
|
||||
for (uint32_t j = 0; j < 4; j += 1)
|
||||
{
|
||||
prim[i][j] = _simd16_extract_ps(prim_simd16[i][j], 1);
|
||||
}
|
||||
}
|
||||
|
||||
pa.useAlternateOffset = true;
|
||||
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim, primMask_hi, primID_hi, _simd_setzero_si());
|
||||
}
|
||||
pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, primMask, primID, _simd16_setzero_si());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -315,8 +315,15 @@ void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, vo
|
|||
void ProcessShutdown(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
|
||||
PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
|
||||
#if USE_SIMD16_FRONTEND
|
||||
PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
|
||||
#endif
|
||||
|
||||
struct PA_STATE_BASE; // forward decl
|
||||
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
|
||||
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
|
||||
#if USE_SIMD16_FRONTEND
|
||||
void BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
|
||||
void BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1228,7 +1228,11 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
|
|||
simdvector a;
|
||||
simdvector b;
|
||||
|
||||
#if 1
|
||||
const simd16vector &leadvert_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
|
||||
#else
|
||||
const simd16vector &leadvert_16 = pa.leadingVertex.attrib[slot];
|
||||
#endif
|
||||
|
||||
if (!pa.useAlternateOffset)
|
||||
{
|
||||
|
@ -1298,7 +1302,11 @@ bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
|
|||
bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
|
||||
{
|
||||
#if USE_SIMD16_FRONTEND
|
||||
#if 1
|
||||
const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot);
|
||||
#else
|
||||
const simd16vector &a = pa.leadingVertex.attrib[slot];
|
||||
#endif
|
||||
#else
|
||||
simd16vector a;
|
||||
|
||||
|
@ -1345,7 +1353,11 @@ bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
|
|||
void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m128 verts[])
|
||||
{
|
||||
#if USE_SIMD16_FRONTEND
|
||||
#if 1
|
||||
const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot);
|
||||
#else
|
||||
const simd16vector &a = pa.leadingVertex.attrib[slot];
|
||||
#endif
|
||||
const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot);
|
||||
const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot);
|
||||
|
||||
|
|
Loading…
Reference in New Issue