From aee5276375d79f5d73680d6038a1fd838894679a Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Wed, 22 Mar 2017 12:36:49 -0500 Subject: [PATCH] swr: [rasterizer core] SIMD16 Frontend WIP Implement widened clipper and binner interfaces for SIMD16. Reviewed-by: George Kyriazis --- .../drivers/swr/rasterizer/core/api.cpp | 24 +++ .../drivers/swr/rasterizer/core/binner.cpp | 154 ++++++++++++++++++ .../drivers/swr/rasterizer/core/clip.cpp | 131 +++++++++++++++ .../drivers/swr/rasterizer/core/clip.h | 6 + .../drivers/swr/rasterizer/core/context.h | 3 + .../drivers/swr/rasterizer/core/frontend.cpp | 115 ++++--------- .../drivers/swr/rasterizer/core/frontend.h | 7 + .../drivers/swr/rasterizer/core/pa_avx.cpp | 12 ++ 8 files changed, 371 insertions(+), 81 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index bd63796d138..dabd0616d3b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -839,11 +839,18 @@ void SetupPipeline(DRAW_CONTEXT *pDC) } PFN_PROCESS_PRIMS pfnBinner; +#if USE_SIMD16_FRONTEND + PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16; +#endif switch (pState->state.topology) { case TOP_POINT_LIST: pState->pfnProcessPrims = ClipPoints; pfnBinner = BinPoints; +#if USE_SIMD16_FRONTEND + pState->pfnProcessPrims_simd16 = ClipPoints_simd16; + pfnBinner_simd16 = BinPoints_simd16; +#endif break; case TOP_LINE_LIST: case TOP_LINE_STRIP: @@ -852,10 +859,18 @@ void SetupPipeline(DRAW_CONTEXT *pDC) case TOP_LISTSTRIP_ADJ: pState->pfnProcessPrims = ClipLines; pfnBinner = BinLines; +#if USE_SIMD16_FRONTEND + pState->pfnProcessPrims_simd16 = ClipLines_simd16; + pfnBinner_simd16 = BinLines_simd16; +#endif break; default: pState->pfnProcessPrims = ClipTriangles; pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0)); +#if USE_SIMD16_FRONTEND + pState->pfnProcessPrims_simd16 = ClipTriangles_simd16; + pfnBinner_simd16 = 
GetBinTrianglesFunc_simd16((rastState.conservativeRast > 0)); +#endif break; }; @@ -864,6 +879,9 @@ void SetupPipeline(DRAW_CONTEXT *pDC) if (pState->state.frontendState.vpTransformDisable) { pState->pfnProcessPrims = pfnBinner; +#if USE_SIMD16_FRONTEND + pState->pfnProcessPrims_simd16 = pfnBinner_simd16; +#endif } if ((pState->state.psState.pfnPixelShader == nullptr) && @@ -874,11 +892,17 @@ void SetupPipeline(DRAW_CONTEXT *pDC) (pState->state.backendState.numAttributes == 0)) { pState->pfnProcessPrims = nullptr; +#if USE_SIMD16_FRONTEND + pState->pfnProcessPrims_simd16 = nullptr; +#endif } if (pState->state.soState.rasterizerDisable == true) { pState->pfnProcessPrims = nullptr; +#if USE_SIMD16_FRONTEND + pState->pfnProcessPrims_simd16 = nullptr; +#endif } diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 490a86804fc..63eab33ac0b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -856,6 +856,58 @@ endBinTriangles: AR_END(FEBinTriangles, 1); } +#if USE_SIMD16_FRONTEND +inline uint32_t GetPrimMaskLo(uint32_t primMask) +{ + return primMask & 255; +} + +inline uint32_t GetPrimMaskHi(uint32_t primMask) +{ + return (primMask >> 8) & 255; +} + +template +void BinTriangles_simd16( + DRAW_CONTEXT *pDC, + PA_STATE& pa, + uint32_t workerId, + simd16vector tri[3], + uint32_t triMask, + simd16scalari primID, + simd16scalari viewportIdx) +{ + enum { VERTS_PER_PRIM = 3 }; + + simdvector verts[VERTS_PER_PRIM]; + + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(tri[i][j], 0); + } + } + + pa.useAlternateOffset = false; + BinTriangles(pDC, pa, workerId, verts, GetPrimMaskLo(triMask), _simd16_extract_si(primID, 0), _simd16_extract_si(viewportIdx, 0)); + + if (GetPrimMaskHi(triMask)) + { + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j 
= 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(tri[i][j], 1); + } + } + + pa.useAlternateOffset = true; + BinTriangles(pDC, pa, workerId, verts, GetPrimMaskHi(triMask), _simd16_extract_si(primID, 1), _simd16_extract_si(viewportIdx, 1)); + } +} + +#endif struct FEBinTrianglesChooser { typedef PFN_PROCESS_PRIMS FuncType; @@ -873,6 +925,25 @@ PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative) return TemplateArgUnroller::GetFunc(IsConservative); } +#if USE_SIMD16_FRONTEND +struct FEBinTrianglesChooser_simd16 +{ + typedef PFN_PROCESS_PRIMS_SIMD16 FuncType; + + template + static FuncType GetFunc() + { + return BinTriangles_simd16>; + } +}; + +// Selector for correct templated BinTriangles function +PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative) +{ + return TemplateArgUnroller::GetFunc(IsConservative); +} + +#endif ////////////////////////////////////////////////////////////////////////// /// @brief Bin SIMD points to the backend. Only supports point size of 1 @@ -1217,6 +1288,47 @@ void BinPoints( AR_END(FEBinPoints, 1); } +#if USE_SIMD16_FRONTEND +void BinPoints_simd16( + DRAW_CONTEXT *pDC, + PA_STATE& pa, + uint32_t workerId, + simd16vector prim[3], + uint32_t primMask, + simd16scalari primID, + simd16scalari viewportIdx) +{ + enum { VERTS_PER_PRIM = 1 }; + + simdvector verts[VERTS_PER_PRIM]; + + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prim[i][j], 0); + } + } + + pa.useAlternateOffset = false; + BinPoints(pDC, pa, workerId, verts, GetPrimMaskLo(primMask), _simd16_extract_si(primID, 0), _simd16_extract_si(viewportIdx, 0)); + + if (GetPrimMaskHi(primMask)) + { + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prim[i][j], 1); + } + } + + pa.useAlternateOffset = true; + BinPoints(pDC, pa, workerId, verts, GetPrimMaskHi(primMask), _simd16_extract_si(primID,
1), _simd16_extract_si(viewportIdx, 1)); + } +} + +#endif ////////////////////////////////////////////////////////////////////////// /// @brief Bin SIMD lines to the backend. /// @param pDC - pointer to draw context. @@ -1503,3 +1615,45 @@ void BinLines( primID, viewportIdx); } + +#if USE_SIMD16_FRONTEND +void BinLines_simd16( + DRAW_CONTEXT *pDC, + PA_STATE& pa, + uint32_t workerId, + simd16vector prim[3], + uint32_t primMask, + simd16scalari primID, + simd16scalari viewportIdx) +{ + enum { VERTS_PER_PRIM = 2 }; + + simdvector verts[VERTS_PER_PRIM]; + + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prim[i][j], 0); + } + } + + pa.useAlternateOffset = false; + BinLines(pDC, pa, workerId, verts, GetPrimMaskLo(primMask), _simd16_extract_si(primID, 0), _simd16_extract_si(viewportIdx, 0)); + + if (GetPrimMaskHi(primMask)) + { + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prim[i][j], 1); + } + } + + pa.useAlternateOffset = true; + BinLines(pDC, pa, workerId, verts, GetPrimMaskHi(primMask), _simd16_extract_si(primID, 1), _simd16_extract_si(viewportIdx, 1)); + } +} + +#endif diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp index 9e919d3a252..6fc7e162b4f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp @@ -174,6 +174,7 @@ void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector pr clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); AR_END(FEClipLines, 1); } + void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; @@ -183,3 +184,133 @@ void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, 
simdvector p AR_END(FEClipPoints, 1); } +#if USE_SIMD16_FRONTEND +inline uint32_t GetPrimMaskLo(uint32_t primMask) +{ + return primMask & 255; +} + +inline uint32_t GetPrimMaskHi(uint32_t primMask) +{ + return (primMask >> 8) & 255; +} + +void ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx) +{ + SWR_CONTEXT *pContext = pDC->pContext; + AR_BEGIN(FEClipTriangles, pDC->drawId); + + enum { VERTS_PER_PRIM = 3 }; + + Clipper clipper(workerId, pDC); + + simdvector verts[VERTS_PER_PRIM]; + + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prims[i][j], 0); + } + } + + pa.useAlternateOffset = false; + clipper.ExecuteStage(pa, verts, GetPrimMaskLo(primMask), _simd16_extract_si(primId, 0), _simd16_extract_si(viewportIdx, 0)); + + if (GetPrimMaskHi(primMask)) + { + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prims[i][j], 1); + } + } + + pa.useAlternateOffset = true; + clipper.ExecuteStage(pa, verts, GetPrimMaskHi(primMask), _simd16_extract_si(primId, 1), _simd16_extract_si(viewportIdx, 1)); + } + + AR_END(FEClipTriangles, 1); +} + +void ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx) +{ + SWR_CONTEXT *pContext = pDC->pContext; + AR_BEGIN(FEClipLines, pDC->drawId); + + enum { VERTS_PER_PRIM = 2 }; + + Clipper clipper(workerId, pDC); + + simdvector verts[VERTS_PER_PRIM]; + + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prims[i][j], 0); + } + } + + pa.useAlternateOffset = false; + clipper.ExecuteStage(pa, verts, GetPrimMaskLo(primMask), _simd16_extract_si(primId, 0), _simd16_extract_si(viewportIdx, 0)); + + if 
(GetPrimMaskHi(primMask)) + { + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prims[i][j], 1); + } + } + + pa.useAlternateOffset = true; + clipper.ExecuteStage(pa, verts, GetPrimMaskHi(primMask), _simd16_extract_si(primId, 1), _simd16_extract_si(viewportIdx, 1)); + } + + AR_END(FEClipLines, 1); +} + +void ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx) +{ + SWR_CONTEXT *pContext = pDC->pContext; + AR_BEGIN(FEClipPoints, pDC->drawId); + + enum { VERTS_PER_PRIM = 1 }; + + Clipper clipper(workerId, pDC); + + simdvector verts[VERTS_PER_PRIM]; + + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prims[i][j], 0); + } + } + + pa.useAlternateOffset = false; + clipper.ExecuteStage(pa, verts, GetPrimMaskLo(primMask), _simd16_extract_si(primId, 0), _simd16_extract_si(viewportIdx, 0)); + + if (GetPrimMaskHi(primMask)) + { + for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) + { + for (uint32_t j = 0; j < 4; j += 1) + { + verts[i][j] = _simd16_extract_ps(prims[i][j], 1); + } + } + + pa.useAlternateOffset = true; + clipper.ExecuteStage(pa, verts, GetPrimMaskHi(primMask), _simd16_extract_si(primId, 1), _simd16_extract_si(viewportIdx, 1)); + } + + AR_END(FEClipPoints, 1); +} + +#endif + diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 3a79d6a34c4..017f5e795c4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -969,3 +969,9 @@ private: void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx); void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t 
primMask, simdscalari primId, simdscalari viewportIdx); void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx); +#if USE_SIMD16_FRONTEND +void ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx); +void ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx); +void ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx); +#endif + diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 9da7962826c..b520df225b8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -363,6 +363,9 @@ struct DRAW_STATE // pipeline function pointers, filled in by API thread when setting up the draw BACKEND_FUNCS backendFuncs; PFN_PROCESS_PRIMS pfnProcessPrims; +#if USE_SIMD16_FRONTEND + PFN_PROCESS_PRIMS_SIMD16 pfnProcessPrims_simd16; +#endif CachingArena* pArena; // This should only be used by API thread. 
}; diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index eb52594af50..9df7eeadc10 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -841,6 +841,20 @@ static void GeometryShaderStage( } // set up new binner and state for the GS output topology +#if USE_SIMD16_FRONTEND + PFN_PROCESS_PRIMS_SIMD16 pfnClipFunc = nullptr; + if (HasRastT::value) + { + switch (pState->outputTopology) + { + case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles_simd16; break; + case TOP_LINE_STRIP: pfnClipFunc = ClipLines_simd16; break; + case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break; + default: SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology); + } + } + +#else PFN_PROCESS_PRIMS pfnClipFunc = nullptr; if (HasRastT::value) { @@ -853,6 +867,7 @@ static void GeometryShaderStage( } } +#endif // foreach input prim: // - setup a new PA based on the emitted verts for that prim // - loop over the new verts, calling PA to assemble each prim @@ -997,39 +1012,8 @@ static void GeometryShaderStage( vViewPortIdx = _simd16_set1_epi32(0); } - const uint32_t primMask = GenMask(gsPa.NumPrims()); - const uint32_t primMask_lo = primMask & 255; - const uint32_t primMask_hi = (primMask >> 8) & 255; - - const simd16scalari primID = vPrimId; - const simdscalari primID_lo = _simd16_extract_si(primID, 0); - const simdscalari primID_hi = _simd16_extract_si(primID, 1); - - for (uint32_t i = 0; i < 3; i += 1) - { - for (uint32_t j = 0; j < 4; j += 1) - { - attrib[i][j] = _simd16_extract_ps(attrib_simd16[i][j], 0); - } - } - gsPa.useAlternateOffset = false; - pfnClipFunc(pDC, gsPa, workerId, attrib, primMask_lo, primID_lo, _simd16_extract_si(vViewPortIdx, 0)); - - if (primMask_hi) - { - for (uint32_t i = 0; i < 3; i += 1) - { - for (uint32_t j = 0; j < 4; j += 1) - { - attrib[i][j] = _simd16_extract_ps(attrib_simd16[i][j], 1); - } - } - 
- gsPa.useAlternateOffset = true; - pfnClipFunc(pDC, gsPa, workerId, attrib, primMask_hi, primID_hi, _simd16_extract_si(vViewPortIdx, 1)); - } - + pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx); #else simdscalari vPrimId; // pull primitiveID from the GS output if available @@ -1202,6 +1186,20 @@ static void TessellationStages( } SWR_ASSERT(tsCtx); +#if USE_SIMD16_FRONTEND + PFN_PROCESS_PRIMS_SIMD16 pfnClipFunc = nullptr; + if (HasRastT::value) + { + switch (tsState.postDSTopology) + { + case TOP_TRIANGLE_LIST: pfnClipFunc = ClipTriangles_simd16; break; + case TOP_LINE_LIST: pfnClipFunc = ClipLines_simd16; break; + case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break; + default: SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology); + } + } + +#else PFN_PROCESS_PRIMS pfnClipFunc = nullptr; if (HasRastT::value) { @@ -1214,6 +1212,7 @@ static void TessellationStages( } } +#endif SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext; hsContext.pCPout = gt_pTessellationThreadData->patchData; hsContext.PrimitiveID = primID; @@ -1408,30 +1407,8 @@ static void TessellationStages( SWR_ASSERT(pfnClipFunc); #if USE_SIMD16_FRONTEND - for (uint32_t i = 0; i < 3; i += 1) - { - for (uint32_t j = 0; j < 4; j += 1) - { - prim[i][j] = _simd16_extract_ps(prim_simd16[i][j], 0); - } - } - tessPa.useAlternateOffset = false; - pfnClipFunc(pDC, tessPa, workerId, prim, primMask_lo, primID_lo, _simd_set1_epi32(0)); - - if (primMask_hi) - { - for (uint32_t i = 0; i < 3; i += 1) - { - for (uint32_t j = 0; j < 4; j += 1) - { - prim[i][j] = _simd16_extract_ps(prim_simd16[i][j], 1); - } - } - - tessPa.useAlternateOffset = true; - pfnClipFunc(pDC, tessPa, workerId, prim, primMask_hi, primID_hi, _simd_set1_epi32(0)); - } + pfnClipFunc(pDC, tessPa, workerId, prim_simd16, primMask, primID, _simd16_set1_epi32(0)); #else pfnClipFunc(pDC, tessPa, workerId, prim, GenMask(tessPa.NumPrims()), 
_simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0)); @@ -1791,34 +1768,10 @@ void ProcessDraw( if (HasRastT::value) { - SWR_ASSERT(pDC->pState->pfnProcessPrims); - - simdvector prim[MAX_NUM_VERTS_PER_PRIM]; - - for (uint32_t i = 0; i < 3; i += 1) - { - for (uint32_t j = 0; j < 4; j += 1) - { - prim[i][j] = _simd16_extract_ps(prim_simd16[i][j], 0); - } - } + SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16); pa.useAlternateOffset = false; - pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim, primMask_lo, primID_lo, _simd_setzero_si()); - - if (primMask_hi) - { - for (uint32_t i = 0; i < 3; i += 1) - { - for (uint32_t j = 0; j < 4; j += 1) - { - prim[i][j] = _simd16_extract_ps(prim_simd16[i][j], 1); - } - } - - pa.useAlternateOffset = true; - pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim, primMask_hi, primID_hi, _simd_setzero_si()); - } + pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, primMask, primID, _simd16_setzero_si()); } } } diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index 58d6901a819..37b7215c516 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -315,8 +315,15 @@ void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, vo void ProcessShutdown(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative); +#if USE_SIMD16_FRONTEND +PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative); +#endif struct PA_STATE_BASE; // forward decl void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx); void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx); +#if USE_SIMD16_FRONTEND +void 
BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx); +void BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx); +#endif diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp index 511a1fc0df3..3e3b7abab53 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp @@ -1228,7 +1228,11 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) simdvector a; simdvector b; +#if 1 + const simd16vector &leadvert_16 = PaGetSimdVector_simd16(pa, pa.first, slot); +#else const simd16vector &leadvert_16 = pa.leadingVertex.attrib[slot]; +#endif if (!pa.useAlternateOffset) { @@ -1298,7 +1302,11 @@ bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) { #if USE_SIMD16_FRONTEND +#if 1 + const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot); +#else const simd16vector &a = pa.leadingVertex.attrib[slot]; +#endif #else simd16vector a; @@ -1345,7 +1353,11 @@ bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, __m128 verts[]) { #if USE_SIMD16_FRONTEND +#if 1 + const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot); +#else const simd16vector &a = pa.leadingVertex.attrib[slot]; +#endif const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot); const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot);