From a373f1f27a82b91d6753c94d23cec2ac9f21f7b3 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Mon, 10 Apr 2017 11:29:45 -0500 Subject: [PATCH] swr/rast: more flexible max attribute slots Ability to allocate space for an arbitrary number (at compile time) of positions in the vertex layout. Removes KNOB_NUM_ATTRIBUTES from knobs.h, replaces the VTX slot number #defines with the SWR_VTX_SLOTS enum (which contains replacement for NUM_ATTRIBUTES: SWR_VTX_NUM_SLOTS) Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/core/frontend.cpp | 14 +++++----- .../drivers/swr/rasterizer/core/frontend.h | 2 +- .../drivers/swr/rasterizer/core/knobs.h | 3 -- .../swr/rasterizer/core/rasterizer.cpp | 6 ++-- .../drivers/swr/rasterizer/core/state.h | 28 +++++++++++-------- .../drivers/swr/rasterizer/jitter/fetch_jit.h | 2 +- .../swr/rasterizer/jitter/streamout_jit.cpp | 2 +- 7 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 8cf234cd67a..08a4fd3b61d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -510,7 +510,7 @@ static void StreamOut( uint32_t soVertsPerPrim = NumVertsPerPrim(pa.binTopology, false); // The pPrimData buffer is sparse in that we allocate memory for all 32 attributes for each vertex. 
- uint32_t primDataDwordVertexStride = (KNOB_NUM_ATTRIBUTES * sizeof(float) * 4) / sizeof(uint32_t); + uint32_t primDataDwordVertexStride = (SWR_VTX_NUM_SLOTS * sizeof(float) * 4) / sizeof(uint32_t); SWR_STREAMOUT_CONTEXT soContext = { 0 }; @@ -618,13 +618,13 @@ INLINE static T RoundDownEven(T value) /// /// attribCount will limit the vector copies to those attribs specified /// -/// note: the stride between vertexes is determinded by KNOB_NUM_ATTRIBUTES +/// note: the stride between vertexes is determined by SWR_VTX_NUM_SLOTS /// void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex *vertex_simd16, const simdvertex *vertex, uint32_t vertexCount, uint32_t attribCount) { SWR_ASSERT(vertex); SWR_ASSERT(vertex_simd16); - SWR_ASSERT(attribCount <= KNOB_NUM_ATTRIBUTES); + SWR_ASSERT(attribCount <= SWR_VTX_NUM_SLOTS); simd16vertex temp; @@ -709,7 +709,7 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num } curInputByte >>= 2; } - + *pCutBuffer++ = outByte; } } @@ -810,7 +810,7 @@ static void GeometryShaderStage( tlsGsContext.vert[i].attrib[attribSlot] = attrib[i]; } } - + // assemble position pa.Assemble(VERTEX_POSITION_SLOT, attrib); for (uint32_t i = 0; i < numVertsPerPrim; ++i) @@ -890,7 +890,7 @@ static void GeometryShaderStage( uint8_t* pBase = pInstanceBase + instance * bufferInfo.vertexInstanceStride; uint8_t* pCutBase = pCutBufferBase + instance * bufferInfo.cutInstanceStride; - + uint32_t numAttribs = state.feNumAttributes; for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream) @@ -930,7 +930,7 @@ static void GeometryShaderStage( tempVertex_simd16, reinterpret_cast(pBase), numEmittedVerts, - KNOB_NUM_ATTRIBUTES); + SWR_VTX_NUM_SLOTS); #endif #if USE_SIMD16_FRONTEND diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index a9c36b4559b..1ce51bbd5df 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ 
b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -34,7 +34,7 @@ // TODO: this belongs in state.h alongside the simdvector definition, but there is a llvm codegen issue struct simd16vertex { - simd16vector attrib[KNOB_NUM_ATTRIBUTES]; + simd16vector attrib[SWR_VTX_NUM_SLOTS]; }; #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h index 7928f5d6d76..640b6726ca4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/knobs.h +++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h @@ -97,9 +97,6 @@ // Maximum supported number of active vertex buffer streams #define KNOB_NUM_STREAMS 32 -// Maximum supported number of attributes per vertex -#define KNOB_NUM_ATTRIBUTES 39 - // Maximum supported active viewports and scissors #define KNOB_NUM_VIEWPORTS_SCISSORS 16 diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp index af54779653e..4df146e9f99 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp @@ -510,7 +510,7 @@ __declspec(thread) volatile uint64_t gToss; static const uint32_t vertsPerTri = 3, componentsPerAttrib = 4; // try to avoid _chkstk insertions; make this thread local -static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * KNOB_NUM_ATTRIBUTES * componentsPerAttrib]; +static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * SWR_VTX_NUM_SLOTS * componentsPerAttrib]; INLINE void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge) @@ -1312,7 +1312,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, newWorkDesc.pTriBuffer = &newTriBuffer[0]; // create a copy of the attrib buffer to write our adjusted attribs to - OSALIGNSIMD(float) newAttribBuffer[4 * 3 * KNOB_NUM_ATTRIBUTES]; + OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS]; newWorkDesc.pAttribs = &newAttribBuffer[0]; 
newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer; @@ -1597,7 +1597,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi newWorkDesc.pTriBuffer = &newTriBuffer[0]; // create a copy of the attrib buffer to write our adjusted attribs to - OSALIGNSIMD(float) newAttribBuffer[4 * 3 * KNOB_NUM_ATTRIBUTES]; + OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS]; newWorkDesc.pAttribs = &newAttribBuffer[0]; const __m128 vBloat0 = _mm_set_ps(0.5f, -0.5f, -0.5f, 0.5f); diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 535b85e8593..3d0b4ff951f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -179,19 +179,25 @@ enum SWR_OUTER_TESSFACTOR_ID /// space for up to 32 attributes, as well as any SGV values generated /// by the pipeline ///////////////////////////////////////////////////////////////////////// -#define VERTEX_POSITION_SLOT 0 -#define VERTEX_ATTRIB_START_SLOT 1 -#define VERTEX_ATTRIB_END_SLOT 32 -#define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here -#define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here -#define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist -#define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist -#define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here -#define VERTEX_VIEWPORT_ARRAY_INDEX_SLOT 38 +enum SWR_VTX_SLOTS +{ + VERTEX_POSITION_SLOT = 0, + VERTEX_POSITION_END_SLOT = 0, + VERTEX_ATTRIB_START_SLOT = ( 1 + VERTEX_POSITION_END_SLOT), + VERTEX_ATTRIB_END_SLOT = (32 + VERTEX_POSITION_END_SLOT), + VERTEX_RTAI_SLOT = (33 + VERTEX_POSITION_END_SLOT), // GS writes RenderTargetArrayIndex here + VERTEX_PRIMID_SLOT = (34 + VERTEX_POSITION_END_SLOT), // GS writes PrimId here + VERTEX_CLIPCULL_DIST_LO_SLOT = (35 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist + VERTEX_CLIPCULL_DIST_HI_SLOT = (36 + 
VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist + VERTEX_POINT_SIZE_SLOT = (37 + VERTEX_POSITION_END_SLOT), // VS writes point size here + VERTEX_VIEWPORT_ARRAY_INDEX_SLOT = (38 + VERTEX_POSITION_END_SLOT), + SWR_VTX_NUM_SLOTS = (1 + VERTEX_VIEWPORT_ARRAY_INDEX_SLOT), // slot count: one past the last slot index, so attrib[] arrays can hold every slot +}; + // SoAoSoA struct simdvertex { - simdvector attrib[KNOB_NUM_ATTRIBUTES]; + simdvector attrib[SWR_VTX_NUM_SLOTS]; }; ////////////////////////////////////////////////////////////////////////// @@ -226,7 +232,7 @@ struct ScalarAttrib struct ScalarCPoint { - ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES]; + ScalarAttrib attrib[SWR_VTX_NUM_SLOTS]; }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h index d5cec70c2e8..4f456afffce 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h @@ -95,7 +95,7 @@ enum ComponentControl struct FETCH_COMPILE_STATE { uint32_t numAttribs{ 0 }; - INPUT_ELEMENT_DESC layout[KNOB_NUM_ATTRIBUTES]; + INPUT_ELEMENT_DESC layout[SWR_VTX_NUM_SLOTS]; SWR_FORMAT indexType; uint32_t cutIndex{ 0xffffffff }; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp index 2c19321fb3f..dbceb36c213 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp @@ -241,7 +241,7 @@ struct StreamOutJit : public Builder // increment stream and output buffer pointers // stream verts are always 32*4 dwords apart - pStreamData = GEP(pStreamData, C(KNOB_NUM_ATTRIBUTES * 4)); + pStreamData = GEP(pStreamData, C(SWR_VTX_NUM_SLOTS * 4)); // output buffers offset using pitch in buffer state for (uint32_t b : activeSOBuffers)