swr/rast: more flexible max attribute slots
Adds the ability to allocate space for an arbitrary number (fixed at compile time) of position slots in the vertex layout. Removes KNOB_NUM_ATTRIBUTES from knobs.h and replaces the VTX slot number #defines with the SWR_VTX_SLOTS enum, which contains the replacement for KNOB_NUM_ATTRIBUTES: SWR_VTX_NUM_SLOTS. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
54d42cd976
commit
a373f1f27a
|
@ -510,7 +510,7 @@ static void StreamOut(
|
|||
uint32_t soVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
|
||||
|
||||
// The pPrimData buffer is sparse in that we allocate memory for all SWR_VTX_NUM_SLOTS attribute slots for each vertex.
|
||||
uint32_t primDataDwordVertexStride = (KNOB_NUM_ATTRIBUTES * sizeof(float) * 4) / sizeof(uint32_t);
|
||||
uint32_t primDataDwordVertexStride = (SWR_VTX_NUM_SLOTS * sizeof(float) * 4) / sizeof(uint32_t);
|
||||
|
||||
SWR_STREAMOUT_CONTEXT soContext = { 0 };
|
||||
|
||||
|
@ -618,13 +618,13 @@ INLINE static T RoundDownEven(T value)
|
|||
///
|
||||
/// attribCount will limit the vector copies to those attribs specified
|
||||
///
|
||||
/// note: the stride between vertexes is determinded by KNOB_NUM_ATTRIBUTES
|
||||
/// note: the stride between vertices is determined by SWR_VTX_NUM_SLOTS
|
||||
///
|
||||
void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex *vertex_simd16, const simdvertex *vertex, uint32_t vertexCount, uint32_t attribCount)
|
||||
{
|
||||
SWR_ASSERT(vertex);
|
||||
SWR_ASSERT(vertex_simd16);
|
||||
SWR_ASSERT(attribCount <= KNOB_NUM_ATTRIBUTES);
|
||||
SWR_ASSERT(attribCount <= SWR_VTX_NUM_SLOTS);
|
||||
|
||||
simd16vertex temp;
|
||||
|
||||
|
@ -709,7 +709,7 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num
|
|||
}
|
||||
curInputByte >>= 2;
|
||||
}
|
||||
|
||||
|
||||
*pCutBuffer++ = outByte;
|
||||
}
|
||||
}
|
||||
|
@ -810,7 +810,7 @@ static void GeometryShaderStage(
|
|||
tlsGsContext.vert[i].attrib[attribSlot] = attrib[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// assemble position
|
||||
pa.Assemble(VERTEX_POSITION_SLOT, attrib);
|
||||
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
|
||||
|
@ -890,7 +890,7 @@ static void GeometryShaderStage(
|
|||
|
||||
uint8_t* pBase = pInstanceBase + instance * bufferInfo.vertexInstanceStride;
|
||||
uint8_t* pCutBase = pCutBufferBase + instance * bufferInfo.cutInstanceStride;
|
||||
|
||||
|
||||
uint32_t numAttribs = state.feNumAttributes;
|
||||
|
||||
for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
|
||||
|
@ -930,7 +930,7 @@ static void GeometryShaderStage(
|
|||
tempVertex_simd16,
|
||||
reinterpret_cast<const simdvertex *>(pBase),
|
||||
numEmittedVerts,
|
||||
KNOB_NUM_ATTRIBUTES);
|
||||
SWR_VTX_NUM_SLOTS);
|
||||
|
||||
#endif
|
||||
#if USE_SIMD16_FRONTEND
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
// TODO: this belongs in state.h alongside the simdvector definition, but there is a llvm codegen issue
|
||||
struct simd16vertex
|
||||
{
|
||||
simd16vector attrib[KNOB_NUM_ATTRIBUTES];
|
||||
simd16vector attrib[SWR_VTX_NUM_SLOTS];
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -97,9 +97,6 @@
|
|||
// Maximum supported number of active vertex buffer streams
|
||||
#define KNOB_NUM_STREAMS 32
|
||||
|
||||
// Maximum supported number of attributes per vertex
|
||||
#define KNOB_NUM_ATTRIBUTES 39
|
||||
|
||||
// Maximum supported active viewports and scissors
|
||||
#define KNOB_NUM_VIEWPORTS_SCISSORS 16
|
||||
|
||||
|
|
|
@ -510,7 +510,7 @@ __declspec(thread) volatile uint64_t gToss;
|
|||
|
||||
static const uint32_t vertsPerTri = 3, componentsPerAttrib = 4;
|
||||
// try to avoid _chkstk insertions; make this thread local
|
||||
static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * KNOB_NUM_ATTRIBUTES * componentsPerAttrib];
|
||||
static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * SWR_VTX_NUM_SLOTS * componentsPerAttrib];
|
||||
|
||||
INLINE
|
||||
void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge)
|
||||
|
@ -1312,7 +1312,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
|
|||
newWorkDesc.pTriBuffer = &newTriBuffer[0];
|
||||
|
||||
// create a copy of the attrib buffer to write our adjusted attribs to
|
||||
OSALIGNSIMD(float) newAttribBuffer[4 * 3 * KNOB_NUM_ATTRIBUTES];
|
||||
OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
|
||||
newWorkDesc.pAttribs = &newAttribBuffer[0];
|
||||
|
||||
newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
|
||||
|
@ -1597,7 +1597,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
|||
newWorkDesc.pTriBuffer = &newTriBuffer[0];
|
||||
|
||||
// create a copy of the attrib buffer to write our adjusted attribs to
|
||||
OSALIGNSIMD(float) newAttribBuffer[4 * 3 * KNOB_NUM_ATTRIBUTES];
|
||||
OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
|
||||
newWorkDesc.pAttribs = &newAttribBuffer[0];
|
||||
|
||||
const __m128 vBloat0 = _mm_set_ps(0.5f, -0.5f, -0.5f, 0.5f);
|
||||
|
|
|
@ -179,19 +179,25 @@ enum SWR_OUTER_TESSFACTOR_ID
|
|||
/// space for up to 32 attributes, as well as any SGV values generated
|
||||
/// by the pipeline
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
#define VERTEX_POSITION_SLOT 0
|
||||
#define VERTEX_ATTRIB_START_SLOT 1
|
||||
#define VERTEX_ATTRIB_END_SLOT 32
|
||||
#define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here
|
||||
#define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here
|
||||
#define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
|
||||
#define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
|
||||
#define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
|
||||
#define VERTEX_VIEWPORT_ARRAY_INDEX_SLOT 38
|
||||
/// Slot indices into a vertex's attrib[] array.
///
/// Every slot after the position range is expressed relative to
/// VERTEX_POSITION_END_SLOT, so widening the position range shifts all
/// following slots automatically.
///
/// SWR_VTX_NUM_SLOTS is a COUNT, not an index: it sizes the attrib[] arrays,
/// so it must be one past the highest slot index. With the current layout
/// that is 39, the same value the former KNOB_NUM_ATTRIBUTES knob had.
enum SWR_VTX_SLOTS
{
    VERTEX_POSITION_SLOT             = 0,
    VERTEX_POSITION_END_SLOT         = 0,
    VERTEX_ATTRIB_START_SLOT         = ( 1 + VERTEX_POSITION_END_SLOT),
    VERTEX_ATTRIB_END_SLOT           = (32 + VERTEX_POSITION_END_SLOT),
    VERTEX_RTAI_SLOT                 = (33 + VERTEX_POSITION_END_SLOT), // GS writes RenderTargetArrayIndex here
    VERTEX_PRIMID_SLOT               = (34 + VERTEX_POSITION_END_SLOT), // GS writes PrimId here
    VERTEX_CLIPCULL_DIST_LO_SLOT     = (35 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
    VERTEX_CLIPCULL_DIST_HI_SLOT     = (36 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
    VERTEX_POINT_SIZE_SLOT           = (37 + VERTEX_POSITION_END_SLOT), // VS writes point size here
    VERTEX_VIEWPORT_ARRAY_INDEX_SLOT = (38 + VERTEX_POSITION_END_SLOT),
    // Count of slots: last valid index + 1, so that
    // attrib[VERTEX_VIEWPORT_ARRAY_INDEX_SLOT] stays in bounds.
    SWR_VTX_NUM_SLOTS                = (1 + VERTEX_VIEWPORT_ARRAY_INDEX_SLOT),
};
|
||||
|
||||
// SoAoSoA
|
||||
struct simdvertex
|
||||
{
|
||||
simdvector attrib[KNOB_NUM_ATTRIBUTES];
|
||||
simdvector attrib[SWR_VTX_NUM_SLOTS];
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -226,7 +232,7 @@ struct ScalarAttrib
|
|||
|
||||
struct ScalarCPoint
|
||||
{
|
||||
ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES];
|
||||
ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -95,7 +95,7 @@ enum ComponentControl
|
|||
struct FETCH_COMPILE_STATE
|
||||
{
|
||||
uint32_t numAttribs{ 0 };
|
||||
INPUT_ELEMENT_DESC layout[KNOB_NUM_ATTRIBUTES];
|
||||
INPUT_ELEMENT_DESC layout[SWR_VTX_NUM_SLOTS];
|
||||
SWR_FORMAT indexType;
|
||||
uint32_t cutIndex{ 0xffffffff };
|
||||
|
||||
|
|
|
@ -241,7 +241,7 @@ struct StreamOutJit : public Builder
|
|||
|
||||
// increment stream and output buffer pointers
|
||||
// stream verts are always SWR_VTX_NUM_SLOTS * 4 dwords apart
|
||||
pStreamData = GEP(pStreamData, C(KNOB_NUM_ATTRIBUTES * 4));
|
||||
pStreamData = GEP(pStreamData, C(SWR_VTX_NUM_SLOTS * 4));
|
||||
|
||||
// output buffers offset using pitch in buffer state
|
||||
for (uint32_t b : activeSOBuffers)
|
||||
|
|
Loading…
Reference in New Issue