diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 519367228ea..ddee3b1a940 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -703,7 +703,7 @@ public: primMask &= ~ComputeNaNMask(prim); // user cull distance cull - if (state.backendState.cullDistanceMask) + if (state.backendState.cullDistanceMask | state.backendState.clipDistanceMask) { primMask &= ~ComputeUserClipCullMask(pa, prim); } diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index a284c422dff..489aa7862cb 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -227,8 +227,9 @@ OSALIGNLINE(struct) API_STATE // Vertex Buffers SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS]; - // Index Buffer - SWR_INDEX_BUFFER_STATE indexBuffer; + // GS - Geometry Shader State + SWR_GS_STATE gsState; + PFN_GS_FUNC pfnGsFunc; // FS - Fetch Shader State PFN_FETCH_FUNC pfnFetchFunc; @@ -236,9 +237,8 @@ OSALIGNLINE(struct) API_STATE // VS - Vertex Shader State PFN_VERTEX_FUNC pfnVertexFunc; - // GS - Geometry Shader State - PFN_GS_FUNC pfnGsFunc; - SWR_GS_STATE gsState; + // Index Buffer + SWR_INDEX_BUFFER_STATE indexBuffer; // CS - Compute Shader PFN_CS_FUNC pfnCsFunc; @@ -265,8 +265,6 @@ OSALIGNLINE(struct) API_STATE // Number of attributes used by the frontend (vs, so, gs) uint32_t feNumAttributes; - PRIMITIVE_TOPOLOGY topology; - bool forceFront; // RS - Rasterizer State SWR_RASTSTATE rastState; @@ -282,8 +280,12 @@ OSALIGNLINE(struct) API_STATE SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS]; bool scissorsTileAligned; + bool forceFront; + PRIMITIVE_TOPOLOGY topology; + + // Backend state - SWR_BACKEND_STATE backendState; + OSALIGNLINE(SWR_BACKEND_STATE) backendState; SWR_DEPTH_BOUNDS_STATE depthBoundsState; @@ -400,8 +402,6 @@ struct DRAW_CONTEXT DispatchQueue* pDispatch; // Queue for thread groups. (isCompute) }; DRAW_STATE* pState; // Read-only state. Core should not update this outside of API thread. - DRAW_DYNAMIC_STATE dynState; - CachingArena* pArena; uint32_t drawId; @@ -412,11 +412,13 @@ struct DRAW_CONTEXT FE_WORK FeWork; + SYNC_DESC retireCallback; // Call this func when this DC is retired. + + DRAW_DYNAMIC_STATE dynState; + volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw? volatile OSALIGNLINE(uint32_t) FeLock; volatile OSALIGNLINE(uint32_t) threadsDone; - - SYNC_DESC retireCallback; // Call this func when this DC is retired. }; static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT"); diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index d959c64361d..6b108d9c21e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -228,8 +228,8 @@ struct SWR_VS_CONTEXT #if USE_SIMD16_FRONTEND uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output #if USE_SIMD16_VS - simd16scalari mask16; // IN: Active mask for shader (16-wide) - simd16scalari VertexID16; // IN: Vertex ID (16-wide) + simd16scalari mask16; // IN: Active mask for shader (16-wide) + simd16scalari VertexID16; // IN: Vertex ID (16-wide) #endif #endif }; @@ -553,11 +553,10 @@ struct SWR_SURFACE_STATE // in the fetch shader jit struct SWR_VERTEX_BUFFER_STATE { + gfxptr_t xpData; uint32_t index; uint32_t pitch; - gfxptr_t xpData; uint32_t size; - uint32_t numaNode; uint32_t minVertex; // min vertex (for bounds checking) uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices @@ -565,9 +564,9 @@ struct SWR_VERTEX_BUFFER_STATE struct SWR_INDEX_BUFFER_STATE { + const void *pIndices; // Format type for indices (e.g. UINT16, UINT32, etc.) SWR_FORMAT format; // @llvm_enum - const void *pIndices; uint32_t size; }; @@ -646,12 +645,15 @@ OSALIGNLINE(struct) SWR_STATS_FE struct SWR_STREAMOUT_BUFFER { - bool enable; - bool soWriteEnable; - // Pointers to streamout buffers. uint32_t* pBuffer; + // Offset to the SO write offset. If not null then we update offset here. + uint32_t* pWriteOffset; + + bool enable; + bool soWriteEnable; + // Size of buffer in dwords. uint32_t bufferSize; @@ -660,10 +662,6 @@ struct SWR_STREAMOUT_BUFFER // Offset into buffer in dwords. SOS will increment this offset. uint32_t streamOffset; - - // Offset to the SO write offset. If not null then we update offset here. - uint32_t* pWriteOffset; - }; ////////////////////////////////////////////////////////////////////////// @@ -718,6 +716,11 @@ struct SWR_GS_STATE { bool gsEnable; + // If true, geometry shader emits a single stream, with separate cut buffer. + // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer + // to map vertices to streams + bool isSingleStream; + // Number of input attributes per vertex. Used by the frontend to // optimize assembling primitives for GS uint32_t numInputAttribs; @@ -730,15 +733,10 @@ struct SWR_GS_STATE // Maximum number of verts that can be emitted by a single instance of the GS uint32_t maxNumVerts; - + // Instance count uint32_t instanceCount; - // If true, geometry shader emits a single stream, with separate cut buffer. - // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer - // to map vertices to streams - bool isSingleStream; - // When single stream is enabled, singleStreamID dictates which stream is being output. // field ignored if isSingleStream is false uint32_t singleStreamID; @@ -768,7 +766,11 @@ struct SWR_GS_STATE // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is // expected to store the final vertex count in the first dword of the gs output stream. uint32_t staticVertexCount; + + uint32_t pad; }; +static_assert(sizeof(SWR_GS_STATE) == 64, + "Adjust padding to keep size (or remove this assert)"); ////////////////////////////////////////////////////////////////////////// @@ -814,6 +816,7 @@ enum SWR_TS_DOMAIN struct SWR_TS_STATE { bool tsEnable; + SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum SWR_TS_PARTITIONING partitioning; // @llvm_enum SWR_TS_DOMAIN domain; // @llvm_enum @@ -863,11 +866,11 @@ struct SWR_BLEND_STATE float constantColor[4]; // alpha test reference value in unorm8 or float32 - uint32_t alphaTestReference; + uint32_t alphaTestReference; uint32_t sampleMask; // all RT's have the same sample count ///@todo move this to Output Merger state when we refactor - SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum + SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS]; }; @@ -889,8 +892,8 @@ typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsConte typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext); typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); -typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, - simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample, +typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, + simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample, uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask); typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &); @@ -917,7 +920,7 @@ struct SWR_FRONTEND_STATE } provokingVertex; uint32_t topologyProvokingVertex; // provoking vertex for the draw topology - // Size of a vertex in simdvector units. Should be sized to the + // Size of a vertex in simdvector units. Should be sized to the // maximum of the input/output of the vertex shader. uint32_t vsVertexSize; }; @@ -1013,7 +1016,7 @@ public: INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func - + INLINE void PrecalcSampleData(int numSamples); //@llvm_func private: @@ -1081,7 +1084,7 @@ enum SWR_CONSTANT_SOURCE struct SWR_ATTRIB_SWIZZLE { - uint16_t sourceAttrib : 5; // source attribute + uint16_t sourceAttrib : 5; // source attribute uint16_t constantSource : 2; // constant source to apply uint16_t componentOverrideMask : 4; // override component with constant source }; @@ -1092,27 +1095,33 @@ struct SWR_BACKEND_STATE uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates + bool swizzleEnable; // when enabled, core will parse the swizzle map when + // setting up attributes for the backend, otherwise + // all attributes up to numAttributes will be sent uint8_t numAttributes; // total number of attributes to send to backend (up to 32) uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components - bool swizzleEnable; // when enabled, core will parse the swizzle map when - // setting up attributes for the backend, otherwise - // all attributes up to numAttributes will be sent - SWR_ATTRIB_SWIZZLE swizzleMap[32]; - bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning - - // Offset to the start of the attributes of the input vertices, in simdvector units - uint32_t vertexAttribOffset; // User clip/cull distance enables uint8_t cullDistanceMask; uint8_t clipDistanceMask; + // padding to ensure swizzleMap starts 64B offset from start of the struct + // and that the next fields are dword aligned. + uint8_t pad[10]; + + // Offset to the start of the attributes of the input vertices, in simdvector units + uint32_t vertexAttribOffset; + // Offset to clip/cull attrib section of the vertex, in simdvector units uint32_t vertexClipCullOffset; + + SWR_ATTRIB_SWIZZLE swizzleMap[32]; }; +static_assert(sizeof(SWR_BACKEND_STATE) == 128, + "Adjust padding to keep size (or remove this assert)"); union SWR_DEPTH_STENCIL_STATE @@ -1167,8 +1176,8 @@ enum SWR_INPUT_COVERAGE enum SWR_PS_POSITION_OFFSET { - SWR_PS_POSITION_SAMPLE_NONE, - SWR_PS_POSITION_SAMPLE_OFFSET, + SWR_PS_POSITION_SAMPLE_NONE, + SWR_PS_POSITION_SAMPLE_OFFSET, SWR_PS_POSITION_CENTROID_OFFSET, SWR_PS_POSITION_OFFSET_COUNT, }; @@ -1194,7 +1203,7 @@ struct SWR_PS_STATE uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with - uint32_t usesUAV : 1; // pixel shader accesses UAV + uint32_t usesUAV : 1; // pixel shader accesses UAV uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test uint8_t renderTargetMask; // Mask of render targets written