swr/rast: Misc cleanup
Together with correct detection of clipDistance NaNs when no cullDistance is set Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
036c8b6247
commit
9cc9688e49
|
@ -703,7 +703,7 @@ public:
|
|||
primMask &= ~ComputeNaNMask(prim);
|
||||
|
||||
// user cull distance cull
|
||||
if (state.backendState.cullDistanceMask)
|
||||
if (state.backendState.cullDistanceMask | state.backendState.clipDistanceMask)
|
||||
{
|
||||
primMask &= ~ComputeUserClipCullMask(pa, prim);
|
||||
}
|
||||
|
|
|
@ -227,8 +227,9 @@ OSALIGNLINE(struct) API_STATE
|
|||
// Vertex Buffers
|
||||
SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
|
||||
|
||||
// Index Buffer
|
||||
SWR_INDEX_BUFFER_STATE indexBuffer;
|
||||
// GS - Geometry Shader State
|
||||
SWR_GS_STATE gsState;
|
||||
PFN_GS_FUNC pfnGsFunc;
|
||||
|
||||
// FS - Fetch Shader State
|
||||
PFN_FETCH_FUNC pfnFetchFunc;
|
||||
|
@ -236,9 +237,8 @@ OSALIGNLINE(struct) API_STATE
|
|||
// VS - Vertex Shader State
|
||||
PFN_VERTEX_FUNC pfnVertexFunc;
|
||||
|
||||
// GS - Geometry Shader State
|
||||
PFN_GS_FUNC pfnGsFunc;
|
||||
SWR_GS_STATE gsState;
|
||||
// Index Buffer
|
||||
SWR_INDEX_BUFFER_STATE indexBuffer;
|
||||
|
||||
// CS - Compute Shader
|
||||
PFN_CS_FUNC pfnCsFunc;
|
||||
|
@ -265,8 +265,6 @@ OSALIGNLINE(struct) API_STATE
|
|||
// Number of attributes used by the frontend (vs, so, gs)
|
||||
uint32_t feNumAttributes;
|
||||
|
||||
PRIMITIVE_TOPOLOGY topology;
|
||||
bool forceFront;
|
||||
|
||||
// RS - Rasterizer State
|
||||
SWR_RASTSTATE rastState;
|
||||
|
@ -282,8 +280,12 @@ OSALIGNLINE(struct) API_STATE
|
|||
SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
|
||||
bool scissorsTileAligned;
|
||||
|
||||
bool forceFront;
|
||||
PRIMITIVE_TOPOLOGY topology;
|
||||
|
||||
|
||||
// Backend state
|
||||
SWR_BACKEND_STATE backendState;
|
||||
OSALIGNLINE(SWR_BACKEND_STATE) backendState;
|
||||
|
||||
SWR_DEPTH_BOUNDS_STATE depthBoundsState;
|
||||
|
||||
|
@ -400,8 +402,6 @@ struct DRAW_CONTEXT
|
|||
DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
|
||||
};
|
||||
DRAW_STATE* pState; // Read-only state. Core should not update this outside of API thread.
|
||||
DRAW_DYNAMIC_STATE dynState;
|
||||
|
||||
CachingArena* pArena;
|
||||
|
||||
uint32_t drawId;
|
||||
|
@ -412,11 +412,13 @@ struct DRAW_CONTEXT
|
|||
|
||||
FE_WORK FeWork;
|
||||
|
||||
SYNC_DESC retireCallback; // Call this func when this DC is retired.
|
||||
|
||||
DRAW_DYNAMIC_STATE dynState;
|
||||
|
||||
volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
|
||||
volatile OSALIGNLINE(uint32_t) FeLock;
|
||||
volatile OSALIGNLINE(uint32_t) threadsDone;
|
||||
|
||||
SYNC_DESC retireCallback; // Call this func when this DC is retired.
|
||||
};
|
||||
|
||||
static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
|
||||
|
|
|
@ -228,8 +228,8 @@ struct SWR_VS_CONTEXT
|
|||
#if USE_SIMD16_FRONTEND
|
||||
uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
|
||||
#if USE_SIMD16_VS
|
||||
simd16scalari mask16; // IN: Active mask for shader (16-wide)
|
||||
simd16scalari VertexID16; // IN: Vertex ID (16-wide)
|
||||
simd16scalari mask16; // IN: Active mask for shader (16-wide)
|
||||
simd16scalari VertexID16; // IN: Vertex ID (16-wide)
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
@ -553,11 +553,10 @@ struct SWR_SURFACE_STATE
|
|||
// in the fetch shader jit
|
||||
struct SWR_VERTEX_BUFFER_STATE
|
||||
{
|
||||
gfxptr_t xpData;
|
||||
uint32_t index;
|
||||
uint32_t pitch;
|
||||
gfxptr_t xpData;
|
||||
uint32_t size;
|
||||
uint32_t numaNode;
|
||||
uint32_t minVertex; // min vertex (for bounds checking)
|
||||
uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
|
||||
uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
|
||||
|
@ -565,9 +564,9 @@ struct SWR_VERTEX_BUFFER_STATE
|
|||
|
||||
struct SWR_INDEX_BUFFER_STATE
|
||||
{
|
||||
const void *pIndices;
|
||||
// Format type for indices (e.g. UINT16, UINT32, etc.)
|
||||
SWR_FORMAT format; // @llvm_enum
|
||||
const void *pIndices;
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
|
@ -646,12 +645,15 @@ OSALIGNLINE(struct) SWR_STATS_FE
|
|||
|
||||
struct SWR_STREAMOUT_BUFFER
|
||||
{
|
||||
bool enable;
|
||||
bool soWriteEnable;
|
||||
|
||||
// Pointers to streamout buffers.
|
||||
uint32_t* pBuffer;
|
||||
|
||||
// Offset to the SO write offset. If not null then we update offset here.
|
||||
uint32_t* pWriteOffset;
|
||||
|
||||
bool enable;
|
||||
bool soWriteEnable;
|
||||
|
||||
// Size of buffer in dwords.
|
||||
uint32_t bufferSize;
|
||||
|
||||
|
@ -660,10 +662,6 @@ struct SWR_STREAMOUT_BUFFER
|
|||
|
||||
// Offset into buffer in dwords. SOS will increment this offset.
|
||||
uint32_t streamOffset;
|
||||
|
||||
// Offset to the SO write offset. If not null then we update offset here.
|
||||
uint32_t* pWriteOffset;
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -718,6 +716,11 @@ struct SWR_GS_STATE
|
|||
{
|
||||
bool gsEnable;
|
||||
|
||||
// If true, geometry shader emits a single stream, with separate cut buffer.
|
||||
// If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
|
||||
// to map vertices to streams
|
||||
bool isSingleStream;
|
||||
|
||||
// Number of input attributes per vertex. Used by the frontend to
|
||||
// optimize assembling primitives for GS
|
||||
uint32_t numInputAttribs;
|
||||
|
@ -730,15 +733,10 @@ struct SWR_GS_STATE
|
|||
|
||||
// Maximum number of verts that can be emitted by a single instance of the GS
|
||||
uint32_t maxNumVerts;
|
||||
|
||||
|
||||
// Instance count
|
||||
uint32_t instanceCount;
|
||||
|
||||
// If true, geometry shader emits a single stream, with separate cut buffer.
|
||||
// If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
|
||||
// to map vertices to streams
|
||||
bool isSingleStream;
|
||||
|
||||
// When single stream is enabled, singleStreamID dictates which stream is being output.
|
||||
// field ignored if isSingleStream is false
|
||||
uint32_t singleStreamID;
|
||||
|
@ -768,7 +766,11 @@ struct SWR_GS_STATE
|
|||
// Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
|
||||
// expected to store the final vertex count in the first dword of the gs output stream.
|
||||
uint32_t staticVertexCount;
|
||||
|
||||
uint32_t pad;
|
||||
};
|
||||
static_assert(sizeof(SWR_GS_STATE) == 64,
|
||||
"Adjust padding to keep size (or remove this assert)");
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -814,6 +816,7 @@ enum SWR_TS_DOMAIN
|
|||
struct SWR_TS_STATE
|
||||
{
|
||||
bool tsEnable;
|
||||
|
||||
SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
|
||||
SWR_TS_PARTITIONING partitioning; // @llvm_enum
|
||||
SWR_TS_DOMAIN domain; // @llvm_enum
|
||||
|
@ -863,11 +866,11 @@ struct SWR_BLEND_STATE
|
|||
float constantColor[4];
|
||||
|
||||
// alpha test reference value in unorm8 or float32
|
||||
uint32_t alphaTestReference;
|
||||
uint32_t alphaTestReference;
|
||||
uint32_t sampleMask;
|
||||
// all RT's have the same sample count
|
||||
///@todo move this to Output Merger state when we refactor
|
||||
SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
|
||||
SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
|
||||
|
||||
SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
|
||||
};
|
||||
|
@ -889,8 +892,8 @@ typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsConte
|
|||
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
|
||||
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
|
||||
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
|
||||
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
|
||||
simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
|
||||
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
|
||||
simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
|
||||
uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
|
||||
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
|
||||
|
||||
|
@ -917,7 +920,7 @@ struct SWR_FRONTEND_STATE
|
|||
} provokingVertex;
|
||||
uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
|
||||
|
||||
// Size of a vertex in simdvector units. Should be sized to the
|
||||
// Size of a vertex in simdvector units. Should be sized to the
|
||||
// maximum of the input/output of the vertex shader.
|
||||
uint32_t vsVertexSize;
|
||||
};
|
||||
|
@ -1013,7 +1016,7 @@ public:
|
|||
INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
|
||||
INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
|
||||
INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
|
||||
|
||||
|
||||
INLINE void PrecalcSampleData(int numSamples); //@llvm_func
|
||||
|
||||
private:
|
||||
|
@ -1081,7 +1084,7 @@ enum SWR_CONSTANT_SOURCE
|
|||
|
||||
struct SWR_ATTRIB_SWIZZLE
|
||||
{
|
||||
uint16_t sourceAttrib : 5; // source attribute
|
||||
uint16_t sourceAttrib : 5; // source attribute
|
||||
uint16_t constantSource : 2; // constant source to apply
|
||||
uint16_t componentOverrideMask : 4; // override component with constant source
|
||||
};
|
||||
|
@ -1092,27 +1095,33 @@ struct SWR_BACKEND_STATE
|
|||
uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
|
||||
uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
|
||||
|
||||
bool swizzleEnable; // when enabled, core will parse the swizzle map when
|
||||
// setting up attributes for the backend, otherwise
|
||||
// all attributes up to numAttributes will be sent
|
||||
uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
|
||||
uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
|
||||
|
||||
bool swizzleEnable; // when enabled, core will parse the swizzle map when
|
||||
// setting up attributes for the backend, otherwise
|
||||
// all attributes up to numAttributes will be sent
|
||||
SWR_ATTRIB_SWIZZLE swizzleMap[32];
|
||||
|
||||
bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
|
||||
bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
|
||||
|
||||
// Offset to the start of the attributes of the input vertices, in simdvector units
|
||||
uint32_t vertexAttribOffset;
|
||||
|
||||
// User clip/cull distance enables
|
||||
uint8_t cullDistanceMask;
|
||||
uint8_t clipDistanceMask;
|
||||
|
||||
// padding to ensure swizzleMap starts 64B offset from start of the struct
|
||||
// and that the next fields are dword aligned.
|
||||
uint8_t pad[10];
|
||||
|
||||
// Offset to the start of the attributes of the input vertices, in simdvector units
|
||||
uint32_t vertexAttribOffset;
|
||||
|
||||
// Offset to clip/cull attrib section of the vertex, in simdvector units
|
||||
uint32_t vertexClipCullOffset;
|
||||
|
||||
SWR_ATTRIB_SWIZZLE swizzleMap[32];
|
||||
};
|
||||
static_assert(sizeof(SWR_BACKEND_STATE) == 128,
|
||||
"Adjust padding to keep size (or remove this assert)");
|
||||
|
||||
|
||||
union SWR_DEPTH_STENCIL_STATE
|
||||
|
@ -1167,8 +1176,8 @@ enum SWR_INPUT_COVERAGE
|
|||
|
||||
enum SWR_PS_POSITION_OFFSET
|
||||
{
|
||||
SWR_PS_POSITION_SAMPLE_NONE,
|
||||
SWR_PS_POSITION_SAMPLE_OFFSET,
|
||||
SWR_PS_POSITION_SAMPLE_NONE,
|
||||
SWR_PS_POSITION_SAMPLE_OFFSET,
|
||||
SWR_PS_POSITION_CENTROID_OFFSET,
|
||||
SWR_PS_POSITION_OFFSET_COUNT,
|
||||
};
|
||||
|
@ -1194,7 +1203,7 @@ struct SWR_PS_STATE
|
|||
uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
|
||||
uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
|
||||
uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
|
||||
uint32_t usesUAV : 1; // pixel shader accesses UAV
|
||||
uint32_t usesUAV : 1; // pixel shader accesses UAV
|
||||
uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
|
||||
|
||||
uint8_t renderTargetMask; // Mask of render targets written
|
||||
|
|
Loading…
Reference in New Issue