swr/rast: enable per-warp scratch space for CS
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
0424e6249a
commit
dabd0499a6
|
@ -592,12 +592,16 @@ void SwrSetCsFunc(
|
|||
HANDLE hContext,
|
||||
PFN_CS_FUNC pfnCsFunc,
|
||||
uint32_t totalThreadsInGroup,
|
||||
uint32_t totalSpillFillSize)
|
||||
uint32_t totalSpillFillSize,
|
||||
uint32_t scratchSpaceSizePerInstance,
|
||||
uint32_t numInstances)
|
||||
{
|
||||
API_STATE* pState = GetDrawState(GetContext(hContext));
|
||||
pState->pfnCsFunc = pfnCsFunc;
|
||||
pState->totalThreadsInGroup = totalThreadsInGroup;
|
||||
pState->totalSpillFillSize = totalSpillFillSize;
|
||||
pState->scratchSpaceSize = scratchSpaceSizePerInstance;
|
||||
pState->scratchSpaceNumInstances = numInstances;
|
||||
}
|
||||
|
||||
void SwrSetTsState(
|
||||
|
|
|
@ -366,11 +366,16 @@ void SWR_API SwrSetGsFunc(
|
|||
/// @param pfnCsFunc - Pointer to compute shader function
|
||||
/// @param totalThreadsInGroup - product of thread group dimensions.
|
||||
/// @param totalSpillFillSize - size in bytes needed for spill/fill.
|
||||
/// @param scratchSpaceSizePerInstance - size of the scratch space needed per simd instance
|
||||
/// @param numInstances - number of simd instances that are run per execution of the shader
|
||||
void SWR_API SwrSetCsFunc(
|
||||
HANDLE hContext,
|
||||
PFN_CS_FUNC pfnCsFunc,
|
||||
uint32_t totalThreadsInGroup,
|
||||
uint32_t totalSpillFillSize);
|
||||
uint32_t totalSpillFillSize,
|
||||
uint32_t scratchSpaceSizePerInstance,
|
||||
uint32_t numInstances
|
||||
);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set tessellation state.
|
||||
|
|
|
@ -45,7 +45,7 @@ static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
|
|||
/// @param pDC - pointer to draw context (dispatch).
|
||||
/// @param workerId - The unique worker ID that is assigned to this thread.
|
||||
/// @param threadGroupId - the linear index for the thread group within the dispatch.
|
||||
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
|
||||
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
|
@ -61,6 +61,12 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
|
|||
pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES);
|
||||
}
|
||||
|
||||
size_t scratchSpaceSize = pDC->pState->state.scratchSpaceSize * pDC->pState->state.scratchSpaceNumInstances;
|
||||
if (scratchSpaceSize && pScratchSpace == nullptr)
|
||||
{
|
||||
pScratchSpace = pDC->pArena->AllocAlignedSync(scratchSpaceSize, KNOB_SIMD_BYTES);
|
||||
}
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
|
||||
SWR_CS_CONTEXT csContext{ 0 };
|
||||
|
@ -70,6 +76,8 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
|
|||
csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
|
||||
csContext.pTGSM = pContext->ppScratch[workerId];
|
||||
csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
|
||||
csContext.pScratchSpace = (uint8_t*)pScratchSpace;
|
||||
csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize;
|
||||
|
||||
state.pfnCsFunc(GetPrivateState(pDC), &csContext);
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
#include "depthstencil.h"
|
||||
#include "rdtsc_core.h"
|
||||
|
||||
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
|
||||
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
|
||||
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
|
||||
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
|
||||
void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
|
|
|
@ -245,6 +245,8 @@ OSALIGNLINE(struct) API_STATE
|
|||
PFN_CS_FUNC pfnCsFunc;
|
||||
uint32_t totalThreadsInGroup;
|
||||
uint32_t totalSpillFillSize;
|
||||
uint32_t scratchSpaceSize;
|
||||
uint32_t scratchSpaceNumInstances;
|
||||
|
||||
// FE - Frontend State
|
||||
SWR_FRONTEND_STATE frontendState;
|
||||
|
|
|
@ -378,6 +378,11 @@ struct SWR_CS_CONTEXT
|
|||
uint8_t* pTGSM; // Thread Group Shared Memory pointer.
|
||||
|
||||
uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
|
||||
|
||||
uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is responsible
|
||||
// for subdividing scratch space per instance/simd
|
||||
|
||||
uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
|
||||
};
|
||||
|
||||
// enums
|
||||
|
|
|
@ -726,10 +726,11 @@ void WorkOnCompute(
|
|||
if (queue.getNumQueued() > 0)
|
||||
{
|
||||
void* pSpillFillBuffer = nullptr;
|
||||
void* pScratchSpace = nullptr;
|
||||
uint32_t threadGroupId = 0;
|
||||
while (queue.getWork(threadGroupId))
|
||||
{
|
||||
queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer);
|
||||
queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
|
||||
queue.finishedWork();
|
||||
}
|
||||
|
||||
|
|
|
@ -151,7 +151,7 @@ private:
|
|||
OSALIGNLINE(volatile LONG) mWorkItemsConsumed { 0 };
|
||||
};
|
||||
|
||||
typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
|
||||
typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// DispatchQueue - work queue for dispatch
|
||||
|
@ -231,10 +231,10 @@ public:
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Dispatches a unit of work
|
||||
void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
|
||||
void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
|
||||
{
|
||||
SWR_ASSERT(mPfnDispatch != nullptr);
|
||||
mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer);
|
||||
mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
|
||||
}
|
||||
|
||||
void* mpTaskData{ nullptr };        // The API thread will set this up and the callback task function will interpret this.
|
||||
|
|
Loading…
Reference in New Issue