swr: [rasterizer archrast] add support for stats files
Only stat and counter events are saved to the event files. Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
This commit is contained in:
parent
f4684cdb5f
commit
3805e40f32
|
@ -34,6 +34,19 @@
|
|||
|
||||
namespace ArchRast
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Event handler that saves stat events to event files. This
|
||||
/// handler filters out unwanted events.
|
||||
class EventHandlerStatsFile : public EventHandlerFile
|
||||
{
|
||||
public:
|
||||
// Passes the id straight through to the EventHandlerFile constructor; this
// class declares no data members of its own.
EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
|
||||
|
||||
// These are events that we're not interested in saving in stats event files.
// The handlers below have empty bodies, so this class does nothing for Start
// and End events. NOTE(review): presumably these override handlers declared in
// a base class that would otherwise record the event -- confirm the base
// signatures match, since no `override` specifier enforces it here.
|
||||
virtual void handle(Start& event) {}
|
||||
virtual void handle(End& event) {}
|
||||
};
|
||||
|
||||
static EventManager* FromHandle(HANDLE hThreadContext)
|
||||
{
|
||||
return reinterpret_cast<EventManager*>(hThreadContext);
|
||||
|
@ -47,7 +60,7 @@ namespace ArchRast
|
|||
uint32_t id = counter.fetch_add(1);
|
||||
|
||||
EventManager* pManager = new EventManager();
|
||||
EventHandler* pHandler = new EventHandlerFile(id);
|
||||
EventHandler* pHandler = new EventHandlerStatsFile(id);
|
||||
|
||||
if (pManager && pHandler)
|
||||
{
|
||||
|
|
|
@ -1105,6 +1105,7 @@ void DrawInstanced(
|
|||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
|
||||
AR_API_BEGIN(APIDraw, pDC->drawId);
|
||||
AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertices, startVertex, numInstances, startInstance));
|
||||
|
||||
uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
|
||||
uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
|
||||
|
@ -1230,7 +1231,7 @@ void DrawIndexedInstance(
|
|||
API_STATE* pState = &pDC->pState->state;
|
||||
|
||||
AR_API_BEGIN(APIDrawIndexed, pDC->drawId);
|
||||
AR_API_EVENT(DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
|
||||
AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
|
||||
|
||||
uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
|
||||
uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
|
||||
|
@ -1436,7 +1437,7 @@ void SwrDispatch(
|
|||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
|
||||
AR_API_BEGIN(APIDispatch, pDC->drawId);
|
||||
|
||||
AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
|
||||
pDC->isCompute = true; // This is a compute context.
|
||||
|
||||
COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
|
||||
|
@ -1593,7 +1594,11 @@ void SwrEnableStats(
|
|||
void SWR_API SwrEndFrame(
|
||||
HANDLE hContext)
|
||||
{
|
||||
RDTSC_ENDFRAME();
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
|
||||
RDTSC_ENDFRAME();
|
||||
AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId));
|
||||
|
||||
pContext->frameCount++;
|
||||
}
|
||||
|
|
|
@ -315,7 +315,7 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastReti
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Update client stats.
|
||||
INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
||||
INLINE void UpdateClientStats(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
|
||||
{
|
||||
if ((pContext->pfnUpdateStats == nullptr) || (GetApiState(pDC).enableStats == false))
|
||||
{
|
||||
|
@ -334,12 +334,13 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
|||
stats.CsInvocations += dynState.pStats[i].CsInvocations;
|
||||
}
|
||||
|
||||
|
||||
pContext->pfnUpdateStats(GetPrivateState(pDC), &stats);
|
||||
}
|
||||
|
||||
INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
||||
INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
|
||||
{
|
||||
UpdateClientStats(pContext, pDC);
|
||||
UpdateClientStats(pContext, workerId, pDC);
|
||||
|
||||
if (pDC->retireCallback.pfnCallbackFunc)
|
||||
{
|
||||
|
@ -350,14 +351,14 @@ INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
|||
}
|
||||
|
||||
// inlined-only version
|
||||
INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
||||
INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
|
||||
{
|
||||
int32_t result = InterlockedDecrement((volatile LONG*)&pDC->threadsDone);
|
||||
SWR_ASSERT(result >= 0);
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
ExecuteCallbacks(pContext, pDC);
|
||||
ExecuteCallbacks(pContext, workerId, pDC);
|
||||
|
||||
// Cleanup memory allocations
|
||||
pDC->pArena->Reset(true);
|
||||
|
@ -381,10 +382,10 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
|||
// available to other translation modules
|
||||
int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
||||
{
|
||||
return CompleteDrawContextInl(pContext, pDC);
|
||||
return CompleteDrawContextInl(pContext, 0, pDC);
|
||||
}
|
||||
|
||||
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t& curDrawBE, uint32_t& drawEnqueued)
|
||||
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE, uint32_t& drawEnqueued)
|
||||
{
|
||||
// increment our current draw id to the first incomplete draw
|
||||
drawEnqueued = GetEnqueuedDraw(pContext);
|
||||
|
@ -402,7 +403,7 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t& curDrawBE,
|
|||
if (isWorkComplete)
|
||||
{
|
||||
curDrawBE++;
|
||||
CompleteDrawContextInl(pContext, pDC);
|
||||
CompleteDrawContextInl(pContext, workerId, pDC);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -442,7 +443,7 @@ bool WorkOnFifoBE(
|
|||
// Find the first incomplete draw that has pending work. If no such draw is found then
|
||||
// return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
|
||||
uint32_t drawEnqueued = 0;
|
||||
if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
|
||||
if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -537,7 +538,7 @@ bool WorkOnFifoBE(
|
|||
{
|
||||
// We can increment the current BE and safely move to next draw since we know this draw is complete.
|
||||
curDrawBE++;
|
||||
CompleteDrawContextInl(pContext, pDC);
|
||||
CompleteDrawContextInl(pContext, workerId, pDC);
|
||||
|
||||
lastRetiredDraw++;
|
||||
|
||||
|
@ -563,11 +564,20 @@ bool WorkOnFifoBE(
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Called when FE work is complete for this DC.
|
||||
INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
||||
INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
|
||||
{
|
||||
if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStats)
|
||||
{
|
||||
pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &pDC->dynState.statsFE);
|
||||
SWR_STATS_FE& stats = pDC->dynState.statsFE;
|
||||
|
||||
AR_EVENT(FrontendStatsEvent(
|
||||
stats.IaVertices, stats.IaPrimitives, stats.VsInvocations, stats.HsInvocations,
|
||||
stats.DsInvocations, stats.GsInvocations, stats.GsPrimitives, stats.CInvocations, stats.CPrimitives,
|
||||
stats.SoPrimStorageNeeded[0], stats.SoPrimStorageNeeded[1], stats.SoPrimStorageNeeded[2], stats.SoPrimStorageNeeded[3],
|
||||
stats.SoNumPrimsWritten[0], stats.SoNumPrimsWritten[1], stats.SoNumPrimsWritten[2], stats.SoNumPrimsWritten[3]
|
||||
));
|
||||
|
||||
pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats);
|
||||
}
|
||||
|
||||
if (pContext->pfnUpdateSoWriteOffset)
|
||||
|
@ -598,7 +608,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
|
|||
DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
|
||||
if (pDC->isCompute || pDC->doneFE || pDC->FeLock)
|
||||
{
|
||||
CompleteDrawContextInl(pContext, pDC);
|
||||
CompleteDrawContextInl(pContext, workerId, pDC);
|
||||
curDrawFE++;
|
||||
}
|
||||
else
|
||||
|
@ -621,7 +631,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
|
|||
// successfully grabbed the DC, now run the FE
|
||||
pDC->FeWork.pfnWork(pContext, pDC, workerId, &pDC->FeWork.desc);
|
||||
|
||||
CompleteDrawFE(pContext, pDC);
|
||||
CompleteDrawFE(pContext, workerId, pDC);
|
||||
}
|
||||
}
|
||||
curDraw++;
|
||||
|
@ -641,7 +651,7 @@ void WorkOnCompute(
|
|||
uint32_t& curDrawBE)
|
||||
{
|
||||
uint32_t drawEnqueued = 0;
|
||||
if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
|
||||
if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include "common/os.h"
|
||||
#include "${event_header}"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
namespace ArchRast
|
||||
{
|
||||
|
@ -44,11 +45,19 @@ namespace ArchRast
|
|||
EventHandlerFile(uint32_t id)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
DWORD pid = GetCurrentProcessId();
|
||||
TCHAR procname[MAX_PATH];
|
||||
GetModuleFileName(NULL, procname, MAX_PATH);
|
||||
const char* pBaseName = strrchr(procname, '\\');
|
||||
std::stringstream outDir;
|
||||
outDir << KNOB_DEBUG_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
|
||||
CreateDirectory(outDir.str().c_str(), NULL);
|
||||
|
||||
char buf[255];
|
||||
// There could be multiple threads creating thread pools. We
|
||||
// want to make sure they are uniquely identified by adding in
|
||||
// the creator's thread id into the filename.
|
||||
sprintf(buf, "\\ar_event%d_%d.bin", GetCurrentThreadId(), id);
|
||||
sprintf(buf, "%s\\ar_event%d_%d.bin", outDir.str().c_str(), GetCurrentThreadId(), id);
|
||||
mFilename = std::string(buf);
|
||||
#else
|
||||
SWR_ASSERT(0);
|
||||
|
|
Loading…
Reference in New Issue