diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index b1c5134caf1..c29bb884588 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -34,6 +34,19 @@ namespace ArchRast { + ////////////////////////////////////////////////////////////////////////// + /// @brief Event handler that saves stat events to event files. This + /// handler filters out unwanted events by overriding them as no-ops. + class EventHandlerStatsFile : public EventHandlerFile + { + public: + explicit EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {} + + // Start and End events are deliberately dropped; they are not wanted in stats event files. + virtual void handle(Start& event) override {} + virtual void handle(End& event) override {} + }; + static EventManager* FromHandle(HANDLE hThreadContext) { return reinterpret_cast(hThreadContext); @@ -47,7 +60,7 @@ namespace ArchRast uint32_t id = counter.fetch_add(1); EventManager* pManager = new EventManager(); - EventHandler* pHandler = new EventHandlerFile(id); + EventHandler* pHandler = new EventHandlerStatsFile(id); if (pManager && pHandler) { diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index cb0098d3db3..119dbdee437 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -1105,6 +1105,7 @@ void DrawInstanced( DRAW_CONTEXT* pDC = GetDrawContext(pContext); AR_API_BEGIN(APIDraw, pDC->drawId); + AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertices, startVertex, numInstances, startInstance)); uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology); uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw); @@ -1230,7 +1231,7 @@ void DrawIndexedInstance( API_STATE* pState = &pDC->pState->state; AR_API_BEGIN(APIDrawIndexed, pDC->drawId); - AR_API_EVENT(DrawIndexedInstance(topology, numIndices, 
indexOffset, baseVertex, numInstances, startInstance)); + AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance)); uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology); uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw); @@ -1436,7 +1437,7 @@ void SwrDispatch( DRAW_CONTEXT* pDC = GetDrawContext(pContext); AR_API_BEGIN(APIDispatch, pDC->drawId); - + AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ)); pDC->isCompute = true; // This is a compute context. COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64); @@ -1593,7 +1594,11 @@ void SwrEnableStats( void SWR_API SwrEndFrame( HANDLE hContext) { - RDTSC_ENDFRAME(); SWR_CONTEXT *pContext = GetContext(hContext); + DRAW_CONTEXT* pDC = GetDrawContext(pContext); /* used below only for the drawId carried by the frame-end event */ + + RDTSC_ENDFRAME(); + AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId)); /* emitted before the increment, so it carries the count of the frame being ended */ + pContext->frameCount++; } diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 08a4d17821c..6e7495cdc50 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -315,7 +315,7 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastReti ////////////////////////////////////////////////////////////////////////// /// @brief Update client stats. 
-INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) +INLINE void UpdateClientStats(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC) { if ((pContext->pfnUpdateStats == nullptr) || (GetApiState(pDC).enableStats == false)) { @@ -334,12 +334,13 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) stats.CsInvocations += dynState.pStats[i].CsInvocations; } + pContext->pfnUpdateStats(GetPrivateState(pDC), &stats); } -INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) +INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC) { - UpdateClientStats(pContext, pDC); + UpdateClientStats(pContext, workerId, pDC); if (pDC->retireCallback.pfnCallbackFunc) { @@ -350,14 +351,14 @@ INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) } // inlined-only version -INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) +INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC) { int32_t result = InterlockedDecrement((volatile LONG*)&pDC->threadsDone); SWR_ASSERT(result >= 0); if (result == 0) { - ExecuteCallbacks(pContext, pDC); + ExecuteCallbacks(pContext, workerId, pDC); // Cleanup memory allocations pDC->pArena->Reset(true); @@ -381,10 +382,10 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) // available to other translation modules int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) { - return CompleteDrawContextInl(pContext, pDC); + return CompleteDrawContextInl(pContext, 0, pDC); /* this wrapper has no workerId parameter, so a fixed 0 is forwarded */ } -INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t& curDrawBE, uint32_t& drawEnqueued) +INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE, uint32_t& drawEnqueued) { // increment our current draw id to the first incomplete draw drawEnqueued = GetEnqueuedDraw(pContext); @@ -402,7 +403,7 @@ INLINE bool 
FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t& curDrawBE, if (isWorkComplete) { curDrawBE++; - CompleteDrawContextInl(pContext, pDC); + CompleteDrawContextInl(pContext, workerId, pDC); } else { @@ -442,7 +443,7 @@ bool WorkOnFifoBE( // Find the first incomplete draw that has pending work. If no such draw is found then // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE. uint32_t drawEnqueued = 0; - if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false) + if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false) { return false; } @@ -537,7 +538,7 @@ bool WorkOnFifoBE( { // We can increment the current BE and safely move to next draw since we know this draw is complete. curDrawBE++; - CompleteDrawContextInl(pContext, pDC); + CompleteDrawContextInl(pContext, workerId, pDC); lastRetiredDraw++; @@ -563,11 +564,20 @@ bool WorkOnFifoBE( ////////////////////////////////////////////////////////////////////////// /// @brief Called when FE work is complete for this DC. 
-INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) +INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC) { if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStats) { - pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &pDC->dynState.statsFE); + SWR_STATS_FE& stats = pDC->dynState.statsFE; + + AR_EVENT(FrontendStatsEvent( + stats.IaVertices, stats.IaPrimitives, stats.VsInvocations, stats.HsInvocations, + stats.DsInvocations, stats.GsInvocations, stats.GsPrimitives, stats.CInvocations, stats.CPrimitives, + stats.SoPrimStorageNeeded[0], stats.SoPrimStorageNeeded[1], stats.SoPrimStorageNeeded[2], stats.SoPrimStorageNeeded[3], + stats.SoNumPrimsWritten[0], stats.SoNumPrimsWritten[1], stats.SoNumPrimsWritten[2], stats.SoNumPrimsWritten[3] + )); + + pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats); } if (pContext->pfnUpdateSoWriteOffset) @@ -598,7 +608,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE) DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot]; if (pDC->isCompute || pDC->doneFE || pDC->FeLock) { - CompleteDrawContextInl(pContext, pDC); + CompleteDrawContextInl(pContext, workerId, pDC); curDrawFE++; } else @@ -621,7 +631,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE) // successfully grabbed the DC, now run the FE pDC->FeWork.pfnWork(pContext, pDC, workerId, &pDC->FeWork.desc); - CompleteDrawFE(pContext, pDC); + CompleteDrawFE(pContext, workerId, pDC); } } curDraw++; @@ -641,7 +651,7 @@ void WorkOnCompute( uint32_t& curDrawBE) { uint32_t drawEnqueued = 0; - if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false) + if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false) { return; } diff --git a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandlerfile_h.template b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandlerfile_h.template index 
6264442b089..2e3b5c32464 100644 --- a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandlerfile_h.template +++ b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandlerfile_h.template @@ -32,6 +32,7 @@ #include "common/os.h" #include "${event_header}" #include +#include <sstream> namespace ArchRast { @@ -44,11 +45,22 @@ namespace ArchRast EventHandlerFile(uint32_t id) { #if defined(_WIN32) - char buf[255]; + DWORD pid = GetCurrentProcessId(); + // The path is handled as narrow char below, so call the ANSI entry points explicitly. + char procname[MAX_PATH] = {}; + GetModuleFileNameA(NULL, procname, MAX_PATH); + // strrchr returns NULL when there is no '\\'; fall back to the whole string in that case. + const char* pSlash = strrchr(procname, '\\'); + const char* pBaseName = (pSlash != nullptr) ? pSlash + 1 : procname; + std::stringstream outDir; + outDir << KNOB_DEBUG_OUTPUT_DIR << "\\" << pBaseName << "_" << pid; + CreateDirectoryA(outDir.str().c_str(), NULL); + // There could be multiple threads creating thread pools. We // want to make sure they are uniquly identified by adding in // the creator's thread id into the filename. - sprintf(buf, "\\ar_event%d_%d.bin", GetCurrentThreadId(), id); - mFilename = std::string(buf); + std::stringstream fileName; + fileName << outDir.str() << "\\ar_event" << GetCurrentThreadId() << "_" << id << ".bin"; + mFilename = fileName.str(); #else SWR_ASSERT(0);