swr: [rasterizer core] allow override of KNOB thread settings
- Remove HYPERTHREADED_FE support - Add threading info as optional data passed to SwrCreateContext. If supplied, this data will override any KNOB thread settings. Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
This commit is contained in:
parent
e0c10306f5
commit
29e1c4a8a9
|
@ -75,6 +75,17 @@ HANDLE SwrCreateContext(
|
|||
pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
|
||||
pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
|
||||
|
||||
pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
|
||||
pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
|
||||
pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
|
||||
pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
|
||||
pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
|
||||
|
||||
if (pCreateInfo->pThreadInfo)
|
||||
{
|
||||
pContext->threadInfo = *pCreateInfo->pThreadInfo;
|
||||
}
|
||||
|
||||
for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
|
||||
{
|
||||
pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
|
||||
|
@ -84,7 +95,7 @@ HANDLE SwrCreateContext(
|
|||
pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
|
||||
}
|
||||
|
||||
if (!KNOB_SINGLE_THREADED)
|
||||
if (!pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock));
|
||||
memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
|
||||
|
@ -95,9 +106,8 @@ HANDLE SwrCreateContext(
|
|||
}
|
||||
|
||||
// Calling CreateThreadPool() above can set SINGLE_THREADED
|
||||
if (KNOB_SINGLE_THREADED)
|
||||
if (pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
SET_KNOB(HYPERTHREADED_FE, false);
|
||||
pContext->NumWorkerThreads = 1;
|
||||
pContext->NumFEThreads = 1;
|
||||
pContext->NumBEThreads = 1;
|
||||
|
@ -218,7 +228,7 @@ void QueueWork(SWR_CONTEXT *pContext)
|
|||
pContext->dcRing.Enqueue();
|
||||
}
|
||||
|
||||
if (KNOB_SINGLE_THREADED)
|
||||
if (pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
// flush denormals to 0
|
||||
uint32_t mxcsr = _mm_getcsr();
|
||||
|
|
|
@ -90,6 +90,18 @@ typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
|
|||
|
||||
class BucketManager;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_THREADING_INFO
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
/// Per-context threading settings. SwrCreateContext seeds these from the
/// corresponding KNOB_* values and then replaces the whole struct with
/// *pCreateInfo->pThreadInfo when that pointer is non-null.
/// For the four uint32_t limits, a value of 0 means "no limit requested".
struct SWR_THREADING_INFO
|
||||
{
|
||||
uint32_t MAX_WORKER_THREADS; // If non-zero: thread count becomes min(this, HW thread count), and bindThread() skips binding unless proc-group binding is forced.
|
||||
uint32_t MAX_NUMA_NODES; // If non-zero: caps the number of NUMA nodes CreateThreadPool uses.
|
||||
uint32_t MAX_CORES_PER_NUMA_NODE; // If non-zero: caps the number of cores used per NUMA node.
|
||||
uint32_t MAX_THREADS_PER_CORE; // If non-zero: caps the number of hyperthreads used per core.
|
||||
bool SINGLE_THREADED; // If true: the context runs with NumWorkerThreads/NumFEThreads/NumBEThreads = 1 and DestroyThreadPool skips worker shutdown.
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_CREATECONTEXT_INFO
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
@ -113,6 +125,9 @@ struct SWR_CREATECONTEXT_INFO
|
|||
|
||||
// Output: size of the memory required to be passed to SwrSaveState / SwrRestoreState
|
||||
size_t contextSaveSize;
|
||||
|
||||
// Input (optional): Threading info that overrides any set KNOB values.
|
||||
SWR_THREADING_INFO* pThreadInfo;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -464,6 +464,7 @@ struct SWR_CONTEXT
|
|||
uint32_t NumBEThreads;
|
||||
|
||||
THREAD_POOL threadPool; // Thread pool associated with this context
|
||||
SWR_THREADING_INFO threadInfo;
|
||||
|
||||
std::condition_variable FifosNotEmpty;
|
||||
std::mutex WaitLock;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -239,10 +239,10 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
|
|||
}
|
||||
|
||||
|
||||
void bindThread(uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
|
||||
void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
|
||||
{
|
||||
// Only bind threads when MAX_WORKER_THREADS isn't set.
|
||||
if (KNOB_MAX_WORKER_THREADS && bindProcGroup == false)
|
||||
if (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -267,9 +267,9 @@ void bindThread(uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=
|
|||
else
|
||||
#endif
|
||||
{
|
||||
// If KNOB_MAX_WORKER_THREADS is set, only bind to the proc group,
|
||||
// If MAX_WORKER_THREADS is set, only bind to the proc group,
|
||||
// Not the individual HW thread.
|
||||
if (!KNOB_MAX_WORKER_THREADS)
|
||||
if (!pContext->threadInfo.MAX_WORKER_THREADS)
|
||||
{
|
||||
affinity.Mask = KAFFINITY(1) << threadId;
|
||||
}
|
||||
|
@ -648,7 +648,7 @@ DWORD workerThreadMain(LPVOID pData)
|
|||
uint32_t threadId = pThreadData->threadId;
|
||||
uint32_t workerId = pThreadData->workerId;
|
||||
|
||||
bindThread(threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
|
||||
bindThread(pContext, threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
|
||||
|
||||
RDTSC_INIT(threadId);
|
||||
|
||||
|
@ -771,7 +771,7 @@ template<> DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
|
|||
|
||||
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
||||
{
|
||||
bindThread(0);
|
||||
bindThread(pContext, 0);
|
||||
|
||||
CPUNumaNodes nodes;
|
||||
uint32_t numThreadsPerProcGroup = 0;
|
||||
|
@ -796,33 +796,23 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
|||
uint32_t numCoresPerNode = numHWCoresPerNode;
|
||||
uint32_t numHyperThreads = numHWHyperThreads;
|
||||
|
||||
if (KNOB_MAX_WORKER_THREADS)
|
||||
if (pContext->threadInfo.MAX_NUMA_NODES)
|
||||
{
|
||||
SET_KNOB(HYPERTHREADED_FE, false);
|
||||
numNodes = std::min(numNodes, pContext->threadInfo.MAX_NUMA_NODES);
|
||||
}
|
||||
|
||||
if (KNOB_HYPERTHREADED_FE)
|
||||
if (pContext->threadInfo.MAX_CORES_PER_NUMA_NODE)
|
||||
{
|
||||
SET_KNOB(MAX_THREADS_PER_CORE, 0);
|
||||
numCoresPerNode = std::min(numCoresPerNode, pContext->threadInfo.MAX_CORES_PER_NUMA_NODE);
|
||||
}
|
||||
|
||||
if (KNOB_MAX_NUMA_NODES)
|
||||
if (pContext->threadInfo.MAX_THREADS_PER_CORE)
|
||||
{
|
||||
numNodes = std::min(numNodes, KNOB_MAX_NUMA_NODES);
|
||||
}
|
||||
|
||||
if (KNOB_MAX_CORES_PER_NUMA_NODE)
|
||||
{
|
||||
numCoresPerNode = std::min(numCoresPerNode, KNOB_MAX_CORES_PER_NUMA_NODE);
|
||||
}
|
||||
|
||||
if (KNOB_MAX_THREADS_PER_CORE)
|
||||
{
|
||||
numHyperThreads = std::min(numHyperThreads, KNOB_MAX_THREADS_PER_CORE);
|
||||
numHyperThreads = std::min(numHyperThreads, pContext->threadInfo.MAX_THREADS_PER_CORE);
|
||||
}
|
||||
|
||||
#if defined(_WIN32) && !defined(_WIN64)
|
||||
if (!KNOB_MAX_WORKER_THREADS)
|
||||
if (!pContext->threadInfo.MAX_WORKER_THREADS)
|
||||
{
|
||||
// Limit 32-bit windows to bindable HW threads only
|
||||
if ((numCoresPerNode * numHWHyperThreads) > 32)
|
||||
|
@ -832,19 +822,14 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (numHyperThreads < 2)
|
||||
{
|
||||
SET_KNOB(HYPERTHREADED_FE, false);
|
||||
}
|
||||
|
||||
// Calculate numThreads
|
||||
uint32_t numThreads = numNodes * numCoresPerNode * numHyperThreads;
|
||||
numThreads = std::min(numThreads, numHWThreads);
|
||||
|
||||
if (KNOB_MAX_WORKER_THREADS)
|
||||
if (pContext->threadInfo.MAX_WORKER_THREADS)
|
||||
{
|
||||
uint32_t maxHWThreads = numHWNodes * numHWCoresPerNode * numHWHyperThreads;
|
||||
numThreads = std::min(KNOB_MAX_WORKER_THREADS, maxHWThreads);
|
||||
numThreads = std::min(pContext->threadInfo.MAX_WORKER_THREADS, maxHWThreads);
|
||||
}
|
||||
|
||||
if (numThreads > KNOB_MAX_NUM_THREADS)
|
||||
|
@ -900,7 +885,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
|||
pPool->pThreadData = (THREAD_DATA *)malloc(pPool->numThreads * sizeof(THREAD_DATA));
|
||||
pPool->numaMask = 0;
|
||||
|
||||
if (KNOB_MAX_WORKER_THREADS)
|
||||
if (pContext->threadInfo.MAX_WORKER_THREADS)
|
||||
{
|
||||
bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
|
||||
uint32_t numProcGroups = (numThreads + numThreadsPerProcGroup - 1) / numThreadsPerProcGroup;
|
||||
|
@ -962,25 +947,9 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
|||
pPool->pThreadData[workerId].htId = t;
|
||||
pPool->pThreadData[workerId].pContext = pContext;
|
||||
|
||||
if (KNOB_HYPERTHREADED_FE)
|
||||
{
|
||||
if (t == 0)
|
||||
{
|
||||
pContext->NumBEThreads++;
|
||||
pPool->threads[workerId] = new std::thread(workerThreadInit<false, true>, &pPool->pThreadData[workerId]);
|
||||
}
|
||||
else
|
||||
{
|
||||
pContext->NumFEThreads++;
|
||||
pPool->threads[workerId] = new std::thread(workerThreadInit<true, false>, &pPool->pThreadData[workerId]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
|
||||
pContext->NumBEThreads++;
|
||||
pContext->NumFEThreads++;
|
||||
}
|
||||
pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
|
||||
pContext->NumBEThreads++;
|
||||
pContext->NumFEThreads++;
|
||||
|
||||
++workerId;
|
||||
}
|
||||
|
@ -991,7 +960,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
|||
|
||||
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
||||
{
|
||||
if (!KNOB_SINGLE_THREADED)
|
||||
if (!pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
// Inform threads to finish up
|
||||
std::unique_lock<std::mutex> lock(pContext->WaitLock);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -45,7 +45,7 @@ struct THREAD_DATA
|
|||
uint32_t htId; // Hyperthread id
|
||||
uint32_t workerId;
|
||||
SWR_CONTEXT *pContext;
|
||||
bool forceBindProcGroup; // Only useful when KNOB_MAX_WORKER_THREADS is set.
|
||||
bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -30,18 +30,6 @@ KNOBS = [
|
|||
'category' : 'debug',
|
||||
}],
|
||||
|
||||
['HYPERTHREADED_FE', {
|
||||
'type' : 'bool',
|
||||
'default' : 'false',
|
||||
'desc' : ['EXPERIMENTAL!!',
|
||||
'If enabled will attempt to use secondary threads per core to perform',
|
||||
'front-end (VS/GS) work.',
|
||||
'',
|
||||
'Note: Setting this will cause KNOB_MAX_THREADS_PER_CORE to be ignored.'],
|
||||
'category' : 'perf',
|
||||
'advanced' : 'true',
|
||||
}],
|
||||
|
||||
['DUMP_SHADER_IR', {
|
||||
'type' : 'bool',
|
||||
'default' : 'false',
|
||||
|
|
Loading…
Reference in New Issue