swr: [rasterizer core] allow override of KNOB thread settings

- Remove HYPERTHREADED_FE support
- Add threading info as optional data passed to SwrCreateContext.
  If supplied, this data will override any KNOB thread settings.

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
Tim Rowley 2016-08-03 17:59:37 -06:00
parent e0c10306f5
commit 29e1c4a8a9
6 changed files with 53 additions and 70 deletions
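
The new override path can be exercised roughly as below. This is a minimal sketch based only on the fields added by this commit: the limit values are illustrative, and the rest of the SWR_CREATECONTEXT_INFO setup (driver callbacks and other required fields) is elided.

    // Sketch: override KNOB-based thread settings at context creation.
    // A zero limit field means "no override" for that dimension.
    SWR_THREADING_INFO threadInfo = {};
    threadInfo.MAX_WORKER_THREADS = 0;      // 0: size the pool from CPU topology
    threadInfo.MAX_NUMA_NODES = 1;          // use at most one NUMA node
    threadInfo.MAX_CORES_PER_NUMA_NODE = 4; // use at most 4 cores per node
    threadInfo.MAX_THREADS_PER_CORE = 1;    // no hyperthread workers
    threadInfo.SINGLE_THREADED = false;

    SWR_CREATECONTEXT_INFO createInfo = {};
    // ... fill in the required driver callbacks and other createInfo fields ...
    createInfo.pThreadInfo = &threadInfo;   // optional; leave null to keep KNOB values

    HANDLE hContext = SwrCreateContext(&createInfo);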

View File

@@ -75,6 +75,17 @@ HANDLE SwrCreateContext(
     pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
     pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
 
+    pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
+    pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
+    pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
+    pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
+    pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
+
+    if (pCreateInfo->pThreadInfo)
+    {
+        pContext->threadInfo = *pCreateInfo->pThreadInfo;
+    }
+
     for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
     {
         pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
@@ -84,7 +95,7 @@ HANDLE SwrCreateContext(
         pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
     }
 
-    if (!KNOB_SINGLE_THREADED)
+    if (!pContext->threadInfo.SINGLE_THREADED)
     {
         memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock));
         memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
@@ -95,9 +106,8 @@ HANDLE SwrCreateContext(
     }
 
     // Calling createThreadPool() above can set SINGLE_THREADED
-    if (KNOB_SINGLE_THREADED)
+    if (pContext->threadInfo.SINGLE_THREADED)
     {
-        SET_KNOB(HYPERTHREADED_FE, false);
         pContext->NumWorkerThreads = 1;
         pContext->NumFEThreads = 1;
         pContext->NumBEThreads = 1;
@@ -218,7 +228,7 @@ void QueueWork(SWR_CONTEXT *pContext)
         pContext->dcRing.Enqueue();
     }
 
-    if (KNOB_SINGLE_THREADED)
+    if (pContext->threadInfo.SINGLE_THREADED)
     {
         // flush denormals to 0
         uint32_t mxcsr = _mm_getcsr();

View File

@@ -90,6 +90,18 @@ typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
 class BucketManager;
 
+//////////////////////////////////////////////////////////////////////////
+/// SWR_THREADING_INFO
+/////////////////////////////////////////////////////////////////////////
+struct SWR_THREADING_INFO
+{
+    uint32_t MAX_WORKER_THREADS;
+    uint32_t MAX_NUMA_NODES;
+    uint32_t MAX_CORES_PER_NUMA_NODE;
+    uint32_t MAX_THREADS_PER_CORE;
+    bool SINGLE_THREADED;
+};
+
 //////////////////////////////////////////////////////////////////////////
 /// SWR_CREATECONTEXT_INFO
 /////////////////////////////////////////////////////////////////////////
@@ -113,6 +125,9 @@ struct SWR_CREATECONTEXT_INFO
     // Output: size required memory passed to for SwrSaveState / SwrRestoreState
     size_t contextSaveSize;
+
+    // Input (optional): Threading info that overrides any set KNOB values.
+    SWR_THREADING_INFO* pThreadInfo;
 };
 
 //////////////////////////////////////////////////////////////////////////

View File

@@ -464,6 +464,7 @@ struct SWR_CONTEXT
     uint32_t NumBEThreads;
 
     THREAD_POOL threadPool; // Thread pool associated with this context
+    SWR_THREADING_INFO threadInfo;
 
     std::condition_variable FifosNotEmpty;
     std::mutex WaitLock;

View File

@@ -1,5 +1,5 @@
 /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -239,10 +239,10 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
 }
 
-void bindThread(uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
+void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
 {
     // Only bind threads when MAX_WORKER_THREADS isn't set.
-    if (KNOB_MAX_WORKER_THREADS && bindProcGroup == false)
+    if (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false)
     {
         return;
     }
@@ -267,9 +267,9 @@ void bindThread(uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=
     else
 #endif
     {
-        // If KNOB_MAX_WORKER_THREADS is set, only bind to the proc group,
+        // If MAX_WORKER_THREADS is set, only bind to the proc group,
         // Not the individual HW thread.
-        if (!KNOB_MAX_WORKER_THREADS)
+        if (!pContext->threadInfo.MAX_WORKER_THREADS)
         {
             affinity.Mask = KAFFINITY(1) << threadId;
         }
@@ -648,7 +648,7 @@ DWORD workerThreadMain(LPVOID pData)
     uint32_t threadId = pThreadData->threadId;
     uint32_t workerId = pThreadData->workerId;
 
-    bindThread(threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
+    bindThread(pContext, threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
 
     RDTSC_INIT(threadId);
@@ -771,7 +771,7 @@ template<> DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
 
 void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
 {
-    bindThread(0);
+    bindThread(pContext, 0);
 
     CPUNumaNodes nodes;
     uint32_t numThreadsPerProcGroup = 0;
@@ -796,33 +796,23 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
     uint32_t numCoresPerNode = numHWCoresPerNode;
     uint32_t numHyperThreads = numHWHyperThreads;
 
-    if (KNOB_MAX_WORKER_THREADS)
+    if (pContext->threadInfo.MAX_NUMA_NODES)
     {
-        SET_KNOB(HYPERTHREADED_FE, false);
+        numNodes = std::min(numNodes, pContext->threadInfo.MAX_NUMA_NODES);
     }
 
-    if (KNOB_HYPERTHREADED_FE)
+    if (pContext->threadInfo.MAX_CORES_PER_NUMA_NODE)
     {
-        SET_KNOB(MAX_THREADS_PER_CORE, 0);
+        numCoresPerNode = std::min(numCoresPerNode, pContext->threadInfo.MAX_CORES_PER_NUMA_NODE);
     }
 
-    if (KNOB_MAX_NUMA_NODES)
+    if (pContext->threadInfo.MAX_THREADS_PER_CORE)
     {
-        numNodes = std::min(numNodes, KNOB_MAX_NUMA_NODES);
-    }
-
-    if (KNOB_MAX_CORES_PER_NUMA_NODE)
-    {
-        numCoresPerNode = std::min(numCoresPerNode, KNOB_MAX_CORES_PER_NUMA_NODE);
-    }
-
-    if (KNOB_MAX_THREADS_PER_CORE)
-    {
-        numHyperThreads = std::min(numHyperThreads, KNOB_MAX_THREADS_PER_CORE);
+        numHyperThreads = std::min(numHyperThreads, pContext->threadInfo.MAX_THREADS_PER_CORE);
     }
 
 #if defined(_WIN32) && !defined(_WIN64)
-    if (!KNOB_MAX_WORKER_THREADS)
+    if (!pContext->threadInfo.MAX_WORKER_THREADS)
    {
         // Limit 32-bit windows to bindable HW threads only
         if ((numCoresPerNode * numHWHyperThreads) > 32)
@@ -832,19 +822,14 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
     }
 #endif
 
-    if (numHyperThreads < 2)
-    {
-        SET_KNOB(HYPERTHREADED_FE, false);
-    }
-
     // Calculate numThreads
     uint32_t numThreads = numNodes * numCoresPerNode * numHyperThreads;
     numThreads = std::min(numThreads, numHWThreads);
 
-    if (KNOB_MAX_WORKER_THREADS)
+    if (pContext->threadInfo.MAX_WORKER_THREADS)
     {
         uint32_t maxHWThreads = numHWNodes * numHWCoresPerNode * numHWHyperThreads;
-        numThreads = std::min(KNOB_MAX_WORKER_THREADS, maxHWThreads);
+        numThreads = std::min(pContext->threadInfo.MAX_WORKER_THREADS, maxHWThreads);
     }
 
     if (numThreads > KNOB_MAX_NUM_THREADS)
@@ -900,7 +885,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
     pPool->pThreadData = (THREAD_DATA *)malloc(pPool->numThreads * sizeof(THREAD_DATA));
     pPool->numaMask = 0;
 
-    if (KNOB_MAX_WORKER_THREADS)
+    if (pContext->threadInfo.MAX_WORKER_THREADS)
     {
         bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
         uint32_t numProcGroups = (numThreads + numThreadsPerProcGroup - 1) / numThreadsPerProcGroup;
@@ -962,25 +947,9 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
                 pPool->pThreadData[workerId].htId = t;
                 pPool->pThreadData[workerId].pContext = pContext;
 
-                if (KNOB_HYPERTHREADED_FE)
-                {
-                    if (t == 0)
-                    {
-                        pContext->NumBEThreads++;
-                        pPool->threads[workerId] = new std::thread(workerThreadInit<false, true>, &pPool->pThreadData[workerId]);
-                    }
-                    else
-                    {
-                        pContext->NumFEThreads++;
-                        pPool->threads[workerId] = new std::thread(workerThreadInit<true, false>, &pPool->pThreadData[workerId]);
-                    }
-                }
-                else
-                {
-                    pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
-                    pContext->NumBEThreads++;
-                    pContext->NumFEThreads++;
-                }
+                pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+                pContext->NumBEThreads++;
+                pContext->NumFEThreads++;
 
                 ++workerId;
             }
@@ -991,7 +960,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
 
 void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
 {
-    if (!KNOB_SINGLE_THREADED)
+    if (!pContext->threadInfo.SINGLE_THREADED)
     {
         // Inform threads to finish up
         std::unique_lock<std::mutex> lock(pContext->WaitLock);
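
As a standalone illustration of how the new clamping in CreateThreadPool behaves (the topology and override numbers below are assumptions, not part of the commit):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // Assumed detected topology: 2 NUMA nodes x 8 cores x 2 hyperthreads.
        uint32_t numNodes = 2, numCoresPerNode = 8, numHyperThreads = 2;

        // Hypothetical override values a driver might pass via pThreadInfo.
        uint32_t MAX_NUMA_NODES = 1, MAX_CORES_PER_NUMA_NODE = 4, MAX_THREADS_PER_CORE = 1;

        // Same clamping structure as the new CreateThreadPool code: zero means "no limit".
        if (MAX_NUMA_NODES)          numNodes        = std::min(numNodes, MAX_NUMA_NODES);
        if (MAX_CORES_PER_NUMA_NODE) numCoresPerNode = std::min(numCoresPerNode, MAX_CORES_PER_NUMA_NODE);
        if (MAX_THREADS_PER_CORE)    numHyperThreads = std::min(numHyperThreads, MAX_THREADS_PER_CORE);

        uint32_t numThreads = numNodes * numCoresPerNode * numHyperThreads;
        printf("worker threads: %u\n", numThreads); // 1 * 4 * 1 = 4
        return 0;
    }

A nonzero MAX_WORKER_THREADS instead sets the worker count directly (capped at the total HW thread count) and, as in the KNOB path it replaces, skips per-thread affinity binding.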

View File

@@ -1,5 +1,5 @@
 /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -45,7 +45,7 @@ struct THREAD_DATA
     uint32_t htId; // Hyperthread id
     uint32_t workerId;
     SWR_CONTEXT *pContext;
-    bool forceBindProcGroup; // Only useful when KNOB_MAX_WORKER_THREADS is set.
+    bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
 };

View File

@@ -30,18 +30,6 @@ KNOBS = [
         'category' : 'debug',
     }],
 
-    ['HYPERTHREADED_FE', {
-        'type' : 'bool',
-        'default' : 'false',
-        'desc' : ['EXPERIMENTAL!!',
-            'If enabled will attempt to use secondary threads per core to perform',
-            'front-end (VS/GS) work.',
-            '',
-            'Note: Setting this will cause KNOB_MAX_THREADS_PER_CORE to be ignored.'],
-        'category' : 'perf',
-        'advanced' : 'true',
-    }],
-
     ['DUMP_SHADER_IR', {
         'type' : 'bool',
         'default' : 'false',