swr/rast: adjust linux cpu topology identification code
Make the code more robust so that it handles strange configurations, such as a VMware-exported 4-way NUMA x 1-core configuration. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
1ccf9ad280
commit
ead0dfe31e
|
@ -169,37 +169,16 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
|
|||
std::ifstream input("/proc/cpuinfo");
|
||||
std::string line;
|
||||
char* c;
|
||||
uint32_t threadId = uint32_t(-1);
|
||||
uint32_t procId = uint32_t(-1);
|
||||
uint32_t coreId = uint32_t(-1);
|
||||
uint32_t numaId = uint32_t(-1);
|
||||
uint32_t physId = uint32_t(-1);
|
||||
|
||||
while (std::getline(input, line))
|
||||
{
|
||||
if (line.find("processor") != std::string::npos)
|
||||
{
|
||||
if (threadId != uint32_t(-1))
|
||||
{
|
||||
// Save information.
|
||||
if (out_nodes.size() <= numaId)
|
||||
{
|
||||
out_nodes.resize(numaId + 1);
|
||||
}
|
||||
|
||||
auto& numaNode = out_nodes[numaId];
|
||||
if (numaNode.cores.size() <= coreId)
|
||||
{
|
||||
numaNode.cores.resize(coreId + 1);
|
||||
}
|
||||
|
||||
auto& core = numaNode.cores[coreId];
|
||||
core.procGroup = coreId;
|
||||
core.threadIds.push_back(threadId);
|
||||
|
||||
out_numThreadsPerProcGroup++;
|
||||
}
|
||||
|
||||
auto data_start = line.find(": ") + 2;
|
||||
threadId = std::strtoul(&line.c_str()[data_start], &c, 10);
|
||||
procId = std::strtoul(&line.c_str()[data_start], &c, 10);
|
||||
continue;
|
||||
}
|
||||
if (line.find("core id") != std::string::npos)
|
||||
|
@ -211,29 +190,32 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
|
|||
if (line.find("physical id") != std::string::npos)
|
||||
{
|
||||
auto data_start = line.find(": ") + 2;
|
||||
numaId = std::strtoul(&line.c_str()[data_start], &c, 10);
|
||||
physId = std::strtoul(&line.c_str()[data_start], &c, 10);
|
||||
continue;
|
||||
}
|
||||
if (line.length() == 0)
|
||||
{
|
||||
if (physId + 1 > out_nodes.size())
|
||||
out_nodes.resize(physId + 1);
|
||||
auto& numaNode = out_nodes[physId];
|
||||
numaNode.numaId = physId;
|
||||
|
||||
if (coreId + 1 > numaNode.cores.size())
|
||||
numaNode.cores.resize(coreId + 1);
|
||||
auto& core = numaNode.cores[coreId];
|
||||
core.procGroup = coreId;
|
||||
core.threadIds.push_back(procId);
|
||||
}
|
||||
}
|
||||
|
||||
if (threadId != uint32_t(-1))
|
||||
out_numThreadsPerProcGroup = 0;
|
||||
for (auto &node : out_nodes)
|
||||
{
|
||||
// Save information.
|
||||
if (out_nodes.size() <= numaId)
|
||||
for (auto &core : node.cores)
|
||||
{
|
||||
out_nodes.resize(numaId + 1);
|
||||
out_numThreadsPerProcGroup = std::max((size_t)out_numThreadsPerProcGroup,
|
||||
core.threadIds.size());
|
||||
}
|
||||
auto& numaNode = out_nodes[numaId];
|
||||
numaNode.numaId = numaId;
|
||||
if (numaNode.cores.size() <= coreId)
|
||||
{
|
||||
numaNode.cores.resize(coreId + 1);
|
||||
}
|
||||
auto& core = numaNode.cores[coreId];
|
||||
|
||||
core.procGroup = coreId;
|
||||
core.threadIds.push_back(threadId);
|
||||
out_numThreadsPerProcGroup++;
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -316,7 +298,11 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId =
|
|||
CPU_ZERO(&cpuset);
|
||||
CPU_SET(threadId, &cpuset);
|
||||
|
||||
pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
|
||||
int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
|
||||
if (err != 0)
|
||||
{
|
||||
fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(err));
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
@ -1031,7 +1017,16 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
|
|||
}
|
||||
else
|
||||
{
|
||||
pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
|
||||
// numa distribution assumes workers on all nodes
|
||||
bool useNuma = true;
|
||||
if (numCoresPerNode * numHyperThreads == 1)
|
||||
useNuma = false;
|
||||
|
||||
if (useNuma) {
|
||||
pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
|
||||
} else {
|
||||
pPool->numaMask = 0;
|
||||
}
|
||||
|
||||
uint32_t workerId = 0;
|
||||
for (uint32_t n = 0; n < numNodes; ++n)
|
||||
|
@ -1064,7 +1059,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
|
|||
pPool->pThreadData[workerId].workerId = workerId;
|
||||
pPool->pThreadData[workerId].procGroupId = core.procGroup;
|
||||
pPool->pThreadData[workerId].threadId = core.threadIds[t];
|
||||
pPool->pThreadData[workerId].numaId = node.numaId;
|
||||
pPool->pThreadData[workerId].numaId = useNuma ? n : 0;
|
||||
pPool->pThreadData[workerId].coreId = c;
|
||||
pPool->pThreadData[workerId].htId = t;
|
||||
pPool->pThreadData[workerId].pContext = pContext;
|
||||
|
|
Loading…
Reference in New Issue