swr/rast: adjust linux cpu topology identification code

Make the code more robust so that it handles strange configurations, such as
a VMware-exported 4-way NUMA x 1-core configuration.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2017-09-06 14:59:33 -05:00
parent 1ccf9ad280
commit ead0dfe31e
1 changed files with 38 additions and 43 deletions

View File

@ -169,37 +169,16 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
std::ifstream input("/proc/cpuinfo");
std::string line;
char* c;
uint32_t threadId = uint32_t(-1);
uint32_t procId = uint32_t(-1);
uint32_t coreId = uint32_t(-1);
uint32_t numaId = uint32_t(-1);
uint32_t physId = uint32_t(-1);
while (std::getline(input, line))
{
if (line.find("processor") != std::string::npos)
{
if (threadId != uint32_t(-1))
{
// Save information.
if (out_nodes.size() <= numaId)
{
out_nodes.resize(numaId + 1);
}
auto& numaNode = out_nodes[numaId];
if (numaNode.cores.size() <= coreId)
{
numaNode.cores.resize(coreId + 1);
}
auto& core = numaNode.cores[coreId];
core.procGroup = coreId;
core.threadIds.push_back(threadId);
out_numThreadsPerProcGroup++;
}
auto data_start = line.find(": ") + 2;
threadId = std::strtoul(&line.c_str()[data_start], &c, 10);
procId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
if (line.find("core id") != std::string::npos)
@ -211,29 +190,32 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
if (line.find("physical id") != std::string::npos)
{
auto data_start = line.find(": ") + 2;
numaId = std::strtoul(&line.c_str()[data_start], &c, 10);
physId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
if (line.length() == 0)
{
if (physId + 1 > out_nodes.size())
out_nodes.resize(physId + 1);
auto& numaNode = out_nodes[physId];
numaNode.numaId = physId;
if (coreId + 1 > numaNode.cores.size())
numaNode.cores.resize(coreId + 1);
auto& core = numaNode.cores[coreId];
core.procGroup = coreId;
core.threadIds.push_back(procId);
}
}
if (threadId != uint32_t(-1))
out_numThreadsPerProcGroup = 0;
for (auto &node : out_nodes)
{
// Save information.
if (out_nodes.size() <= numaId)
for (auto &core : node.cores)
{
out_nodes.resize(numaId + 1);
out_numThreadsPerProcGroup = std::max((size_t)out_numThreadsPerProcGroup,
core.threadIds.size());
}
auto& numaNode = out_nodes[numaId];
numaNode.numaId = numaId;
if (numaNode.cores.size() <= coreId)
{
numaNode.cores.resize(coreId + 1);
}
auto& core = numaNode.cores[coreId];
core.procGroup = coreId;
core.threadIds.push_back(threadId);
out_numThreadsPerProcGroup++;
}
#else
@ -316,7 +298,11 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId =
CPU_ZERO(&cpuset);
CPU_SET(threadId, &cpuset);
pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
if (err != 0)
{
fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(err));
}
#endif
}
@ -1031,7 +1017,16 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
}
else
{
pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
// numa distribution assumes workers on all nodes
bool useNuma = true;
if (numCoresPerNode * numHyperThreads == 1)
useNuma = false;
if (useNuma) {
pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
} else {
pPool->numaMask = 0;
}
uint32_t workerId = 0;
for (uint32_t n = 0; n < numNodes; ++n)
@ -1064,7 +1059,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
pPool->pThreadData[workerId].workerId = workerId;
pPool->pThreadData[workerId].procGroupId = core.procGroup;
pPool->pThreadData[workerId].threadId = core.threadIds[t];
pPool->pThreadData[workerId].numaId = node.numaId;
pPool->pThreadData[workerId].numaId = useNuma ? n : 0;
pPool->pThreadData[workerId].coreId = c;
pPool->pThreadData[workerId].htId = t;
pPool->pThreadData[workerId].pContext = pContext;