1bea8e1eac
-Added LocalVector (needed it) -Added stb_rect_pack (It's pretty cool, we could probably use it for other stuff too) -Fixes and changes all around the place -Added library for 128 bits fixed point (required for Delaunay3D)
298 lines
10 KiB
C++
298 lines
10 KiB
C++
// ======================================================================== //
|
|
// Copyright 2009-2019 Intel Corporation //
|
|
// //
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); //
|
|
// you may not use this file except in compliance with the License. //
|
|
// You may obtain a copy of the License at //
|
|
// //
|
|
// http://www.apache.org/licenses/LICENSE-2.0 //
|
|
// //
|
|
// Unless required by applicable law or agreed to in writing, software //
|
|
// distributed under the License is distributed on an "AS IS" BASIS, //
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
|
|
// See the License for the specific language governing permissions and //
|
|
// limitations under the License. //
|
|
// ======================================================================== //
|
|
|
|
#if defined(_MSC_VER)
|
|
#pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
|
|
#endif
|
|
|
|
#if defined(__APPLE__)
  #include <mach/mach_init.h>
  #include <mach/mach_port.h>
  #include <mach/thread_act.h>
#endif
|
|
|
|
#include "thread.h"
|
|
#include <fstream>
|
|
|
|
namespace oidn {
|
|
|
|
#if defined(_WIN32)
|
|
|
|
// --------------------------------------------------------------------------
|
|
// ThreadAffinity - Windows
|
|
// --------------------------------------------------------------------------
|
|
|
|
// Builds the list of per-thread group affinities on Windows.
//
// For every processor-core record returned by GetLogicalProcessorInformationEx,
// up to numThreadsPerCore single-bit affinities (one per logical processor of
// that core) are appended to `affinities`; `oldAffinities` receives the same
// values as placeholders. If the required kernel32 entry points are missing or
// a query fails, the lists are left as they are and pinning becomes a no-op.
ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
  : Verbose(verbose)
{
  // Both functions are looked up at run time instead of being linked directly
  const HMODULE kernel32 = GetModuleHandle(TEXT("kernel32"));
  pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");
  pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(kernel32, "SetThreadGroupAffinity");

  if (!pGetLogicalProcessorInformationEx || !pSetThreadGroupAffinity)
    return;

  // Size query: calling with an empty buffer is expected to fail with
  // ERROR_INSUFFICIENT_BUFFER and report the required size
  PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = nullptr;
  DWORD infoSize = 0;
  if (pGetLogicalProcessorInformationEx(RelationProcessorCore, info, &infoSize) ||
      GetLastError() != ERROR_INSUFFICIENT_BUFFER)
  {
    OIDN_WARNING("GetLogicalProcessorInformationEx failed");
    return;
  }

  info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(infoSize);
  if (!info)
  {
    OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
    return;
  }

  // Actual query with the properly sized buffer
  if (!pGetLogicalProcessorInformationEx(RelationProcessorCore, info, &infoSize))
  {
    OIDN_WARNING("GetLogicalProcessorInformationEx failed");
    free(info);
    return;
  }

  // Walk the variable-sized records; there should be one per physical core
  char* const recordsEnd = (char*)info + infoSize;
  for (char* cursor = (char*)info; cursor < recordsEnd; )
  {
    const PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX record = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)cursor;
    cursor += record->Size; // advance now so the early `continue` below is safe

    if (record->Relationship != RelationProcessorCore || record->Processor.GroupCount == 0)
      continue;

    // Take at most numThreadsPerCore logical processors from this core
    int taken = 0;
    for (int g = 0; g < record->Processor.GroupCount; ++g)
    {
      if (taken >= numThreadsPerCore)
        break;

      GROUP_AFFINITY remaining = record->Processor.GroupMask[g];
      while (remaining.Mask != 0 && taken < numThreadsPerCore)
      {
        // Isolate the lowest set bit of the mask: that is one logical processor
        GROUP_AFFINITY single = remaining;
        single.Mask &= -single.Mask;

        affinities.push_back(single);
        oldAffinities.push_back(single);
        ++taken;

        // Clear that bit so the next iteration picks the following processor
        remaining.Mask &= ~single.Mask;
      }
    }
  }

  free(info);
}
|
|
|
|
void ThreadAffinity::set(int threadIndex)
|
|
{
|
|
if (threadIndex >= (int)affinities.size())
|
|
return;
|
|
|
|
// Save the current affinity and set the new one
|
|
const HANDLE thread = GetCurrentThread();
|
|
if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
|
|
OIDN_WARNING("SetThreadGroupAffinity failed");
|
|
}
|
|
|
|
void ThreadAffinity::restore(int threadIndex)
|
|
{
|
|
if (threadIndex >= (int)affinities.size())
|
|
return;
|
|
|
|
// Restore the original affinity
|
|
const HANDLE thread = GetCurrentThread();
|
|
if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr))
|
|
OIDN_WARNING("SetThreadGroupAffinity failed");
|
|
}
|
|
|
|
#elif defined(__linux__)
|
|
|
|
// --------------------------------------------------------------------------
|
|
// ThreadAffinity - Linux
|
|
// --------------------------------------------------------------------------
|
|
|
|
// Builds the list of per-thread CPU sets on Linux.
//
// Reads /sys/devices/system/cpu/cpu<N>/topology/thread_siblings_list for
// N = 0, 1, ... until a file cannot be opened. From each list at most
// numThreadsPerCore sibling ids are taken, and every id not seen before is
// recorded. One single-CPU cpu_set_t is then created per recorded id, in
// discovery order; oldAffinities receives the same values as placeholders.
ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
  : Verbose(verbose)
{
  std::vector<int> threadIds;

  // Parse the thread/CPU topology
  for (int cpuId = 0; ; cpuId++)
  {
    const std::string path = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list");
    std::fstream fs(path.c_str(), std::fstream::in);
    if (fs.fail()) break;

    int numSiblings = 0;
    int first;
    while ((numSiblings < numThreadsPerCore) && (fs >> first))
    {
      // An entry is either a single id ("4") or an inclusive range ("0-3").
      // Without explicit range handling, "0-3" would be read as 0 followed by -3.
      int last = first;
      if (fs.peek() == '-')
      {
        fs.ignore();
        if (!(fs >> last) || last < first)
          last = first; // malformed range: keep only the first id
      }

      for (int id = first; (id <= last) && (numSiblings < numThreadsPerCore); ++id)
      {
        // Siblings list the same ids for every CPU of a core, so skip duplicates
        if (std::none_of(threadIds.begin(), threadIds.end(), [&](int known) { return known == id; }))
          threadIds.push_back(id);
        numSiblings++;
      }

      // Entries are comma-separated
      if (fs.peek() == ',')
        fs.ignore();
    }
  }

  // Create the affinity structures
  affinities.resize(threadIds.size());
  oldAffinities.resize(threadIds.size());

  for (size_t i = 0; i < threadIds.size(); ++i)
  {
    cpu_set_t affinity;
    CPU_ZERO(&affinity);
    CPU_SET(threadIds[i], &affinity);

    affinities[i] = affinity;
    oldAffinities[i] = affinity;
  }
}
|
|
|
|
void ThreadAffinity::set(int threadIndex)
|
|
{
|
|
if (threadIndex >= (int)affinities.size())
|
|
return;
|
|
|
|
const pthread_t thread = pthread_self();
|
|
|
|
// Save the current affinity
|
|
if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
|
|
{
|
|
OIDN_WARNING("pthread_getaffinity_np failed");
|
|
oldAffinities[threadIndex] = affinities[threadIndex];
|
|
return;
|
|
}
|
|
|
|
// Set the new affinity
|
|
if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0)
|
|
OIDN_WARNING("pthread_setaffinity_np failed");
|
|
}
|
|
|
|
void ThreadAffinity::restore(int threadIndex)
|
|
{
|
|
if (threadIndex >= (int)affinities.size())
|
|
return;
|
|
|
|
const pthread_t thread = pthread_self();
|
|
|
|
// Restore the original affinity
|
|
if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
|
|
OIDN_WARNING("pthread_setaffinity_np failed");
|
|
}
|
|
|
|
#elif defined(__APPLE__)
|
|
|
|
// --------------------------------------------------------------------------
|
|
// ThreadAffinity - macOS
|
|
// --------------------------------------------------------------------------
|
|
|
|
// Builds the list of per-thread affinity policies on macOS.
//
// The physical and logical CPU counts are queried through getSysctl. For each
// physical core, min(numThreadsPerCore, logical / physical) entries carrying
// the same affinity tag are appended to `affinities`; `oldAffinities` receives
// the same values as placeholders. Nothing is added if the query fails.
ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
  : Verbose(verbose)
{
  // Query the thread/CPU topology
  int numPhysicalCpus;
  int numLogicalCpus;
  const bool haveTopology = getSysctl("hw.physicalcpu", numPhysicalCpus) &&
                            getSysctl("hw.logicalcpu", numLogicalCpus);
  if (!haveTopology)
  {
    OIDN_WARNING("sysctlbyname failed");
    return;
  }

  // An uneven logical/physical ratio is only a problem when several threads
  // per core are requested; this shouldn't happen
  if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1))
    return;

  const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus;
  const int entriesPerCore = min(numThreadsPerCore, maxThreadsPerCore);

  // macOS doesn't support binding a thread to a specific core, but threads that
  // should share a core can at least be grouped under a common affinity tag
  for (int tag = 1; tag <= numPhysicalCpus; ++tag) // tags start from 1!
  {
    thread_affinity_policy policy;
    policy.affinity_tag = tag;

    for (int remaining = entriesPerCore; remaining > 0; --remaining)
    {
      affinities.push_back(policy);
      oldAffinities.push_back(policy);
    }
  }
}
|
|
|
|
void ThreadAffinity::set(int threadIndex)
|
|
{
|
|
if (threadIndex >= (int)affinities.size())
|
|
return;
|
|
|
|
const auto thread = mach_thread_self();
|
|
|
|
// Save the current affinity
|
|
mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
|
|
boolean_t getDefault = FALSE;
|
|
if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
|
|
{
|
|
OIDN_WARNING("thread_policy_get failed");
|
|
oldAffinities[threadIndex] = affinities[threadIndex];
|
|
return;
|
|
}
|
|
|
|
// Set the new affinity
|
|
if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
|
|
OIDN_WARNING("thread_policy_set failed");
|
|
}
|
|
|
|
void ThreadAffinity::restore(int threadIndex)
|
|
{
|
|
if (threadIndex >= (int)affinities.size())
|
|
return;
|
|
|
|
const auto thread = mach_thread_self();
|
|
|
|
// Restore the original affinity
|
|
if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
|
|
OIDN_WARNING("thread_policy_set failed");
|
|
}
|
|
|
|
#endif
|
|
|
|
} // namespace oidn
|