430 lines
12 KiB
C++
430 lines
12 KiB
C++
// Copyright 2009-2020 Intel Corporation
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
#include "thread.h"
|
|
#include "sysinfo.h"
|
|
#include "string.h"
|
|
|
|
#include <iostream>
|
|
#if defined(__ARM_NEON)
|
|
#include "../math/SSE2NEON.h"
|
|
#else
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
|
|
#if defined(PTHREADS_WIN32)
|
|
#pragma comment (lib, "pthreadVC.lib")
|
|
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// Windows Platform
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(__WIN32__)
|
|
|
|
#define WIN32_LEAN_AND_MEAN
|
|
#include <windows.h>
|
|
|
|
namespace embree
|
|
{
|
|
/*! set the affinity of a given thread */
|
|
void setAffinity(HANDLE thread, ssize_t affinity)
|
|
{
|
|
typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
|
|
typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
|
|
typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
|
|
typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
|
|
HMODULE hlib = LoadLibrary("Kernel32");
|
|
GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
|
|
GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
|
|
SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
|
|
SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
|
|
if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
|
|
{
|
|
int groups = pGetActiveProcessorGroupCount();
|
|
int totalProcessors = 0, group = 0, number = 0;
|
|
for (int i = 0; i<groups; i++) {
|
|
int processors = pGetActiveProcessorCount(i);
|
|
if (totalProcessors + processors > affinity) {
|
|
group = i;
|
|
number = (int)affinity - totalProcessors;
|
|
break;
|
|
}
|
|
totalProcessors += processors;
|
|
}
|
|
|
|
GROUP_AFFINITY groupAffinity;
|
|
groupAffinity.Group = (WORD)group;
|
|
groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
|
|
groupAffinity.Reserved[0] = 0;
|
|
groupAffinity.Reserved[1] = 0;
|
|
groupAffinity.Reserved[2] = 0;
|
|
if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
|
|
WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
|
|
|
|
PROCESSOR_NUMBER processorNumber;
|
|
processorNumber.Group = group;
|
|
processorNumber.Number = number;
|
|
processorNumber.Reserved = 0;
|
|
if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
|
|
WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
|
|
}
|
|
else
|
|
{
|
|
if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
|
|
WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
|
|
if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
|
|
WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
|
|
}
|
|
}
|
|
|
|
/*! set affinity of the calling thread */
|
|
void setAffinity(ssize_t affinity) {
|
|
setAffinity(GetCurrentThread(), affinity);
|
|
}
|
|
|
|
struct ThreadStartupData
|
|
{
|
|
public:
|
|
ThreadStartupData (thread_func f, void* arg)
|
|
: f(f), arg(arg) {}
|
|
public:
|
|
thread_func f;
|
|
void* arg;
|
|
};
|
|
|
|
/*! Native entry point of threads created by createThread.
 *  `ptr` is the heap-allocated ThreadStartupData; it is deleted here after
 *  the user function returned. Always returns 0 as thread exit code. */
DWORD WINAPI threadStartup(LPVOID ptr)
{
  /* switch on flush-to-zero (bit 15) and denormals-are-zero (bit 6) in MXCSR */
  const unsigned int csr = _mm_getcsr();
  _mm_setcsr(csr | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));

  ThreadStartupData* const startup = static_cast<ThreadStartupData*>(ptr);
  startup->f(startup->arg);
  delete startup;
  return 0;
}
|
|
|
|
#if !defined(PTHREADS_WIN32)
|
|
|
|
/*! creates a hardware thread running on specific core */
|
|
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
|
|
{
|
|
HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
|
|
if (thread == nullptr) FATAL("CreateThread failed");
|
|
if (threadID >= 0) setAffinity(thread, threadID);
|
|
return thread_t(thread);
|
|
}
|
|
|
|
/*! the thread calling this function gets yielded */
|
|
void yield() {
|
|
SwitchToThread();
|
|
}
|
|
|
|
/*! waits until the given thread has terminated */
|
|
void join(thread_t tid) {
|
|
WaitForSingleObject(HANDLE(tid), INFINITE);
|
|
CloseHandle(HANDLE(tid));
|
|
}
|
|
|
|
/*! creates thread local storage */
|
|
tls_t createTls() {
|
|
return tls_t(size_t(TlsAlloc()));
|
|
}
|
|
|
|
/*! set the thread local storage pointer */
|
|
void setTls(tls_t tls, void* const ptr) {
|
|
TlsSetValue(DWORD(size_t(tls)), ptr);
|
|
}
|
|
|
|
/*! return the thread local storage pointer */
|
|
void* getTls(tls_t tls) {
|
|
return TlsGetValue(DWORD(size_t(tls)));
|
|
}
|
|
|
|
/*! destroys thread local storage identifier */
|
|
void destroyTls(tls_t tls) {
|
|
TlsFree(DWORD(size_t(tls)));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// Linux Platform
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(__LINUX__)
|
|
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <algorithm>
|
|
|
|
#if defined(__ANDROID__)
|
|
#include <pthread.h>
|
|
#endif
|
|
|
|
namespace embree
|
|
{
|
|
/* guards threadIDs; mapThreadID holds it for its whole duration */
static MutexSys mutex;
|
|
/* thread ID -> CPU ID table that mapThreadID fills from sysfs while it is empty */
static std::vector<size_t> threadIDs;
|
|
|
|
#if !defined(__ANDROID__) // TODO(LTE): Implement for Android target
|
|
/* changes thread ID mapping such that we first fill up all thread on one core */
|
|
size_t mapThreadID(size_t threadID)
|
|
{
|
|
Lock<MutexSys> lock(mutex);
|
|
|
|
if (threadIDs.size() == 0)
|
|
{
|
|
/* parse thread/CPU topology */
|
|
for (size_t cpuID=0;;cpuID++)
|
|
{
|
|
std::fstream fs;
|
|
std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");
|
|
fs.open (cpu.c_str(), std::fstream::in);
|
|
if (fs.fail()) break;
|
|
|
|
int i;
|
|
while (fs >> i)
|
|
{
|
|
if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))
|
|
threadIDs.push_back(i);
|
|
if (fs.peek() == ',')
|
|
fs.ignore();
|
|
}
|
|
fs.close();
|
|
}
|
|
|
|
#if 0
|
|
for (size_t i=0;i<threadIDs.size();i++)
|
|
std::cout << i << " -> " << threadIDs[i] << std::endl;
|
|
#endif
|
|
|
|
/* verify the mapping and do not use it if the mapping has errors */
|
|
for (size_t i=0;i<threadIDs.size();i++) {
|
|
for (size_t j=0;j<threadIDs.size();j++) {
|
|
if (i != j && threadIDs[i] == threadIDs[j]) {
|
|
threadIDs.clear();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* re-map threadIDs if mapping is available */
|
|
size_t ID = threadID;
|
|
if (threadID < threadIDs.size())
|
|
ID = threadIDs[threadID];
|
|
|
|
/* find correct thread to affinitize to */
|
|
cpu_set_t set;
|
|
if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
|
|
{
|
|
for (int i=0, j=0; i<CPU_SETSIZE; i++)
|
|
{
|
|
if (!CPU_ISSET(i,&set)) continue;
|
|
|
|
if (j == ID) {
|
|
ID = i;
|
|
break;
|
|
}
|
|
j++;
|
|
}
|
|
}
|
|
|
|
return ID;
|
|
}
|
|
#endif
|
|
|
|
/*! set affinity of the calling thread */
|
|
void setAffinity(ssize_t affinity)
|
|
{
|
|
#if defined(__ANDROID__)
|
|
// TODO(LTE): Implement
|
|
#else
|
|
cpu_set_t cset;
|
|
CPU_ZERO(&cset);
|
|
size_t threadID = mapThreadID(affinity);
|
|
CPU_SET(threadID, &cset);
|
|
|
|
pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
|
|
#endif
|
|
}
|
|
}
|
|
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// FreeBSD Platform
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(__FreeBSD__)
|
|
|
|
#include <pthread_np.h>
|
|
|
|
namespace embree
|
|
{
|
|
/*! set affinity of the calling thread */
|
|
void setAffinity(ssize_t affinity)
|
|
{
|
|
cpuset_t cset;
|
|
CPU_ZERO(&cset);
|
|
CPU_SET(affinity, &cset);
|
|
|
|
pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// MacOSX Platform
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(__MACOSX__)
|
|
|
|
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
|
|
|
|
namespace embree
|
|
{
|
|
/*! set affinity of the calling thread */
|
|
void setAffinity(ssize_t affinity)
|
|
{
|
|
thread_affinity_policy ap;
|
|
ap.affinity_tag = affinity;
|
|
if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
|
|
WARNING("setting thread affinity failed"); // on purpose only a warning
|
|
}
|
|
}
|
|
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// Unix Platform
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(__UNIX__) || defined(PTHREADS_WIN32)
|
|
|
|
#include <pthread.h>
|
|
#include <sched.h>
|
|
|
|
#if defined(__USE_NUMA__)
|
|
#include <numa.h>
|
|
#endif
|
|
|
|
namespace embree
|
|
{
|
|
struct ThreadStartupData
|
|
{
|
|
public:
|
|
ThreadStartupData (thread_func f, void* arg, int affinity)
|
|
: f(f), arg(arg), affinity(affinity) {}
|
|
public:
|
|
thread_func f;
|
|
void* arg;
|
|
ssize_t affinity;
|
|
};
|
|
|
|
static void* threadStartup(ThreadStartupData* parg)
|
|
{
|
|
_mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
|
|
|
|
/*! Mac OS X does not support setting affinity at thread creation time */
|
|
#if defined(__MACOSX__)
|
|
if (parg->affinity >= 0)
|
|
setAffinity(parg->affinity);
|
|
#endif
|
|
|
|
parg->f(parg->arg);
|
|
delete parg;
|
|
parg = nullptr;
|
|
return nullptr;
|
|
}
|
|
|
|
/*! creates a hardware thread running on specific core */
|
|
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
|
|
{
|
|
/* set stack size */
|
|
pthread_attr_t attr;
|
|
pthread_attr_init(&attr);
|
|
if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);
|
|
|
|
/* create thread */
|
|
pthread_t* tid = new pthread_t;
|
|
if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {
|
|
pthread_attr_destroy(&attr);
|
|
delete tid;
|
|
FATAL("pthread_create failed");
|
|
}
|
|
pthread_attr_destroy(&attr);
|
|
|
|
/* set affinity */
|
|
#if defined(__LINUX__) && !defined(__ANDROID__)
|
|
if (threadID >= 0) {
|
|
cpu_set_t cset;
|
|
CPU_ZERO(&cset);
|
|
threadID = mapThreadID(threadID);
|
|
CPU_SET(threadID, &cset);
|
|
pthread_setaffinity_np(*tid, sizeof(cset), &cset);
|
|
}
|
|
#elif defined(__FreeBSD__)
|
|
if (threadID >= 0) {
|
|
cpuset_t cset;
|
|
CPU_ZERO(&cset);
|
|
CPU_SET(threadID, &cset);
|
|
pthread_setaffinity_np(*tid, sizeof(cset), &cset);
|
|
}
|
|
#endif
|
|
|
|
return thread_t(tid);
|
|
}
|
|
|
|
/*! Lets the calling thread give way to other threads via sched_yield;
 *  the result of that call is not inspected. */
void yield()
{
  (void) sched_yield();
}
|
|
|
|
/*! waits until the given thread has terminated */
|
|
void join(thread_t tid) {
|
|
if (pthread_join(*(pthread_t*)tid, nullptr) != 0)
|
|
FATAL("pthread_join failed");
|
|
delete (pthread_t*)tid;
|
|
}
|
|
|
|
/*! creates thread local storage */
|
|
tls_t createTls()
|
|
{
|
|
pthread_key_t* key = new pthread_key_t;
|
|
if (pthread_key_create(key,nullptr) != 0) {
|
|
delete key;
|
|
FATAL("pthread_key_create failed");
|
|
}
|
|
|
|
return tls_t(key);
|
|
}
|
|
|
|
/*! return the thread local storage pointer */
|
|
void* getTls(tls_t tls)
|
|
{
|
|
assert(tls);
|
|
return pthread_getspecific(*(pthread_key_t*)tls);
|
|
}
|
|
|
|
/*! set the thread local storage pointer */
|
|
void setTls(tls_t tls, void* const ptr)
|
|
{
|
|
assert(tls);
|
|
if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)
|
|
FATAL("pthread_setspecific failed");
|
|
}
|
|
|
|
/*! destroys thread local storage identifier */
|
|
void destroyTls(tls_t tls)
|
|
{
|
|
assert(tls);
|
|
if (pthread_key_delete(*(pthread_key_t*)tls) != 0)
|
|
FATAL("pthread_key_delete failed");
|
|
delete (pthread_key_t*)tls;
|
|
}
|
|
}
|
|
|
|
#endif
|