godot/thirdparty/embree-aarch64/common/sys/barrier.cpp

290 lines
6.2 KiB
C++

// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "barrier.h"
#include "condition.h"
#include "regression.h"
#include "thread.h"
#if defined (__WIN32__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace embree
{
struct BarrierSysImplementation
{
__forceinline BarrierSysImplementation (size_t N)
: i(0), enterCount(0), exitCount(0), barrierSize(0)
{
events[0] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
init(N);
}
__forceinline ~BarrierSysImplementation ()
{
CloseHandle(events[0]);
CloseHandle(events[1]);
}
__forceinline void init(size_t N)
{
barrierSize = N;
enterCount.store(N);
exitCount.store(N);
}
__forceinline void wait()
{
/* every thread entering the barrier decrements this count */
size_t i0 = i;
size_t cnt0 = enterCount--;
/* all threads except the last one are wait in the barrier */
if (cnt0 > 1)
{
if (WaitForSingleObject(events[i0], INFINITE) != WAIT_OBJECT_0)
THROW_RUNTIME_ERROR("WaitForSingleObjects failed");
}
/* the last thread starts all threads waiting at the barrier */
else
{
i = 1-i;
enterCount.store(barrierSize);
if (SetEvent(events[i0]) == 0)
THROW_RUNTIME_ERROR("SetEvent failed");
}
/* every thread leaving the barrier decrements this count */
size_t cnt1 = exitCount--;
/* the last thread that left the barrier resets the event again */
if (cnt1 == 1)
{
exitCount.store(barrierSize);
if (ResetEvent(events[i0]) == 0)
THROW_RUNTIME_ERROR("ResetEvent failed");
}
}
public:
HANDLE events[2];
atomic<size_t> i;
atomic<size_t> enterCount;
atomic<size_t> exitCount;
size_t barrierSize;
};
}
#else
namespace embree
{
struct BarrierSysImplementation
{
__forceinline BarrierSysImplementation (size_t N)
: count(0), barrierSize(0)
{
init(N);
}
__forceinline void init(size_t N)
{
assert(count == 0);
count = 0;
barrierSize = N;
}
__forceinline void wait()
{
mutex.lock();
count++;
if (count == barrierSize) {
count = 0;
cond.notify_all();
mutex.unlock();
return;
}
cond.wait(mutex);
mutex.unlock();
return;
}
public:
MutexSys mutex;
ConditionSys cond;
volatile size_t count;
volatile size_t barrierSize;
};
}
#endif
namespace embree
{
BarrierSys::BarrierSys (size_t N) {
opaque = new BarrierSysImplementation(N);
}
BarrierSys::~BarrierSys () {
delete (BarrierSysImplementation*) opaque;
}
void BarrierSys::init(size_t count) {
((BarrierSysImplementation*) opaque)->init(count);
}
void BarrierSys::wait() {
((BarrierSysImplementation*) opaque)->wait();
}
LinearBarrierActive::LinearBarrierActive (size_t N)
: count0(nullptr), count1(nullptr), mode(0), flag0(0), flag1(0), threadCount(0)
{
if (N == 0) N = getNumberOfLogicalThreads();
init(N);
}
LinearBarrierActive::~LinearBarrierActive()
{
delete[] count0;
delete[] count1;
}
void LinearBarrierActive::init(size_t N)
{
if (threadCount != N) {
threadCount = N;
if (count0) delete[] count0; count0 = new unsigned char[N];
if (count1) delete[] count1; count1 = new unsigned char[N];
}
mode = 0;
flag0 = 0;
flag1 = 0;
for (size_t i=0; i<N; i++) count0[i] = 0;
for (size_t i=0; i<N; i++) count1[i] = 0;
}
void LinearBarrierActive::wait (const size_t threadIndex)
{
if (mode == 0)
{
if (threadIndex == 0)
{
for (size_t i=0; i<threadCount; i++)
count1[i] = 0;
for (size_t i=1; i<threadCount; i++)
{
while (likely(count0[i] == 0))
pause_cpu();
}
mode = 1;
flag1 = 0;
__memory_barrier();
flag0 = 1;
}
else
{
count0[threadIndex] = 1;
{
while (likely(flag0 == 0))
pause_cpu();
}
}
}
else
{
if (threadIndex == 0)
{
for (size_t i=0; i<threadCount; i++)
count0[i] = 0;
for (size_t i=1; i<threadCount; i++)
{
while (likely(count1[i] == 0))
pause_cpu();
}
mode = 0;
flag0 = 0;
__memory_barrier();
flag1 = 1;
}
else
{
count1[threadIndex] = 1;
{
while (likely(flag1 == 0))
pause_cpu();
}
}
}
}
struct barrier_sys_regression_test : public RegressionTest
{
BarrierSys barrier;
std::atomic<size_t> threadID;
std::atomic<size_t> numFailed;
std::vector<size_t> threadResults;
barrier_sys_regression_test()
: RegressionTest("barrier_sys_regression_test"), threadID(0), numFailed(0)
{
registerRegressionTest(this);
}
static void thread_alloc(barrier_sys_regression_test* This)
{
size_t tid = This->threadID++;
for (size_t j=0; j<1000; j++)
{
This->barrier.wait();
This->threadResults[tid] = tid;
This->barrier.wait();
}
}
bool run ()
{
threadID.store(0);
numFailed.store(0);
size_t numThreads = getNumberOfLogicalThreads();
threadResults.resize(numThreads);
barrier.init(numThreads+1);
/* create threads */
std::vector<thread_t> threads;
for (size_t i=0; i<numThreads; i++)
threads.push_back(createThread((thread_func)thread_alloc,this));
/* run test */
for (size_t i=0; i<1000; i++)
{
for (size_t i=0; i<numThreads; i++) threadResults[i] = 0;
barrier.wait();
barrier.wait();
for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i;
}
/* destroy threads */
for (size_t i=0; i<numThreads; i++)
join(threads[i]);
return numFailed == 0;
}
};
barrier_sys_regression_test barrier_sys_regression_test;
}