Allow inlining of all parts of safe_refcount
The gains from this aren't huge, but the effort is minimal, and some workloads get a couple of percent more performance.
commit 2085bcce3b (parent 4cb50673d7)
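
The pattern the diff below introduces: each platform branch defines force-inlined, templated _atomic_*_impl helpers, and safe_refcount.cpp keeps thin, fixed-width wrappers (atomic_increment, atomic_decrement, ...) as the advertised symbols. A minimal compilable sketch of that shape, assuming _ALWAYS_INLINE_ expands to a force-inline attribute (the GCC/Clang builtin from the diff is used; this is not the exact engine code):

#include <cstdint>

// Stand-in for Godot's _ALWAYS_INLINE_ macro (assumption: a force-inline attribute).
#define _ALWAYS_INLINE_ inline __attribute__((always_inline))

// Templated implementation; works for uint32_t and uint64_t alike.
// __sync_add_and_fetch is the GCC/Clang atomic builtin used in the diff;
// it adds and returns the new value.
template <class T>
static _ALWAYS_INLINE_ T _atomic_increment_impl(T *pw) {
	return __sync_add_and_fetch(pw, 1);
}

// The advertised, non-template function: a thin wrapper that other
// translation units link against, while the implementation stays inlineable.
uint32_t atomic_increment(uint32_t *pw) {
	return _atomic_increment_impl(pw);
}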

safe_refcount.cpp

@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
/* Copyright (c) 2007-2017 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2017 Godot Engine contributors (cf. AUTHORS.md) */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -29,26 +29,259 @@
/*************************************************************************/
#include "safe_refcount.h"

#ifdef _MSC_VER
// Atomic functions, these are used for multithread safe reference counters!

// don't pollute my namespace!
#include <windows.h>
long atomic_conditional_increment(register long *pw) {
#ifdef NO_THREADS

	/* try to increment until it actually works */
	// taken from boost
/* Bogus implementation unaware of multiprocessing */

template <class T>
static _ALWAYS_INLINE_ T _atomic_conditional_increment_impl(register T *pw) {

	if (*pw == 0)
		return 0;

	(*pw)++;

	return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_decrement_impl(register T *pw) {

	(*pw)--;

	return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_increment_impl(register T *pw) {

	(*pw)++;

	return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_sub_impl(register T *pw, register T val) {

	(*pw) -= val;

	return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_add_impl(register T *pw, register T val) {

	(*pw) += val;

	return *pw;
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_exchange_if_greater_impl(register T *pw, register T val) {

	if (val > *pw)
		*pw = val;

	return *pw;
}

#elif defined(__GNUC__)

/* Implementation for GCC & Clang */

// GCC guarantees atomic intrinsics for sizes of 1, 2, 4 and 8 bytes.
// Clang states it supports GCC atomic builtins.

template <class T>
static _ALWAYS_INLINE_ T _atomic_conditional_increment_impl(register T *pw) {

	while (true) {
		long tmp = static_cast<long const volatile &>(*pw);
		T tmp = static_cast<T const volatile &>(*pw);
		if (tmp == 0)
			return 0; // if zero, can't add to it anymore
		if (InterlockedCompareExchange(pw, tmp + 1, tmp) == tmp)
		if (__sync_val_compare_and_swap(pw, tmp, tmp + 1) == tmp)
			return tmp + 1;
	}
}

long atomic_decrement(register long *pw) {
	return InterlockedDecrement(pw);
template <class T>
static _ALWAYS_INLINE_ T _atomic_decrement_impl(register T *pw) {

	return __sync_sub_and_fetch(pw, 1);
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_increment_impl(register T *pw) {

	return __sync_add_and_fetch(pw, 1);
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_sub_impl(register T *pw, register T val) {

	return __sync_sub_and_fetch(pw, val);
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_add_impl(register T *pw, register T val) {

	return __sync_add_and_fetch(pw, val);
}

template <class T>
static _ALWAYS_INLINE_ T _atomic_exchange_if_greater_impl(register T *pw, register T val) {

	while (true) {
		T tmp = static_cast<T const volatile &>(*pw);
		if (tmp >= val)
			return tmp; // already greater, or equal
		if (__sync_val_compare_and_swap(pw, tmp, val) == tmp)
			return val;
	}
}

#elif defined(_MSC_VER)

/* Implementation for MSVC-Windows */

// don't pollute my namespace!
#include <windows.h>

#define ATOMIC_CONDITIONAL_INCREMENT_BODY(m_pw, m_win_type, m_win_cmpxchg, m_cpp_type) \
	/* try to increment until it actually works */ \
	/* taken from boost */ \
	while (true) { \
		m_cpp_type tmp = static_cast<m_cpp_type const volatile &>(*(m_pw)); \
		if (tmp == 0) \
			return 0; /* if zero, can't add to it anymore */ \
		if (m_win_cmpxchg((m_win_type volatile *)(m_pw), tmp + 1, tmp) == tmp) \
			return tmp + 1; \
	}

#define ATOMIC_EXCHANGE_IF_GREATER_BODY(m_pw, m_val, m_win_type, m_win_cmpxchg, m_cpp_type) \
	while (true) { \
		m_cpp_type tmp = static_cast<m_cpp_type const volatile &>(*(m_pw)); \
		if (tmp >= m_val) \
			return tmp; /* already greater, or equal */ \
		if (m_win_cmpxchg((m_win_type volatile *)(m_pw), m_val, tmp) == tmp) \
			return m_val; \
	}

static _ALWAYS_INLINE_ uint32_t _atomic_conditional_increment_impl(register uint32_t *pw) {

	ATOMIC_CONDITIONAL_INCREMENT_BODY(pw, LONG, InterlockedCompareExchange, uint32_t)
}

static _ALWAYS_INLINE_ uint32_t _atomic_decrement_impl(register uint32_t *pw) {

	return InterlockedDecrement((LONG volatile *)pw);
}

static _ALWAYS_INLINE_ uint32_t _atomic_increment_impl(register uint32_t *pw) {

	return InterlockedIncrement((LONG volatile *)pw);
}

static _ALWAYS_INLINE_ uint32_t _atomic_sub_impl(register uint32_t *pw, register uint32_t val) {

	return InterlockedExchangeAdd((LONG volatile *)pw, -(int32_t)val) - val;
}

static _ALWAYS_INLINE_ uint32_t _atomic_add_impl(register uint32_t *pw, register uint32_t val) {

	return InterlockedAdd((LONG volatile *)pw, val);
}

static _ALWAYS_INLINE_ uint32_t _atomic_exchange_if_greater_impl(register uint32_t *pw, register uint32_t val) {

	ATOMIC_EXCHANGE_IF_GREATER_BODY(pw, val, LONG, InterlockedCompareExchange, uint32_t)
}

static _ALWAYS_INLINE_ uint64_t _atomic_conditional_increment_impl(register uint64_t *pw) {

	ATOMIC_CONDITIONAL_INCREMENT_BODY(pw, LONGLONG, InterlockedCompareExchange64, uint64_t)
}

static _ALWAYS_INLINE_ uint64_t _atomic_decrement_impl(register uint64_t *pw) {

	return InterlockedDecrement64((LONGLONG volatile *)pw);
}

static _ALWAYS_INLINE_ uint64_t _atomic_increment_impl(register uint64_t *pw) {

	return InterlockedIncrement64((LONGLONG volatile *)pw);
}

static _ALWAYS_INLINE_ uint64_t _atomic_sub_impl(register uint64_t *pw, register uint64_t val) {

	return InterlockedExchangeAdd64((LONGLONG volatile *)pw, -(int64_t)val) - val;
}

static _ALWAYS_INLINE_ uint64_t _atomic_add_impl(register uint64_t *pw, register uint64_t val) {

	return InterlockedAdd64((LONGLONG volatile *)pw, val);
}

static _ALWAYS_INLINE_ uint64_t _atomic_exchange_if_greater_impl(register uint64_t *pw, register uint64_t val) {

	ATOMIC_EXCHANGE_IF_GREATER_BODY(pw, val, LONGLONG, InterlockedCompareExchange64, uint64_t)
}

#else

//no threads supported?
#error Must provide atomic functions for this platform or compiler!

#endif

// The actual advertised functions; they'll call the right implementation

uint32_t atomic_conditional_increment(register uint32_t *counter) {
	return _atomic_conditional_increment_impl(counter);
}

uint32_t atomic_decrement(register uint32_t *pw) {
	return _atomic_decrement_impl(pw);
}

uint32_t atomic_increment(register uint32_t *pw) {
	return _atomic_increment_impl(pw);
}

uint32_t atomic_sub(register uint32_t *pw, register uint32_t val) {
	return _atomic_sub_impl(pw, val);
}

uint32_t atomic_add(register uint32_t *pw, register uint32_t val) {
	return _atomic_add_impl(pw, val);
}

uint32_t atomic_exchange_if_greater(register uint32_t *pw, register uint32_t val) {
	return _atomic_exchange_if_greater_impl(pw, val);
}

uint64_t atomic_conditional_increment(register uint64_t *counter) {
	return _atomic_conditional_increment_impl(counter);
}

uint64_t atomic_decrement(register uint64_t *pw) {
	return _atomic_decrement_impl(pw);
}

uint64_t atomic_increment(register uint64_t *pw) {
	return _atomic_increment_impl(pw);
}

uint64_t atomic_sub(register uint64_t *pw, register uint64_t val) {
	return _atomic_sub_impl(pw, val);
}

uint64_t atomic_add(register uint64_t *pw, register uint64_t val) {
	return _atomic_add_impl(pw, val);
}

uint64_t atomic_exchange_if_greater(register uint64_t *pw, register uint64_t val) {
	return _atomic_exchange_if_greater_impl(pw, val);
}

safe_refcount.h

@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
/* Copyright (c) 2007-2017 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2017 Godot Engine contributors (cf. AUTHORS.md) */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -34,277 +34,25 @@
/* x86/x86_64 GCC */

#include "platform_config.h"
#include "typedefs.h"

#ifdef NO_THREADS
uint32_t atomic_conditional_increment(register uint32_t *counter);
uint32_t atomic_decrement(register uint32_t *pw);
uint32_t atomic_increment(register uint32_t *pw);
uint32_t atomic_sub(register uint32_t *pw, register uint32_t val);
uint32_t atomic_add(register uint32_t *pw, register uint32_t val);
uint32_t atomic_exchange_if_greater(register uint32_t *pw, register uint32_t val);

uint64_t atomic_conditional_increment(register uint64_t *counter);
uint64_t atomic_decrement(register uint64_t *pw);
uint64_t atomic_increment(register uint64_t *pw);
uint64_t atomic_sub(register uint64_t *pw, register uint64_t val);
uint64_t atomic_add(register uint64_t *pw, register uint64_t val);
uint64_t atomic_exchange_if_greater(register uint64_t *pw, register uint64_t val);

struct SafeRefCount {

	int count;

public:
	// destroy() is called when weak_count_ drops to zero.

	bool ref() { //true on success

		if (count == 0)
			return false;
		count++;

		return true;
	}

	int refval() { //true on success

		if (count == 0)
			return 0;
		count++;
		return count;
	}

	bool unref() { // true if must be disposed of

		if (count > 0)
			count--;

		return count == 0;
	}

	long get() const { // nothrow

		return static_cast<int const volatile &>(count);
	}

	void init(int p_value = 1) {

		count = p_value;
	};
};

#else

#if defined(PLATFORM_REFCOUNT)

#include "platform_refcount.h"

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

#define REFCOUNT_T volatile int
#define REFCOUNT_GET_T int const volatile &

static inline int atomic_conditional_increment(volatile int *pw) {
	// int rv = *pw;
	// if( rv != 0 ) ++*pw;
	// return rv;

	int rv, tmp;

	__asm__(
			"movl %0, %%eax\n\t"
			"0:\n\t"
			"test %%eax, %%eax\n\t"
			"je 1f\n\t"
			"movl %%eax, %2\n\t"
			"incl %2\n\t"
			"lock\n\t"
			"cmpxchgl %2, %0\n\t"
			"jne 0b\n\t"
			"1:"
			: "=m"(*pw), "=&a"(rv), "=&r"(tmp)
			: // outputs (%0, %1, %2)
			"m"(*pw)
			: // input (%3)
			"cc" // clobbers
	);

	return rv;
}

static inline int atomic_decrement(volatile int *pw) {

	// return --(*pw);

	unsigned char rv;

	__asm__(
			"lock\n\t"
			"decl %0\n\t"
			"setne %1"
			: "=m"(*pw), "=qm"(rv)
			: "m"(*pw)
			: "memory");
	return static_cast<int>(rv);
}

/* PowerPC32/64 GCC */

#elif (defined(__GNUC__)) && (defined(__powerpc__) || defined(__ppc__))

#define REFCOUNT_T int
#define REFCOUNT_GET_T int const volatile &

inline int atomic_conditional_increment(int *pw) {
	// if( *pw != 0 ) ++*pw;
	// return *pw;

	int rv;

	__asm__(
			"0:\n\t"
			"lwarx %1, 0, %2\n\t"
			"cmpwi %1, 0\n\t"
			"beq 1f\n\t"
			"addi %1, %1, 1\n\t"
			"1:\n\t"
			"stwcx. %1, 0, %2\n\t"
			"bne- 0b"
			:

			"=m"(*pw), "=&b"(rv)
			: "r"(pw), "m"(*pw)
			: "cc");

	return rv;
}

inline int atomic_decrement(int *pw) {
	// return --*pw;

	int rv;

	__asm__ __volatile__(
			"sync\n\t"
			"0:\n\t"
			"lwarx %1, 0, %2\n\t"
			"addi %1, %1, -1\n\t"
			"stwcx. %1, 0, %2\n\t"
			"bne- 0b\n\t"
			"isync"
			:

			"=m"(*pw), "=&b"(rv)
			: "r"(pw), "m"(*pw)
			: "memory", "cc");

	return rv;
}

/* CW ARM */

#elif defined(__GNUC__) && (defined(__arm__))

#define REFCOUNT_T int
#define REFCOUNT_GET_T int const volatile &

inline int atomic_conditional_increment(volatile int *v) {
	int t;
	int tmp;

	__asm__ __volatile__(
			"1: ldrex %0, [%2] \n"
			" cmp %0, #0 \n"
			" beq 2f \n"
			" add %0, %0, #1 \n"
			"2: \n"
			" strex %1, %0, [%2] \n"
			" cmp %1, #0 \n"
			" bne 1b \n"

			: "=&r"(t), "=&r"(tmp)
			: "r"(v)
			: "cc", "memory");

	return t;
}

inline int atomic_decrement(volatile int *v) {
	int t;
	int tmp;

	__asm__ __volatile__(
			"1: ldrex %0, [%2] \n"
			" add %0, %0, #-1 \n"
			" strex %1, %0, [%2] \n"
			" cmp %1, #0 \n"
			" bne 1b \n"

			: "=&r"(t), "=&r"(tmp)
			: "r"(v)
			: "cc", "memory");

	return t;
}

/* CW PPC */

#elif (defined(__MWERKS__)) && defined(__POWERPC__)

inline long atomic_conditional_increment(register long *pw) {
	register int a;

	asm {
	loop:

		lwarx a, 0, pw
		cmpwi a, 0
		beq store

		addi a, a, 1

	store:

		stwcx. a, 0, pw
		bne- loop
	}

	return a;
}

inline long atomic_decrement(register long *pw) {
	register int a;

	asm {

		sync

	loop:

		lwarx a, 0, pw
		addi a, a, -1
		stwcx. a, 0, pw
		bne- loop

		isync
	}

	return a;
}

/* Any Windows (MSVC) */

#elif defined(_MSC_VER)

// made functions to not pollute namespace..

#define REFCOUNT_T long
#define REFCOUNT_GET_T long const volatile &

long atomic_conditional_increment(register long *pw);
long atomic_decrement(register long *pw);

#if 0
#elif defined(__GNUC__) && defined(ARMV6_ENABLED)

#endif

#else

#error This platform cannot use safe refcount, compile with NO_THREADS or implement it.

#endif

struct SafeRefCount {

	REFCOUNT_T count;
	uint32_t count;

public:
	// destroy() is called when weak_count_ drops to zero.
@@ -314,7 +62,7 @@ public:
		return atomic_conditional_increment(&count) != 0;
	}

	int refval() { //true on success
	uint32_t refval() { //true on success

		return atomic_conditional_increment(&count);
	}
@@ -328,17 +76,15 @@ public:
		return false;
	}

	long get() const { // nothrow
	uint32_t get() const { // nothrow

		return static_cast<REFCOUNT_GET_T>(count);
		return count;
	}

	void init(int p_value = 1) {
	void init(uint32_t p_value = 1) {

		count = p_value;
	};
	}
};

#endif // no thread safe

#endif
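
For reference, a hypothetical caller-side sketch of how the SafeRefCount API from this header behaves (illustrative only, not engine code): init() seeds the count, ref() fails once the count has already reached zero, and unref() reports when the last reference is gone.

#include "safe_refcount.h" // the header from this diff

// Hypothetical object owning a SafeRefCount (illustrative, not engine code).
struct Example {
	SafeRefCount refcount;
	Example() { refcount.init(); } // count starts at 1
};

void take_reference(Example *e) {
	if (e->refcount.ref()) {
		// Count was non-zero and has been atomically incremented; safe to hold.
	}
}

void drop_reference(Example *e) {
	if (e->refcount.unref()) {
		delete e; // unref() returned true: count reached zero, dispose of the object
	}
}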