Allow inlining of all parts of safe_refcount

The difference isn't huge, but the effort is minimal, and some
workloads gain a couple of percent of performance.
This commit is contained in:
Hein-Pieter van Braam 2017-09-25 00:01:55 +02:00 committed by Xavier Sellier
parent 4cb50673d7
commit 2085bcce3b
2 changed files with 267 additions and 288 deletions

View File

@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
/* Copyright (c) 2007-2017 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2017 Godot Engine contributors (cf. AUTHORS.md) */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@ -29,26 +29,259 @@
/*************************************************************************/
#include "safe_refcount.h"
#ifdef _MSC_VER
// Atomic functions, these are used for multithread safe reference counters!
// don't pollute my namespace!
#include <windows.h>
long atomic_conditional_increment(register long *pw) {
#ifdef NO_THREADS
/* try to increment until it actually works */
// taken from boost
/* Bogus implementation unaware of multiprocessing */
template <class T>
static _ALWAYS_INLINE_ T _atomic_conditional_increment_impl(register T *pw) {
	// Single-threaded fallback (no atomicity): a counter that is already
	// zero is left untouched and 0 is returned; otherwise the counter is
	// bumped by one and its new value is returned.
	if (*pw != 0) {
		++(*pw);
	}
	return *pw;
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_decrement_impl(register T *pw) {
	// Single-threaded fallback: subtract one in place, hand back the new value.
	return --(*pw);
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_increment_impl(register T *pw) {
	// Single-threaded fallback: add one in place, hand back the new value.
	return ++(*pw);
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_sub_impl(register T *pw, register T val) {
	// Single-threaded fallback: subtract val in place and return the result.
	return (*pw -= val);
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_add_impl(register T *pw, register T val) {
	// Single-threaded fallback: add val in place and return the result.
	return (*pw += val);
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_exchange_if_greater_impl(register T *pw, register T val) {
	// Single-threaded fallback: raise *pw to val when val is strictly
	// greater; in every case the value held afterwards is returned.
	if (!(val > *pw)) {
		return *pw;
	}
	*pw = val;
	return *pw;
}
#elif defined(__GNUC__)
/* Implementation for GCC & Clang */
// GCC guarantees atomic intrinsics for sizes of 1, 2, 4 and 8 bytes.
// Clang states it supports GCC atomic builtins.
template <class T>
static _ALWAYS_INLINE_ T _atomic_conditional_increment_impl(register T *pw) {
while (true) {
long tmp = static_cast<long const volatile &>(*pw);
T tmp = static_cast<T const volatile &>(*pw);
if (tmp == 0)
return 0; // if zero, can't add to it anymore
if (InterlockedCompareExchange(pw, tmp + 1, tmp) == tmp)
if (__sync_val_compare_and_swap(pw, tmp, tmp + 1) == tmp)
return tmp + 1;
}
}
long atomic_decrement(register long *pw) {
return InterlockedDecrement(pw);
template <class T>
static _ALWAYS_INLINE_ T _atomic_decrement_impl(register T *pw) {
return __sync_sub_and_fetch(pw, 1);
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_increment_impl(register T *pw) {
	// GCC/Clang builtin: add one to *pw and return the updated value.
	const T updated = __sync_add_and_fetch(pw, 1);
	return updated;
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_sub_impl(register T *pw, register T val) {
	// GCC/Clang builtin: subtract val from *pw and return the updated value.
	const T updated = __sync_sub_and_fetch(pw, val);
	return updated;
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_add_impl(register T *pw, register T val) {
	// GCC/Clang builtin: add val to *pw and return the updated value.
	const T updated = __sync_add_and_fetch(pw, val);
	return updated;
}
template <class T>
static _ALWAYS_INLINE_ T _atomic_exchange_if_greater_impl(register T *pw, register T val) {
	// Compare-and-swap retry loop: store val only while it is strictly
	// greater than the value currently observed in *pw.
	for (;;) {
		const T observed = static_cast<T const volatile &>(*pw);
		if (observed >= val) {
			// Nothing to do: the stored value is already >= val.
			return observed;
		}
		if (__sync_val_compare_and_swap(pw, observed, val) == observed) {
			// The swap went through; *pw now holds val.
			return val;
		}
		// Another writer changed *pw in between; re-read and try again.
	}
}
#elif defined(_MSC_VER)
/* Implementation for MSVC-Windows */
// don't pollute my namespace!
#include <windows.h>
#define ATOMIC_CONDITIONAL_INCREMENT_BODY(m_pw, m_win_type, m_win_cmpxchg, m_cpp_type) \
	/* Function body: compare-and-swap retry loop (approach taken from boost). */ \
	/* Returns 0 if *m_pw is zero, otherwise the value after adding one.       */ \
	for (;;) { \
		m_cpp_type tmp = static_cast<m_cpp_type const volatile &>(*(m_pw)); \
		if (tmp == 0) { \
			return 0; /* a counter at zero must not be incremented again */ \
		} \
		if (m_win_cmpxchg((m_win_type volatile *)(m_pw), tmp + 1, tmp) == tmp) { \
			return tmp + 1; \
		} \
	}
#define ATOMIC_EXCHANGE_IF_GREATER_BODY(m_pw, m_val, m_win_type, m_win_cmpxchg, m_cpp_type) \
	/* Function body: compare-and-swap retry loop that stores m_val into *m_pw   */ \
	/* only while m_val is strictly greater than the value currently held.       */ \
	/* Returns the value *m_pw holds afterwards (the old value if it was >=).    */ \
	/* m_val is parenthesised at every use so that an expression argument keeps  */ \
	/* its intended precedence; note that it is still evaluated more than once.  */ \
	while (true) { \
		m_cpp_type tmp = static_cast<m_cpp_type const volatile &>(*(m_pw)); \
		if (tmp >= (m_val)) \
			return tmp; /* already greater, or equal */ \
		if (m_win_cmpxchg((m_win_type volatile *)(m_pw), (m_val), tmp) == tmp) \
			return (m_val); \
	}
static _ALWAYS_INLINE_ uint32_t _atomic_conditional_increment_impl(register uint32_t *pw) {
	// 32-bit MSVC variant, written out as an explicit compare-and-swap loop:
	// returns 0 if the counter is zero, otherwise the incremented value.
	for (;;) {
		uint32_t seen = static_cast<uint32_t const volatile &>(*(pw));
		if (seen == 0) {
			return 0; // a counter at zero must not be incremented again
		}
		if (InterlockedCompareExchange((LONG volatile *)(pw), seen + 1, seen) == seen) {
			return seen + 1;
		}
	}
}
static _ALWAYS_INLINE_ uint32_t _atomic_decrement_impl(register uint32_t *pw) {
	// 32-bit MSVC variant: forwards to InterlockedDecrement and returns its result.
	LONG volatile *target = (LONG volatile *)pw;
	return InterlockedDecrement(target);
}
static _ALWAYS_INLINE_ uint32_t _atomic_increment_impl(register uint32_t *pw) {
	// 32-bit MSVC variant: forwards to InterlockedIncrement and returns its result.
	LONG volatile *target = (LONG volatile *)pw;
	return InterlockedIncrement(target);
}
static _ALWAYS_INLINE_ uint32_t _atomic_sub_impl(register uint32_t *pw, register uint32_t val) {
	// Subtracts val from *pw and returns the value after the subtraction.
	//
	// The subtraction is performed by adding the negated amount with
	// InterlockedExchangeAdd; its result has val subtracted once more to
	// produce the return value.
	//
	// The amount is negated in unsigned arithmetic (0u - val wraps modulo
	// 2^32) and only then converted to LONG. Negating (int32_t)val directly
	// would overflow a signed integer when val == 0x80000000.
	const LONG addend = (LONG)(0u - val);
	return InterlockedExchangeAdd((LONG volatile *)pw, addend) - val;
}
static _ALWAYS_INLINE_ uint32_t _atomic_add_impl(register uint32_t *pw, register uint32_t val) {
	// 32-bit MSVC variant: forwards to InterlockedAdd and returns its result.
	LONG volatile *target = (LONG volatile *)pw;
	return InterlockedAdd(target, val);
}
static _ALWAYS_INLINE_ uint32_t _atomic_exchange_if_greater_impl(register uint32_t *pw, register uint32_t val) {
	// 32-bit MSVC variant, written out as an explicit compare-and-swap loop:
	// val is stored only while it is strictly greater than the current value.
	for (;;) {
		uint32_t seen = static_cast<uint32_t const volatile &>(*(pw));
		if (seen >= val) {
			return seen; // already greater, or equal
		}
		if (InterlockedCompareExchange((LONG volatile *)(pw), val, seen) == seen) {
			return val;
		}
	}
}
static _ALWAYS_INLINE_ uint64_t _atomic_conditional_increment_impl(register uint64_t *pw) {
	// 64-bit MSVC variant, written out as an explicit compare-and-swap loop:
	// returns 0 if the counter is zero, otherwise the incremented value.
	for (;;) {
		uint64_t seen = static_cast<uint64_t const volatile &>(*(pw));
		if (seen == 0) {
			return 0; // a counter at zero must not be incremented again
		}
		if (InterlockedCompareExchange64((LONGLONG volatile *)(pw), seen + 1, seen) == seen) {
			return seen + 1;
		}
	}
}
static _ALWAYS_INLINE_ uint64_t _atomic_decrement_impl(register uint64_t *pw) {
	// 64-bit MSVC variant: forwards to InterlockedDecrement64 and returns its result.
	LONGLONG volatile *target = (LONGLONG volatile *)pw;
	return InterlockedDecrement64(target);
}
static _ALWAYS_INLINE_ uint64_t _atomic_increment_impl(register uint64_t *pw) {
	// 64-bit MSVC variant: forwards to InterlockedIncrement64 and returns its result.
	LONGLONG volatile *target = (LONGLONG volatile *)pw;
	return InterlockedIncrement64(target);
}
static _ALWAYS_INLINE_ uint64_t _atomic_sub_impl(register uint64_t *pw, register uint64_t val) {
	// Subtracts val from *pw and returns the value after the subtraction.
	//
	// The subtraction is performed by adding the negated amount with
	// InterlockedExchangeAdd64; its result has val subtracted once more to
	// produce the return value.
	//
	// The amount is negated in unsigned arithmetic (wraps modulo 2^64) and
	// only then converted to LONGLONG. Negating (int64_t)val directly would
	// overflow a signed integer when val == 2^63.
	const LONGLONG addend = (LONGLONG)((uint64_t)0 - val);
	return InterlockedExchangeAdd64((LONGLONG volatile *)pw, addend) - val;
}
static _ALWAYS_INLINE_ uint64_t _atomic_add_impl(register uint64_t *pw, register uint64_t val) {
	// 64-bit MSVC variant: forwards to InterlockedAdd64 and returns its result.
	LONGLONG volatile *target = (LONGLONG volatile *)pw;
	return InterlockedAdd64(target, val);
}
static _ALWAYS_INLINE_ uint64_t _atomic_exchange_if_greater_impl(register uint64_t *pw, register uint64_t val) {
	// 64-bit MSVC variant, written out as an explicit compare-and-swap loop:
	// val is stored only while it is strictly greater than the current value.
	for (;;) {
		uint64_t seen = static_cast<uint64_t const volatile &>(*(pw));
		if (seen >= val) {
			return seen; // already greater, or equal
		}
		if (InterlockedCompareExchange64((LONGLONG volatile *)(pw), val, seen) == seen) {
			return val;
		}
	}
}
#else
// No atomic implementation is available for this compiler.
#error Must provide atomic functions for this platform or compiler!
#endif
// The actual advertised functions; they'll call the right implementation
uint32_t atomic_conditional_increment(register uint32_t *counter) {
	// Public 32-bit entry point; delegates to the implementation selected above.
	const uint32_t result = _atomic_conditional_increment_impl(counter);
	return result;
}
uint32_t atomic_decrement(register uint32_t *pw) {
	// Public 32-bit entry point; delegates to the implementation selected above.
	const uint32_t result = _atomic_decrement_impl(pw);
	return result;
}
uint32_t atomic_increment(register uint32_t *pw) {
	// Public 32-bit entry point; delegates to the implementation selected above.
	const uint32_t result = _atomic_increment_impl(pw);
	return result;
}
uint32_t atomic_sub(register uint32_t *pw, register uint32_t val) {
	// Public 32-bit entry point; delegates to the implementation selected above.
	const uint32_t result = _atomic_sub_impl(pw, val);
	return result;
}
uint32_t atomic_add(register uint32_t *pw, register uint32_t val) {
	// Public 32-bit entry point; delegates to the implementation selected above.
	const uint32_t result = _atomic_add_impl(pw, val);
	return result;
}
uint32_t atomic_exchange_if_greater(register uint32_t *pw, register uint32_t val) {
	// Public 32-bit entry point; delegates to the implementation selected above.
	const uint32_t result = _atomic_exchange_if_greater_impl(pw, val);
	return result;
}
uint64_t atomic_conditional_increment(register uint64_t *counter) {
	// Public 64-bit entry point; delegates to the implementation selected above.
	const uint64_t result = _atomic_conditional_increment_impl(counter);
	return result;
}
uint64_t atomic_decrement(register uint64_t *pw) {
	// Public 64-bit entry point; delegates to the implementation selected above.
	const uint64_t result = _atomic_decrement_impl(pw);
	return result;
}
uint64_t atomic_increment(register uint64_t *pw) {
	// Public 64-bit entry point; delegates to the implementation selected above.
	const uint64_t result = _atomic_increment_impl(pw);
	return result;
}
uint64_t atomic_sub(register uint64_t *pw, register uint64_t val) {
	// Public 64-bit entry point; delegates to the implementation selected above.
	const uint64_t result = _atomic_sub_impl(pw, val);
	return result;
}
uint64_t atomic_add(register uint64_t *pw, register uint64_t val) {
	// Public 64-bit entry point; delegates to the implementation selected above.
	const uint64_t result = _atomic_add_impl(pw, val);
	return result;
}
uint64_t atomic_exchange_if_greater(register uint64_t *pw, register uint64_t val) {
	// Public 64-bit entry point; delegates to the implementation selected above.
	const uint64_t result = _atomic_exchange_if_greater_impl(pw, val);
	return result;
}

View File

@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
/* Copyright (c) 2007-2017 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2017 Godot Engine contributors (cf. AUTHORS.md) */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@ -34,277 +34,25 @@
/* x86/x86_64 GCC */
#include "platform_config.h"
#include "typedefs.h"
#ifdef NO_THREADS
// Counter primitives, declared here and defined out of line.
// Each one is overloaded for 32-bit and 64-bit unsigned counters.

// Adds one to *counter unless it is zero. Returns the new value, or 0 when
// the counter was zero and therefore left untouched.
uint32_t atomic_conditional_increment(register uint32_t *counter);
// Subtracts one from *pw and returns the new value.
uint32_t atomic_decrement(register uint32_t *pw);
// Adds one to *pw and returns the new value.
uint32_t atomic_increment(register uint32_t *pw);
// Subtracts val from *pw and returns the new value.
uint32_t atomic_sub(register uint32_t *pw, register uint32_t val);
// Adds val to *pw and returns the new value.
uint32_t atomic_add(register uint32_t *pw, register uint32_t val);
// Stores val into *pw when val is greater than the current value; returns
// the value *pw holds afterwards.
uint32_t atomic_exchange_if_greater(register uint32_t *pw, register uint32_t val);
// 64-bit overloads; same contracts as the 32-bit versions above.
uint64_t atomic_conditional_increment(register uint64_t *counter);
uint64_t atomic_decrement(register uint64_t *pw);
uint64_t atomic_increment(register uint64_t *pw);
uint64_t atomic_sub(register uint64_t *pw, register uint64_t val);
uint64_t atomic_add(register uint64_t *pw, register uint64_t val);
uint64_t atomic_exchange_if_greater(register uint64_t *pw, register uint64_t val);
// Reference counter used when the engine is built with NO_THREADS:
// plain integer arithmetic, no atomic operations.
struct SafeRefCount {

	int count;

	// Takes a reference. Returns false (and changes nothing) when the
	// count is zero, true otherwise.
	bool ref() {
		if (count != 0) {
			++count;
			return true;
		}
		return false;
	}

	// Like ref(), but returns the count after the attempt: 0 when the
	// count was zero, otherwise the incremented value.
	int refval() {
		if (count != 0) {
			++count;
		}
		return count;
	}

	// Drops a reference without going below zero. Returns true when the
	// count is zero afterwards, i.e. the owner must be disposed of.
	bool unref() {
		if (count > 0) {
			--count;
		}
		return count == 0;
	}

	// Returns the current count.
	long get() const {
		return static_cast<int const volatile &>(count);
	}

	// Sets the count to p_value (1 by default).
	void init(int p_value = 1) {
		count = p_value;
	}
};
#else
#if defined(PLATFORM_REFCOUNT)
#include "platform_refcount.h"
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
#define REFCOUNT_T volatile int
#define REFCOUNT_GET_T int const volatile &
// x86 / x86_64 (GCC inline asm): increments *pw unless it is zero.
// Returns the value read before the increment (0 when nothing was done),
// as described by the pseudo-code below.
static inline int atomic_conditional_increment(volatile int *pw) {
// int rv = *pw;
// if( rv != 0 ) ++*pw;
// return rv;
int rv, tmp;
// Sequence: load *pw into EAX; if it is zero jump to the end; otherwise
// copy it to a scratch register, add one and try to publish it with
// LOCK CMPXCHG. If the compare fails, jump back and retry.
__asm__(
"movl %0, %%eax\n\t"
"0:\n\t"
"test %%eax, %%eax\n\t"
"je 1f\n\t"
"movl %%eax, %2\n\t"
"incl %2\n\t"
"lock\n\t"
"cmpxchgl %2, %0\n\t"
"jne 0b\n\t"
"1:"
: "=m"(*pw), "=&a"(rv), "=&r"(tmp)
: // outputs (%0, %1, %2)
"m"(*pw)
: // input (%3)
"cc" // clobbers
);
// rv is bound to EAX (the "=&a" constraint above).
return rv;
}
// x86 / x86_64 (GCC inline asm): decrements *pw with a LOCK-prefixed DECL.
// NOTE: the return value is the SETNE result, not the counter itself:
// 1 while the counter is still non-zero after the decrement, 0 once it has
// reached zero. The "return --(*pw)" pseudo-code below is only equivalent
// when the result is tested for zero / non-zero.
static inline int atomic_decrement(volatile int *pw) {
// return --(*pw);
unsigned char rv;
__asm__(
"lock\n\t"
"decl %0\n\t"
"setne %1"
: "=m"(*pw), "=qm"(rv)
: "m"(*pw)
: "memory");
return static_cast<int>(rv);
}
/* PowerPC32/64 GCC */
#elif (defined(__GNUC__)) && (defined(__powerpc__) || defined(__ppc__))
#define REFCOUNT_T int
#define REFCOUNT_GET_T int const volatile &
// PowerPC (GCC inline asm): increments *pw unless it is zero, using a
// lwarx / stwcx. retry loop. Returns the value that was stored: the
// incremented count, or 0 when the counter was zero.
inline int atomic_conditional_increment(int *pw) {
// if( *pw != 0 ) ++*pw;
// return *pw;
int rv;
// The loaded value is compared with 0; when it is zero the addi is skipped
// and the unchanged value is written back. "bne- 0b" restarts the sequence
// when the conditional store did not succeed.
__asm__(
"0:\n\t"
"lwarx %1, 0, %2\n\t"
"cmpwi %1, 0\n\t"
"beq 1f\n\t"
"addi %1, %1, 1\n\t"
"1:\n\t"
"stwcx. %1, 0, %2\n\t"
"bne- 0b"
:
"=m"(*pw), "=&b"(rv)
: "r"(pw), "m"(*pw)
: "cc");
return rv;
}
// PowerPC (GCC inline asm): decrements *pw with a lwarx / stwcx. retry
// loop and returns the decremented value. A "sync" is issued before the
// loop and an "isync" after it.
inline int atomic_decrement(int *pw) {
// return --*pw;
int rv;
__asm__ __volatile__(
"sync\n\t"
"0:\n\t"
"lwarx %1, 0, %2\n\t"
"addi %1, %1, -1\n\t"
"stwcx. %1, 0, %2\n\t"
"bne- 0b\n\t"
"isync"
:
"=m"(*pw), "=&b"(rv)
: "r"(pw), "m"(*pw)
: "memory", "cc");
return rv;
}
/* CW ARM */
#elif defined(__GNUC__) && (defined(__arm__))
#define REFCOUNT_T int
#define REFCOUNT_GET_T int const volatile &
// ARM (GCC inline asm): increments *v unless it is zero, using an
// ldrex / strex retry loop. Returns the value that was stored: the
// incremented count, or 0 when the counter was zero.
inline int atomic_conditional_increment(volatile int *v) {
int t; // value loaded from *v and written back (possibly incremented)
int tmp; // strex status; non-zero means the store failed and we retry
__asm__ __volatile__(
"1: ldrex %0, [%2] \n"
" cmp %0, #0 \n"
" beq 2f \n"
" add %0, %0, #1 \n"
"2: \n"
" strex %1, %0, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
: "=&r"(t), "=&r"(tmp)
: "r"(v)
: "cc", "memory");
return t;
}
// ARM (GCC inline asm): decrements *v with an ldrex / strex retry loop
// and returns the decremented value.
inline int atomic_decrement(volatile int *v) {
int t; // value loaded from *v, decremented and written back
int tmp; // strex status; non-zero means the store failed and we retry
__asm__ __volatile__(
"1: ldrex %0, [%2] \n"
" add %0, %0, #-1 \n"
" strex %1, %0, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
: "=&r"(t), "=&r"(tmp)
: "r"(v)
: "cc", "memory");
return t;
}
/* CW PPC */
#elif (defined(__MWERKS__)) && defined(__POWERPC__)
// CodeWarrior (__MWERKS__) PowerPC: increments *pw unless it is zero,
// using a lwarx / stwcx. retry loop written in the compiler's asm-block
// syntax. Returns the value that was stored: the incremented count, or 0
// when the counter was zero.
inline long atomic_conditional_increment(register long *pw) {
register int a;
asm {
loop:
lwarx a, 0, pw
cmpwi a, 0
beq store
addi a, a, 1
store:
stwcx. a, 0, pw
bne- loop
}
return a;
}
// CodeWarrior (__MWERKS__) PowerPC: decrements *pw with a lwarx / stwcx.
// retry loop and returns the decremented value. A "sync" is issued before
// the loop and an "isync" after it.
inline long atomic_decrement(register long *pw) {
register int a;
asm {
sync
loop:
lwarx a, 0, pw
addi a, a, -1
stwcx. a, 0, pw
bne- loop
isync
}
return a;
}
/* Any Windows (MSVC) */
#elif defined(_MSC_VER)
// made functions to not pollute namespace..
#define REFCOUNT_T long
#define REFCOUNT_GET_T long const volatile &
// MSVC: declarations only, so that <windows.h> does not have to be included
// from this header. NOTE(review): the bodies are presumably the
// Interlocked*-based definitions in safe_refcount.cpp - confirm.
long atomic_conditional_increment(register long *pw);
long atomic_decrement(register long *pw);
#if 0
#elif defined(__GNUC__) && defined(ARMV6_ENABLED)
#endif
#else
#error This platform cannot use safe refcount, compile with NO_THREADS or implement it.
#endif
struct SafeRefCount {
REFCOUNT_T count;
uint32_t count;
public:
// destroy() is called when weak_count_ drops to zero.
@ -314,7 +62,7 @@ public:
return atomic_conditional_increment(&count) != 0;
}
int refval() { //true on success
uint32_t refval() { //true on success
return atomic_conditional_increment(&count);
}
@ -328,17 +76,15 @@ public:
return false;
}
long get() const { // nothrow
uint32_t get() const { // nothrow
return static_cast<REFCOUNT_GET_T>(count);
return count;
}
void init(int p_value = 1) {
void init(uint32_t p_value = 1) {
count = p_value;
};
}
};
#endif // no thread safe
#endif