1468 lines
43 KiB
C++
1468 lines
43 KiB
C++
#ifdef NEDMALLOC_ENABLED
|
|
/* Alternative malloc implementation for multiple threads without
|
|
lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas
|
|
|
|
Boost Software License - Version 1.0 - August 17th, 2003
|
|
|
|
Permission is hereby granted, free of charge, to any person or organization
|
|
obtaining a copy of the software and accompanying documentation covered by
|
|
this license (the "Software") to use, reproduce, display, distribute,
|
|
execute, and transmit the Software, and to prepare derivative works of the
|
|
Software, and to permit third-parties to whom the Software is furnished to
|
|
do so, all subject to the following:
|
|
|
|
The copyright notices in the Software and this entire statement, including
|
|
the above license grant, this restriction and the following disclaimer,
|
|
must be included in all copies of the Software, in whole or in part, and
|
|
all derivative works of the Software, unless such copies or derivative
|
|
works are solely in the form of machine-executable object code generated by
|
|
a source language processor.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
|
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
|
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifdef _MSC_VER
|
|
/* Enable full aliasing on MSVC */
|
|
/*#pragma optimize("a", on)*/
|
|
#pragma warning(push)
|
|
#pragma warning(disable:4100) /* unreferenced formal parameter */
|
|
#pragma warning(disable:4127) /* conditional expression is constant */
|
|
#pragma warning(disable:4706) /* assignment within conditional expression */
|
|
#endif
|
|
|
|
/*#define ENABLE_TOLERANT_NEDMALLOC 1*/
|
|
/*#define ENABLE_FAST_HEAP_DETECTION 1*/
|
|
/*#define NEDMALLOC_DEBUG 1*/
|
|
|
|
/*#define FULLSANITYCHECKS*/
|
|
/* If link time code generation is on, don't force or prevent inlining */
|
|
#if defined(_MSC_VER) && defined(NEDMALLOC_DLL_EXPORTS)
|
|
#define FORCEINLINE
|
|
#define NOINLINE
|
|
#endif
|
|
|
|
|
|
#include "nedmalloc.h"
|
|
#ifdef WIN32
|
|
#include <malloc.h>
|
|
#include <stddef.h>
|
|
#endif
|
|
#if USE_ALLOCATOR==1
|
|
#define MSPACES 1
|
|
#define ONLY_MSPACES 1
|
|
#endif
|
|
#define USE_DL_PREFIX 1
|
|
#ifndef USE_LOCKS
|
|
#define USE_LOCKS 1
|
|
#endif
|
|
#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */
|
|
#ifndef NEDMALLOC_DEBUG
|
|
#if defined(DEBUG) || defined(_DEBUG)
|
|
#define NEDMALLOC_DEBUG 1
|
|
#else
|
|
#define NEDMALLOC_DEBUG 0
|
|
#endif
|
|
#endif
|
|
/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */
|
|
#undef DEBUG
|
|
#undef _DEBUG
|
|
#if NEDMALLOC_DEBUG
|
|
#define _DEBUG
|
|
#define DEBUG 1
|
|
#else
|
|
#define DEBUG 0
|
|
#endif
|
|
#ifdef NDEBUG /* Disable assert checking on release builds */
|
|
#undef DEBUG
|
|
#undef _DEBUG
|
|
#endif
|
|
/* The default of 64Kb means we spend too much time kernel-side */
|
|
#ifndef DEFAULT_GRANULARITY
|
|
#define DEFAULT_GRANULARITY (1*1024*1024)
|
|
#if DEBUG
|
|
#define DEFAULT_GRANULARITY_ALIGNED
|
|
#endif
|
|
#endif
|
|
/*#define USE_SPIN_LOCKS 0*/
|
|
|
|
|
|
#include "malloc.c.h"
|
|
#ifdef NDEBUG /* Disable assert checking on release builds */
|
|
#undef DEBUG
|
|
#elif !NEDMALLOC_DEBUG
|
|
#ifdef __GNUC__
|
|
#warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.
|
|
#elif defined(_MSC_VER)
|
|
#pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.")
|
|
#endif
|
|
#endif
|
|
|
|
/* The maximum concurrent threads in a pool possible */
|
|
#ifndef MAXTHREADSINPOOL
|
|
#define MAXTHREADSINPOOL 16
|
|
#endif
|
|
/* The maximum number of threadcaches which can be allocated */
|
|
#ifndef THREADCACHEMAXCACHES
|
|
#define THREADCACHEMAXCACHES 256
|
|
#endif
|
|
/* The maximum size to be allocated from the thread cache */
|
|
#ifndef THREADCACHEMAX
|
|
#define THREADCACHEMAX 8192
|
|
#endif
|
|
#if 0
|
|
/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
|
|
#define THREADCACHEMAXBINS ((13-4)*2)
|
|
#else
|
|
/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
|
|
#define THREADCACHEMAXBINS (13-4)
|
|
#endif
|
|
/* Point at which the free space in a thread cache is garbage collected */
|
|
#ifndef THREADCACHEMAXFREESPACE
|
|
#define THREADCACHEMAXFREESPACE (512*1024)
|
|
#endif
|
|
|
|
|
|
#ifdef WIN32
|
|
#define TLSVAR DWORD
|
|
#define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
|
|
#define TLSFREE(k) (!TlsFree(k))
|
|
#define TLSGET(k) TlsGetValue(k)
|
|
#define TLSSET(k, a) (!TlsSetValue(k, a))
|
|
#ifdef DEBUG
|
|
/* Debug replacement for TlsGetValue(): reads TLS slot `idx`, asserts that the
   thread's last-error code is S_OK afterwards, and returns the slot value
   unchanged.  TLSGET is redirected to this function just below. */
static LPVOID ChkedTlsGetValue(DWORD idx)
{
	LPVOID const slotvalue=TlsGetValue(idx);
	assert(GetLastError()==S_OK);
	return slotvalue;
}
|
|
#undef TLSGET
|
|
#define TLSGET(k) ChkedTlsGetValue(k)
|
|
#endif
|
|
#else
|
|
#define TLSVAR pthread_key_t
|
|
#define TLSALLOC(k) pthread_key_create(k, 0)
|
|
#define TLSFREE(k) pthread_key_delete(k)
|
|
#define TLSGET(k) pthread_getspecific(k)
|
|
#define TLSSET(k, a) pthread_setspecific(k, a)
|
|
#endif
|
|
|
|
#if defined(__cplusplus)
|
|
#if !defined(NO_NED_NAMESPACE)
|
|
namespace nedalloc {
|
|
#else
|
|
extern "C" {
|
|
#endif
|
|
#endif
|
|
|
|
#if USE_ALLOCATOR==0
|
|
/* Reports on stderr that `opname` is not available in this build
   configuration and then abort()s.  The return statement is never reached; it
   only satisfies the pointer return type so the function can be used in
   expression context. */
static void *unsupported_operation(const char *opname) THROWSPEC
{
	void *neverreturned=0;
	fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname);
	abort();
	return neverreturned;
}
|
|
/* Only present when USE_ALLOCATOR==0 (system allocator).  The pool code casts
   the current value of this counter to mstate and then increments it whenever
   it needs a new "mspace" entry, so each entry gets a distinct non-null
   placeholder value rather than a real dlmalloc mspace. */
static size_t mspacecounter=(size_t) 0xdeadbeef;
|
|
#endif
|
|
#ifndef ENABLE_FAST_HEAP_DETECTION
|
|
/* Lowest mstate->least_addr observed by CallMalloc/CallCalloc so far (0 until
   the first successful allocation).  nedblkmstate() rejects any pointer below
   this address as not belonging to nedmalloc. */
static void *RESTRICT leastusedaddress;
|
|
/* Upper bound on the chunk size of any block handed out so far: the largest
   chunksize seen plus one page, masked down to a page multiple.
   nedblkmstate() rejects blocks whose header claims a larger size. */
static size_t largestusedblock;
|
|
#endif
|
|
|
|
/* Allocates `size` bytes, aligned to `alignment` when that is non-zero, from
   `mspace` (USE_ALLOCATOR==1) or from the system allocator (USE_ALLOCATOR==0).
   Returns 0 on failure.

   USE_MAGIC_HEADERS: the request is enlarged by alignment+3*sizeof(size_t),
   the underlying allocator is asked for an unaligned block, and alignment is
   applied by hand.  The words in front of the returned pointer are then
       [ "NEDMALOC" fill ... ] [ "NEDMALOC" ] [ mspace ] [ size ] <returned ptr>
   where the stored size is the enlarged request minus the three header words.

   USE_ALLOCATOR==1 without ENABLE_FAST_HEAP_DETECTION: every successful
   allocation also updates leastusedaddress and largestusedblock. */
static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
{
	void *RESTRICT block=0;
	size_t wantalign=alignment;
#if USE_MAGIC_HEADERS
	size_t *hdr=0;
	size+=alignment+3*sizeof(size_t);
	wantalign=0;		/* alignment is done manually below */
#endif
#if USE_ALLOCATOR==0
	if(!wantalign)
		block=malloc(size);
	else
	{
#ifdef _MSC_VER
		/* This is the MSVCRT equivalent */
		block=_aligned_malloc(size, wantalign);
#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
		/* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent. */
		block=memalign(wantalign, size);
#else
#error Cannot aligned allocate with the memory allocator of an unknown system!
#endif
	}
#elif USE_ALLOCATOR==1
	if(wantalign)
		block=mspace_memalign((mstate) mspace, wantalign, size);
	else
		block=mspace_malloc((mstate) mspace, size);
#ifndef ENABLE_FAST_HEAP_DETECTION
	if(block)
	{	/* Keep the bounds used by nedblkmstate() up to date */
		size_t chunkbytes=chunksize(mem2chunk(block));
		void *lowest=(void *)((mstate) mspace)->least_addr;
		if(!leastusedaddress || lowest<leastusedaddress) leastusedaddress=lowest;
		if(!largestusedblock || chunkbytes>largestusedblock) largestusedblock=(chunkbytes+mparams.page_size) & ~(mparams.page_size-1);
	}
#endif
#endif
	if(!block) return 0;
#if USE_MAGIC_HEADERS
	hdr=(size_t *) block;
	block=(void *)(hdr+3);
	if(alignment) block=(void *)(((size_t) block+alignment-1)&~(alignment-1));
	/* Fill everything up to and including the word three before the user
	   pointer with the magic marker, then store owner and size */
	while(hdr<(size_t *)block-2)
	{
		*hdr=*(size_t *)"NEDMALOC";
		hdr++;
	}
	hdr[0]=(size_t) mspace;
	hdr[1]=size-3*sizeof(size_t);
#endif
	return block;
}
|
|
|
|
/* Zero-initialised counterpart of CallMalloc: obtains `size` zeroed bytes from
   `mspace` (USE_ALLOCATOR==1) or from calloc() (USE_ALLOCATOR==0).  Returns 0
   on failure.

   `alignment` only has an effect when USE_MAGIC_HEADERS is enabled, in which
   case the request is enlarged by alignment+3*sizeof(size_t), the returned
   pointer is aligned by hand and the magic/mspace/size header words are
   written in front of it exactly as CallMalloc does.  Without magic headers
   the parameter is not used.

   USE_ALLOCATOR==1 without ENABLE_FAST_HEAP_DETECTION: every successful
   allocation also updates leastusedaddress and largestusedblock. */
static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
{
	void *RESTRICT block=0;
#if USE_MAGIC_HEADERS
	size_t *hdr=0;
	size+=alignment+3*sizeof(size_t);
#endif
#if USE_ALLOCATOR==0
	block=calloc(1, size);
#elif USE_ALLOCATOR==1
	block=mspace_calloc((mstate) mspace, 1, size);
#ifndef ENABLE_FAST_HEAP_DETECTION
	if(block)
	{	/* Keep the bounds used by nedblkmstate() up to date */
		size_t chunkbytes=chunksize(mem2chunk(block));
		void *lowest=(void *)((mstate) mspace)->least_addr;
		if(!leastusedaddress || lowest<leastusedaddress) leastusedaddress=lowest;
		if(!largestusedblock || chunkbytes>largestusedblock) largestusedblock=(chunkbytes+mparams.page_size) & ~(mparams.page_size-1);
	}
#endif
#endif
	if(!block) return 0;
#if USE_MAGIC_HEADERS
	hdr=(size_t *) block;
	block=(void *)(hdr+3);
	if(alignment) block=(void *)(((size_t) block+alignment-1)&~(alignment-1));
	while(hdr<(size_t *)block-2)
	{
		*hdr=*(size_t *) "NEDMALOC";
		hdr++;
	}
	hdr[0]=(size_t) mspace;
	hdr[1]=size-3*sizeof(size_t);
#endif
	return block;
}
|
|
|
|
/* Resizes `mem` to `newsize` bytes and returns the (possibly moved) block, or
   0 on failure, in which case `mem` remains valid.

   isforeign!=0: `mem` came from the system allocator.  A fresh block is taken
   from `mspace` via CallMalloc, min(oldsize, newsize) bytes are copied across
   and the original is released with free().
   isforeign==0: the block is resized in place by realloc() (USE_ALLOCATOR==0)
   or mspace_realloc() (USE_ALLOCATOR==1).

   USE_MAGIC_HEADERS: before resizing, the "NEDMALOC" marker words in front of
   `mem` are overwritten with "nedmaloc" and `mem` is rewound to the start of
   the underlying allocation.  If the resize fails those words are turned back
   into "NEDMALOC" so the untouched block is still recognised as ours (the
   previous code compared against 0 here and therefore never restored them).
   On success a fresh three-word header is written in front of the result. */
static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC
{
	void *RESTRICT ret=0;
#if USE_MAGIC_HEADERS
	mstate oldmspace=0;
	size_t *_ret=0, *_mem=(size_t *) mem-3;
#endif
	if(isforeign)
	{	/* Transfer */
#if USE_MAGIC_HEADERS
		assert(_mem[0]!=*(size_t *) "NEDMALOC");
#endif
		if((ret=CallMalloc(mspace, newsize, 0)))
		{
#if defined(DEBUG)
			printf("*** nedmalloc frees system allocated block %p\n", mem);
#endif
			memcpy(ret, mem, oldsize<newsize ? oldsize : newsize);
			free(mem);
		}
		return ret;
	}
#if USE_MAGIC_HEADERS
	assert(_mem[0]==*(size_t *) "NEDMALOC");
	newsize+=3*sizeof(size_t);
	oldmspace=(mstate) _mem[1];
	assert(oldsize>=_mem[2]);
	/* Walk backwards over every marker word, invalidating each one */
	for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
	/* _mem stopped one word before the allocation; step back onto it */
	mem=(void *)(++_mem);
#endif
#if USE_ALLOCATOR==0
	ret=realloc(mem, newsize);
#elif USE_ALLOCATOR==1
	ret=mspace_realloc((mstate) mspace, mem, newsize);
#ifndef ENABLE_FAST_HEAP_DETECTION
	if(ret)
	{
		size_t truesize=chunksize(mem2chunk(ret));
		if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
	}
#endif
#endif
	if(!ret)
	{	/* Put it back the way it was */
#if USE_MAGIC_HEADERS
		/* _mem points at the first invalidated word; walk forwards turning
		   each "nedmaloc" back into "NEDMALOC".  The loop stops at the
		   mspace word that follows the last marker. */
		for(; *_mem==*(size_t *) "nedmaloc"; *_mem++=*(size_t *) "NEDMALOC");
#endif
		return 0;
	}
#if USE_MAGIC_HEADERS
	_ret=(size_t *) ret;
	ret=(void *)(_ret+3);
	for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
	_ret[0]=(size_t) mspace;
	_ret[1]=newsize-3*sizeof(size_t);
#endif
	return ret;
}
|
|
|
|
/* Releases `mem`.

   isforeign!=0: the block belongs to the system allocator and is handed
   straight to free().
   isforeign==0: the block is released with free() (USE_ALLOCATOR==0) or with
   mspace_free() on `mspace` (USE_ALLOCATOR==1).

   USE_MAGIC_HEADERS: for our own blocks the "NEDMALOC" marker words in front
   of `mem` are overwritten with "nedmaloc" and `mem` is rewound to the start
   of the underlying allocation before it is released. */
static FORCEINLINE void CallFree(void *RESTRICT mspace, void *RESTRICT mem, int isforeign) THROWSPEC
{
#if USE_MAGIC_HEADERS
	mstate ownerspace=0;
	size_t *hdr=(size_t *) mem-3, storedsize=0;
#endif
	if(isforeign)
	{
#if USE_MAGIC_HEADERS
		assert(hdr[0]!=*(size_t *) "NEDMALOC");
#endif
#if defined(DEBUG)
		printf("*** nedmalloc frees system allocated block %p\n", mem);
#endif
		free(mem);
		return;
	}
#if USE_MAGIC_HEADERS
	assert(hdr[0]==*(size_t *) "NEDMALOC");
	ownerspace=(mstate) hdr[1];
	storedsize=hdr[2];
	/* Invalidate every marker word, walking towards the allocation start */
	while(*hdr==*(size_t *) "NEDMALOC")
		*hdr--=*(size_t *) "nedmaloc";
	/* hdr stopped one word before the allocation; step back onto it */
	mem=(void *)(++hdr);
#endif
#if USE_ALLOCATOR==0
	free(mem);
#elif USE_ALLOCATOR==1
	mspace_free((mstate) mspace, mem);
#endif
}
|
|
|
|
/* Returns the mstate owning `mem`, or 0 when `mem` is null or does not look
   like a block allocated by nedmalloc.  The test used depends on the build:

     USE_MAGIC_HEADERS          - look for the "NEDMALOC" word three words
                                  before `mem` and return the word after it.
     USE_ALLOCATOR==0           - always 0.
     ENABLE_FAST_HEAP_DETECTION - platform-specific header heuristics.
     otherwise                  - a chain of plausibility checks on the
                                  dlmalloc chunk header, bounded by
                                  leastusedaddress/largestusedblock.

   The heuristic variants read memory in front of `mem` and cannot be
   completely certain, as the original author's notes below explain. */
static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC
{
	if(mem)
	{
#if USE_MAGIC_HEADERS
		size_t *hdr=(size_t *) mem-3;
		return hdr[0]==*(size_t *) "NEDMALOC" ? (mstate) hdr[1] : (mstate) 0;
#else
#if USE_ALLOCATOR==0
		/* Fail everything */
		return 0;
#elif USE_ALLOCATOR==1
#ifdef ENABLE_FAST_HEAP_DETECTION
		/* NOTE: the statements of this branch are order-dependent - the
		   trailing `else` below pairs with the last `if` emitted by whichever
		   platform section is compiled in - so the code is left as written. */
#ifdef WIN32
		/*  On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header
			which looks like:
				normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ]
				mmaped: 4 bytes of size  4 bytes of [zero,      zero,      0xb,                        zero        ]

			On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land).
		*/
#pragma pack(push, 1)
		struct _HEAP_ENTRY
		{
			USHORT Size;
			USHORT PreviousSize;
			UCHAR Cookie;			/* SegmentIndex */
			UCHAR Flags;			/* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */
			UCHAR UnusedBytes;
			UCHAR SmallTagIndex;	/* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */
		} *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1;
#pragma pack(pop)
		unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2;
		result1=header & mask1;	/* Positive testing for NT heap */
		result2=header & mask2;	/* Positive testing for dlmalloc */
		if(result1==0x00000100 && result2!=0x00000102)
		{	/* This is likely a NT heap block */
			return 0;
		}
#endif
#ifdef __linux__
		/* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish
		when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr
		down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. */
		mchunkptr p=mem2chunk(mem);
		mstate fm=get_mstate_for(p);
		/* If it's a ptmalloc2 block, fm is likely to be some crazy value */
		if(!is_aligned(fm)) return 0;
		if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
		if(ok_magic(fm))
			return fm;
		else
			return 0;
		if(1) { }
#endif
		else
		{
			mchunkptr p=mem2chunk(mem);
			mstate fm=get_mstate_for(p);
			assert(ok_magic(fm));	/* If this fails, someone tried to free a block twice */
			if(ok_magic(fm))
				return fm;
		}
#else
		{
			/* We try to return zero here if it isn't one of our own blocks, however
			the current block annotation scheme used by dlmalloc makes it impossible
			to be absolutely sure of avoiding a segfault.

			mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block;
			mchunkptr->head      = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS
				FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently
				            in use unless mmap), bit 2 is UNUSED and currently is always zero.
			*/
			/* Snapshot the globals once so every check below sees the same bounds */
			void *RESTRICT lowestaddr=leastusedaddress;
			size_t biggestblock=largestusedblock;
			if(!is_aligned(mem)) return 0;		/* Would fail very rarely as all allocators return aligned blocks */
			if(mem<lowestaddr) return 0;		/* Simple but effective */
			{
				mchunkptr chunk=mem2chunk(mem);
				mstate owner=0;
				int mmapped=is_mmapped(chunk);
				/* The chunk must be in use (or mmapped) and must not carry FLAG4 */
				if((!mmapped && !is_inuse(chunk)) || (chunk->head & FLAG4_BIT)) return 0;
				/* size should never exceed largestusedblock */
				if(chunksize(chunk)>biggestblock) return 0;
				/* Having sanity checked prev_foot and head, check next block */
				if(!mmapped && (!next_pinuse(chunk) || (next_chunk(chunk)->head & FLAG4_BIT))) return 0;
#if 0
				/* If previous block is free, check that its next block pointer equals us */
				if(!mmapped && !pinuse(chunk))
					if(next_chunk(prev_chunk(chunk))!=chunk) return 0;
				/* We could start comparing prev_foot's for similarity but it starts getting slow. */
#endif
				owner = get_mstate_for(chunk);
				if(!is_aligned(owner) || (void *)owner<lowestaddr) return 0;
				if((size_t)mem-(size_t)owner>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
				assert(ok_magic(owner));	/* If this fails, someone tried to free a block twice */
				if(ok_magic(owner))
					return owner;
			}
		}
#endif
#endif
#endif
	}
	return 0;
}
|
|
/* Returns the usable size of block `mem`, or 0 when `mem` is null or its size
   cannot be determined.

   When `isforeign` is non-null it receives 1 for any non-null `mem` unless the
   block is recognised as one of ours, in which case it receives 0.  It is not
   written when `mem` is null.

   Our own blocks are recognised through the magic header (USE_MAGIC_HEADERS)
   or through nedblkmstate() (USE_ALLOCATOR==1).  For anything else the system
   allocator is asked for the size, but only when ENABLE_TOLERANT_NEDMALLOC is
   defined or USE_ALLOCATOR==0; otherwise the result is 0. */
NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC
{
	if(!mem) return 0;
	if(isforeign) *isforeign=1;		/* assume foreign until proven otherwise */
#if USE_MAGIC_HEADERS
	{
		size_t *hdr=(size_t *) mem-3;
		if(hdr[0]==*(size_t *) "NEDMALOC")
		{
			size_t storedsize=hdr[2];
			if(isforeign) *isforeign=0;
			return storedsize;
		}
	}
#elif USE_ALLOCATOR==1
	if(nedblkmstate(mem))
	{
		mchunkptr chunk=mem2chunk(mem);
		if(isforeign) *isforeign=0;
		return chunksize(chunk)-overhead_for(chunk);
	}
#ifdef DEBUG
	else
	{
		int a=1; /* Set breakpoints here if needed */
	}
#endif
#endif
#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0
#ifdef _MSC_VER
	/* This is the MSVCRT equivalent */
	return _msize(mem);
#elif defined(__linux__)
	/* This is the glibc/ptmalloc2/dlmalloc equivalent. */
	return malloc_usable_size(mem);
#elif defined(__FreeBSD__) || defined(__APPLE__)
	/* This is the BSD libc equivalent. */
	return malloc_size(mem);
#else
#error Cannot tolerate the memory allocator of an unknown system!
#endif
#endif
	return 0;
}
|
|
|
|
NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC { nedpsetvalue((nedpool *) 0, v); }
|
|
NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc((nedpool *) 0, size); }
|
|
NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc((nedpool *) 0, no, size); }
|
|
NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc((nedpool *) 0, mem, size); }
|
|
NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC { nedpfree((nedpool *) 0, mem); }
|
|
NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign((nedpool *) 0, alignment, bytes); }
|
|
NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo((nedpool *) 0); }
|
|
NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt((nedpool *) 0, parno, value); }
|
|
NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim((nedpool *) 0, pad); }
|
|
void nedmalloc_stats() THROWSPEC { nedpmalloc_stats((nedpool *) 0); }
|
|
NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC { return nedpmalloc_footprint((nedpool *) 0); }
|
|
NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks); }
|
|
NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks); }
|
|
|
|
/* Header written over the start of a free block while it is parked in a
   thread cache.  Blocks of one bin form a doubly linked list. */
struct threadcacheblk_t;
typedef struct threadcacheblk_t threadcacheblk;
struct threadcacheblk_t
{	/* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
#ifdef FULLSANITYCHECKS
	unsigned int magic;			/* set to the bytes of "NEDN" while the block is cached */
#endif
	unsigned int lastUsed;		/* value of the owning cache's `frees` counter when the block was cached */
	unsigned int size;			/* bin size recorded for the block when it was cached */
	threadcacheblk *next;		/* neighbour towards the tail of the bin list */
	threadcacheblk *prev;		/* neighbour towards the head of the bin list */
};
|
|
typedef struct threadcache_t
|
|
{
|
|
#ifdef FULLSANITYCHECKS
|
|
unsigned int magic1;
|
|
#endif
|
|
int mymspace; /* Last mspace entry this thread used */
|
|
long threadid;
|
|
unsigned int mallocs, frees, successes;
|
|
size_t freeInCache; /* How much free space is stored in this cache */
|
|
threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
|
|
#ifdef FULLSANITYCHECKS
|
|
unsigned int magic2;
|
|
#endif
|
|
} threadcache;
|
|
struct nedpool_t
|
|
{
|
|
MLOCK_T mutex;
|
|
void *uservalue;
|
|
int threads; /* Max entries in m to use */
|
|
threadcache *caches[THREADCACHEMAXCACHES];
|
|
TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
|
|
mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */
|
|
};
|
|
static nedpool syspool;
|
|
|
|
/* Maps an allocation size to a thread-cache bin index: the position of the
   highest set bit of (_size / 16), i.e. floor(log2(_size)) - 4.

       _size:  16..31 -> 0   32..63 -> 1   64..127 -> 2   ...   4096..8191 -> 8

   Sizes below 16 shift down to zero, for which none of the bit-scan variants
   below is defined (__builtin_clz(0) is undefined, _BitScanReverse leaves its
   output unset, and the portable path would wrap to UINT_MAX); they are mapped
   to bin 0 explicitly. */
static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC
{	/* 8=1000	16=10000	20=10100	24=11000	32=100000	48=110000	4096=1000000000000 */
	unsigned int topbit, size=(unsigned int)(_size>>4);
	/* 16=1		20=1	24=1	32=10	48=11	64=100	96=110	128=1000	4096=100000000 */

	if(!size) return 0;

#if defined(__GNUC__)
	topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
#elif defined(_MSC_VER) && _MSC_VER>=1300
	{
		unsigned long bsrTopBit;

		_BitScanReverse(&bsrTopBit, size);

		topbit = bsrTopBit;
	}
#else
	{	/* Portable fallback: smear the top bit downwards, invert, and count
		   the remaining set bits - that count is the number of leading zeros */
		unsigned int x=size;
		x = x | (x >> 1);
		x = x | (x >> 2);
		x = x | (x >> 4);
		x = x | (x >> 8);
		x = x | (x >>16);
		x = ~x;
		x = x - ((x >> 1) & 0x55555555);
		x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
		x = (x + (x >> 4)) & 0x0F0F0F0F;
		x = x + (x << 8);
		x = x + (x << 16);
		topbit=31 - (x >> 24);
	}
#endif
	return topbit;
}
|
|
|
|
|
|
#ifdef FULLSANITYCHECKS
|
|
/* Consistency check for one thread-cache bin (FULLSANITYCHECKS builds only).
   `ptr` addresses the bin's slot pair: ptr[0] is the list head and ptr[1] the
   list tail.  Asserts that both are set or both are null, that both blocks are
   large enough to hold a threadcacheblk and carry the "NEDN" magic, and that
   the head has no predecessor and the tail no successor.

   nedblksize() takes (isforeign, mem); the foreign flag is not needed here so
   a null pointer is passed for it. */
static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
{
	assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
	if(ptr[0] && ptr[1])
	{
		assert(nedblksize(0, ptr[0])>=sizeof(threadcacheblk));
		assert(nedblksize(0, ptr[1])>=sizeof(threadcacheblk));
		assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
		assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
		assert(!ptr[0]->prev);
		assert(!ptr[1]->next);
		if(ptr[0]==ptr[1])
		{	/* Single-entry list: the one block has no neighbours at all */
			assert(!ptr[0]->next);
			assert(!ptr[1]->prev);
		}
	}
}
|
|
/* Walks every bin of thread cache `tc` (FULLSANITYCHECKS builds only): runs
   tcsanitycheck() on the bin's head/tail pair, then follows the list from the
   head asserting that each block carries the "NEDN" magic and that the
   next/prev links of adjacent blocks agree. */
static void tcfullsanitycheck(threadcache *tc) THROWSPEC
{
	int bin;
	for(bin=0; bin<=THREADCACHEMAXBINS; bin++)
	{
		threadcacheblk **pair=&tc->bins[bin*2];
		threadcacheblk *cur, *before=0;
		tcsanitycheck(pair);
		for(cur=pair[0]; cur; cur=cur->next)
		{
			assert(*(unsigned int *) "NEDN"==cur->magic);
			assert(!before || before->next==cur);
			assert(!before || cur->prev==before);
			before=cur;
		}
	}
}
|
|
#endif
|
|
|
|
/* Evicts from thread cache `tc` every cached block whose age is at least
   `age`, where a block's age is tc->frees minus the block's lastUsed stamp.
   Each bin is drained from its tail (oldest) end; evicted blocks are returned
   to the allocator with CallFree and tc->freeInCache is reduced by their
   recorded size.  Does nothing when the cache holds no free space. */
static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC
{
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	if(tc->freeInCache)
	{
		int bin;
		for(bin=0; bin<=THREADCACHEMAXBINS; bin++)
		{
			threadcacheblk **head=&tc->bins[bin*2];
			threadcacheblk **tail=head+1;	/* come from oldest end of list */
			while(*tail && tc->frees-(*tail)->lastUsed>=age)
			{
				threadcacheblk *victim=*tail;
				size_t blksize=victim->size;
				assert(blksize<=nedblksize(0, victim));
				assert(blksize);
#ifdef FULLSANITYCHECKS
				assert(*(unsigned int *) "NEDN"==(*tail)->magic);
#endif
				/* Unlink the tail; if it was the only entry the bin is now empty */
				*tail=victim->prev;
				if(*tail)
					(*tail)->next=0;
				else
					*head=0;
				tc->freeInCache-=blksize;
				assert((long) tc->freeInCache>=0);
				CallFree(0, victim, 0);
			}
		}
	}
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
}
|
|
/* Tears down every thread cache registered in pool `p`: all cached blocks are
   evicted (RemoveCacheEntries with age 0 matches every block, since the
   unsigned age difference is always >= 0), the cache is marked unused, its
   memory is released with CallFree and its slot in p->caches is cleared. */
static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC
{
	int slot;
	for(slot=0; slot<THREADCACHEMAXCACHES; slot++)
	{
		threadcache *tc=p->caches[slot];
		if(!tc) continue;
		tc->frees++;
		RemoveCacheEntries(p, tc, 0);
		assert(!tc->freeInCache);
		tc->mymspace=-1;
		tc->threadid=0;
		CallFree(0, tc, 0);
		p->caches[slot]=0;
	}
}
|
|
|
|
/* Creates a thread cache for the calling thread in pool `p`.

   Under p->mutex the first free slot of p->caches is claimed and a zeroed
   threadcache is allocated from p->m[0].  The cache records the current thread
   id and picks its initial mspace as |threadid| modulo the number of mspaces
   currently in the pool.  After the lock is released, slot index+1 is stored
   in the thread's TLS value for the pool; abort() is called if that fails.

   Returns the new cache, or 0 when every slot is taken or the allocation
   fails (the caller then runs without a cache).

   The magnitude of the thread id is computed in unsigned long arithmetic.
   The previous abs() call took an int, so a long id could be truncated and
   come out negative, giving a negative mspace index. */
static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC
{
	threadcache *tc=0;
	int n, end;
	unsigned long tidmagnitude;
	ACQUIRE_LOCK(&p->mutex);
	for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
	if(THREADCACHEMAXCACHES==n)
	{	/* List exhausted, so disable for this thread */
		RELEASE_LOCK(&p->mutex);
		return 0;
	}
	tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0);
	if(!tc)
	{
		RELEASE_LOCK(&p->mutex);
		return 0;
	}
#ifdef FULLSANITYCHECKS
	/* Only the first sizeof(unsigned int) bytes of each literal are read */
	tc->magic1=*(unsigned int *)"NEDMALC1";
	tc->magic2=*(unsigned int *)"NEDMALC2";
#endif
	tc->threadid=(long)(size_t)CURRENT_THREAD;
	for(end=0; p->m[end]; end++);
	assert(end>0);	/* an initialised pool always has m[0] */
	/* 0UL-x yields the magnitude of a negative id without signed overflow */
	tidmagnitude=tc->threadid<0 ? 0UL-(unsigned long) tc->threadid : (unsigned long) tc->threadid;
	tc->mymspace=(int)(tidmagnitude % (unsigned long) end);
	RELEASE_LOCK(&p->mutex);
	if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
	return tc;
}
|
|
|
|
/* Tries to satisfy a request for *_size bytes from thread cache `tc`.

   The request is rounded up to the size of its bin (a power of two, 16<<idx).
   The head of that bin is tried first; if the bin is empty, or its head is
   smaller than the rounded size, the next larger bin is tried instead.  A hit
   unlinks the head block, reduces tc->freeInCache by the block's recorded size
   and bumps tc->successes.  tc->mallocs is bumped on every call.

   Returns the block, or 0 on a miss.  In both cases *_size is overwritten with
   the rounded-up size. */
static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC
{
	void *RESTRICT result=0;
	size_t size=*_size, blksize=0;
	unsigned int idx=size2binidx(size);
	unsigned int bestsize=1<<(idx+4);	/* Calculate best fit bin size */
	threadcacheblk *RESTRICT cand, **RESTRICT slot;
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
#if 0
	/* Finer grained bin fit */
	idx<<=1;
	if(size>bestsize)
	{
		idx++;
		bestsize+=bestsize>>1;
	}
	if(size>bestsize)
	{
		idx++;
		bestsize=1<<(4+(idx>>1));
	}
#else
	if(size>bestsize)
	{	/* Not an exact power of two: round up into the next bin */
		idx++;
		bestsize<<=1;
	}
#endif
	assert(bestsize>=size);
	if(size<bestsize) size=bestsize;
	assert(size<=THREADCACHEMAX);
	assert(idx<=THREADCACHEMAXBINS);
	slot=&tc->bins[idx*2];
	/* Try to match close, but move up a bin if necessary */
	cand=*slot;
	if((!cand || cand->size<size) && idx<THREADCACHEMAXBINS)
	{	/* Bump it up a bin */
		idx++;
		slot+=2;
		cand=*slot;
	}
	++tc->mallocs;
	if(cand)
	{
		threadcacheblk *after=cand->next;
		blksize=cand->size; /*nedblksize(cand);*/
		assert(nedblksize(0, cand)>=blksize);
		assert(blksize>=size);
		/* Pop the head; an emptied bin also loses its tail pointer */
		if(after)
			after->prev=0;
		slot[0]=after;
		if(!after)
			slot[1]=0;
#ifdef FULLSANITYCHECKS
		cand->magic=0;
#endif
		assert(slot[0]!=cand && slot[1]!=cand);
		assert(nedblksize(0, cand)>=sizeof(threadcacheblk) && nedblksize(0, cand)<=THREADCACHEMAX+CHUNK_OVERHEAD);
		result=(void *) cand;
		++tc->successes;
		tc->freeInCache-=blksize;
		assert((long) tc->freeInCache>=0);
	}
#if defined(DEBUG) && 0
	if(!(tc->mallocs & 0xfff))
	{
		printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
			(float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
	}
#endif
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	*_size=size;
	return result;
}
|
|
/* Shrinks thread cache `tc` while it holds THREADCACHEMAXFREESPACE bytes or
   more.  Eviction starts with blocks at least THREADCACHEMAXFREESPACE/8192
   frees old and the age threshold is halved after every pass, stopping when
   the cache drops below the limit or the threshold reaches zero.  `mymspace`
   is not used; no mspace lock is taken here. */
static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC
{
	unsigned int age;
	for(age=THREADCACHEMAXFREESPACE/8192; age && tc->freeInCache>=THREADCACHEMAXFREESPACE; age>>=1)
	{
		RemoveCacheEntries(p, tc, age);
	}
}
|
|
/* Parks block `mem`, whose usable size is `size`, in thread cache `tc`
   instead of returning it to the allocator.

   The size is rounded DOWN to the size of its bin (16<<idx) and the block is
   pushed onto the head of that bin, stamped with the incremented tc->frees
   counter.  Pushing the block that is already at the head of its bin is
   treated as a double free: a message is printed and abort() is called.
   tc->freeInCache grows by the rounded size; once it reaches
   THREADCACHEMAXFREESPACE, ReleaseFreeInCache() trims the cache. */
static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC
{
	unsigned int bestsize;
	unsigned int idx=size2binidx(size);
	threadcacheblk **RESTRICT slot, *RESTRICT node=(threadcacheblk *) mem;
	threadcacheblk *oldhead;
	assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
#ifdef DEBUG
	/* Make sure this is a valid memory block */
	assert(nedblksize(0, mem));
#endif
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	/* Calculate best fit bin size */
	bestsize=1<<(idx+4);
#if 0
	/* Finer grained bin fit */
	idx<<=1;
	if(size>bestsize)
	{
		unsigned int biggerbestsize=bestsize+bestsize<<1;
		if(size>=biggerbestsize)
		{
			idx++;
			bestsize=biggerbestsize;
		}
	}
#endif
	/* dlmalloc can round up, so we round down to preserve indexing */
	size=bestsize;
	assert(idx<=THREADCACHEMAXBINS);
	slot=&tc->bins[idx*2];
	oldhead=slot[0];
	if(node==oldhead)
	{
		fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", node);
		abort();
	}
#ifdef FULLSANITYCHECKS
	node->magic=*(unsigned int *) "NEDN";
#endif
	node->lastUsed=++tc->frees;
	node->size=(unsigned int) size;
	node->next=oldhead;
	node->prev=0;
	/* Link in front of the old head; in an empty bin the node is also the tail */
	if(oldhead)
		oldhead->prev=node;
	else
		slot[1]=node;
	assert(!*slot || (*slot)->size==node->size);
	*slot=node;
	assert(node==tc->bins[idx*2]);
	assert(tc->bins[idx*2+1]==node || slot[0]->next->prev==node);
	tc->freeInCache+=size;
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
		ReleaseFreeInCache(p, tc, mymspace);
}
|
|
|
|
|
|
|
|
|
|
/* Initialises pool `p` under the global malloc lock: sets up its mutex,
   allocates its TLS key, creates the first mspace with the given `capacity`
   and records the thread limit (`threads` clamped to 1..MAXTHREADSINPOOL,
   anything outside that range meaning MAXTHREADSINPOOL).  A pool whose
   `threads` field is already non-zero is treated as initialised and left
   alone.

   Returns 1 on success (or if already initialised) and 0 on failure after
   undoing whatever had been set up.  A failure with threads<0 (the system
   pool) calls abort() instead.

   Whether the TLS key was actually allocated is tracked in a local flag
   rather than inferred from the key's value.  Zero is a valid key, and a
   failed TLSALLOC can leave a non-zero sentinel behind, so the old
   `if(p->mycache)` test could both leak a real key and try to free one that
   was never allocated. */
static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC
{	/* threads is -1 for system pool */
	int tlsallocated=0;
	ensure_initialization();
	ACQUIRE_MALLOC_GLOBAL_LOCK();
	if(p->threads) goto done;
	if(INITIAL_LOCK(&p->mutex)) goto err;
	if(TLSALLOC(&p->mycache)) goto err;
	tlsallocated=1;
#if USE_ALLOCATOR==0
	p->m[0]=(mstate) mspacecounter++;
#elif USE_ALLOCATOR==1
	if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
	p->m[0]->extp=p;
#endif
	p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
done:
	RELEASE_MALLOC_GLOBAL_LOCK();
	return 1;
err:
	if(threads<0)
		abort();			/* If you can't allocate for system pool, we're screwed */
	DestroyCaches(p);
	if(p->m[0])
	{
#if USE_ALLOCATOR==1
		destroy_mspace(p->m[0]);
#endif
		p->m[0]=0;
	}
	if(tlsallocated)
	{
		if(TLSFREE(p->mycache)) abort();
	}
	p->mycache=0;
	RELEASE_MALLOC_GLOBAL_LOCK();
	return 0;
}
|
|
/* Gets called when thread's last used mspace is in use.  Strategy:
     1. try-lock each existing mspace after *lastUsed, then each one before it;
     2. if none is free and the pool has fewer than p->threads mspaces, create
        a new one of initial capacity `size`, append it under p->mutex and
        lock it;
     3. otherwise block on the mspace the thread used last.

   The returned mspace's mutex has been acquired by this function (assuming
   TRY_LOCK returns non-zero on success, as its use here implies).  When a
   different mspace was chosen, *lastUsed is updated and the choice is
   remembered in tc->mymspace or, for a thread without a cache, as -(n+1) in
   the pool's TLS value. */
static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC
{
	int n, end;
	/* Scan upwards from the entry after the last used one; `end` tracks the
	   index of the first empty entry reached */
	n=end=*lastUsed+1;
	while(p->m[n])
	{
		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
		end=++n;
	}
	/* Wrap around and scan the entries below the last used one */
	n=0;
	while(n<*lastUsed && p->m[n])
	{
		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
		n++;
	}
	if(end<p->threads)
	{
		mstate temp;
#if USE_ALLOCATOR==0
		temp=(mstate) mspacecounter++;
#elif USE_ALLOCATOR==1
		if(!(temp=(mstate) create_mspace(size, 1)))
			goto badexit;
#endif
		/* Now we're ready to modify the lists, we lock */
		ACQUIRE_LOCK(&p->mutex);
		/* Entries may have been appended since the scan above */
		while(p->m[end] && end<p->threads)
			end++;
		if(end>=p->threads)
		{	/* Drat, must destroy it now */
			RELEASE_LOCK(&p->mutex);
#if USE_ALLOCATOR==1
			destroy_mspace((mstate) temp);
#endif
			goto badexit;
		}
		/* We really want to make sure this goes into memory now but we
		have to be careful of breaking aliasing rules, so write it twice */
		*((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
		ACQUIRE_LOCK(&p->m[end]->mutex);
		RELEASE_LOCK(&p->mutex);
		n=end;
		goto found;
	}
	/* Let it lock on the last one it used */
badexit:
	ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
	return p->m[*lastUsed];
found:
	*lastUsed=n;
	if(tc)
		tc->mymspace=n;
	else if(TLSSET(p->mycache, (void *)(size_t)(-(n+1))))
		abort();
	return p->m[n];
}
|
|
|
|
/* Growable registry of the pools created by nedcreatepool().  The structure
   is allocated with room for more `list` entries than the declared array
   bound; nedcreatepool() always allocates one slot more than `size`. */
typedef struct PoolList_t
|
|
{
|
|
size_t size; /* Size of list */
|
|
size_t length; /* Actual entries in list */
|
|
#ifdef DEBUG
|
|
nedpool *list[1]; /* Force testing of list expansion */
|
|
#else
|
|
nedpool *list[16];
|
|
#endif
|
|
} PoolList;
|
|
/* Serialises access to poollist; initialised by nedcreatepool() the first
   time it finds poollist null. */
static MLOCK_T poollistlock;
|
|
/* The registry itself; null until the first nedcreatepool() call. */
static PoolList *poollist;
|
|
/* Creates a new pool and registers it in the global pool list.

   capacity and threads are passed straight to InitPool().
   Returns the new pool, or 0 if any allocation or InitPool() fails.

   The pool list is created on first use and is always allocated with
   one slot more than poollist->size, so the entries stay terminated
   by a null pointer. */
NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
{
	nedpool *ret=0;
	if(!poollist)
	{	/* First pool: create the registry and its lock */
		PoolList *newpoollist=0;
		if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0;
		INITIAL_LOCK(&poollistlock);
		ACQUIRE_LOCK(&poollistlock);
		poollist=newpoollist;
		poollist->size=sizeof(poollist->list)/sizeof(nedpool *);
	}
	else
		ACQUIRE_LOCK(&poollistlock);
	if(poollist->length==poollist->size)
	{	/* Registry is full: grow it */
		PoolList *newpoollist=0;
		size_t newsize=0, usedbytes=0;
		newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *);
		if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit;
		poollist=newpoollist;
		/* Zero only the newly added tail. The length must be measured
		   from the start of the allocation, not from list[0]; otherwise
		   the memset overruns the block by the size of the header
		   fields that precede list[]. */
		usedbytes=(size_t)((char *)&poollist->list[poollist->size]-(char *)poollist);
		memset(&poollist->list[poollist->size], 0, newsize-usedbytes);
		/* One slot is held back for the null terminator */
		poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1;
		assert(poollist->size>poollist->length);
	}
	if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit;
	if(!InitPool(ret, capacity, threads))
	{
		nedpfree(0, ret);
		/* Do not hand the freed pool back to the caller */
		ret=0;
		goto badexit;
	}
	poollist->list[poollist->length++]=ret;
badexit:
	RELEASE_LOCK(&poollistlock);
	return ret;
}
|
|
/* Tears down a pool made by nedcreatepool(): destroys its thread caches
   and mspaces, frees its TLS slot and the pool itself, then removes it
   from the global pool list. The list is freed when it becomes empty. */
void neddestroypool(nedpool *p) THROWSPEC
{
	unsigned int idx=0;
	ACQUIRE_LOCK(&p->mutex);
	DestroyCaches(p);
	while(p->m[idx])
	{
#if USE_ALLOCATOR==1
		destroy_mspace(p->m[idx]);
#endif
		p->m[idx++]=0;
	}
	RELEASE_LOCK(&p->mutex);
	if(TLSFREE(p->mycache)) abort();
	nedpfree(0, p);
	ACQUIRE_LOCK(&poollistlock);
	assert(poollist);
	/* Find the pool's slot in the registry */
	idx=0;
	while(idx<poollist->length && poollist->list[idx]!=p)
		idx++;
	assert(idx!=poollist->length);
	/* Shift the following entries, including the slot at index length,
	   down by one */
	memmove(&poollist->list[idx], &poollist->list[idx+1], (poollist->length-idx)*sizeof(nedpool *));
	poollist->length--;
	if(!poollist->length)
	{	/* Last pool gone: drop the registry too */
		assert(!poollist->list[0]);
		nedpfree(0, poollist);
		poollist=0;
	}
	RELEASE_LOCK(&poollistlock);
}
|
|
/* Destroys the system pool: releases its thread caches and mspaces,
   then overwrites its cache and mspace tables with a 0xdeadbeef
   pattern so the pool cannot be used afterwards. */
void neddestroysyspool() THROWSPEC
{
	/* Pointer-sized poison value */
	const size_t poison=(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
	nedpool *p=&syspool;
	int n=0;
	ACQUIRE_LOCK(&p->mutex);
	DestroyCaches(p);
	while(p->m[n])
	{
#if USE_ALLOCATOR==1
		destroy_mspace(p->m[n]);
#endif
		p->m[n]=0;
		n++;
	}
	/* Render syspool unusable */
	for(n=0; n<THREADCACHEMAXCACHES; n++)
		p->caches[n]=(threadcache *) poison;
	for(n=0; n<MAXTHREADSINPOOL+1; n++)
		p->m[n]=(mstate) poison;
	if(TLSFREE(p->mycache)) abort();
	RELEASE_LOCK(&p->mutex);
}
|
|
/* Returns a snapshot of the pool list: length+1 entries copied from the
   list, taken under the list lock. The copy is allocated with
   nedmalloc(). Returns 0 when no pool list exists or the allocation
   fails. */
nedpool **nedpoollist() THROWSPEC
{
	nedpool **ret=0;
	size_t bytes;
	if(!poollist) return ret;
	ACQUIRE_LOCK(&poollistlock);
	bytes=(poollist->length+1)*sizeof(nedpool *);
	ret=(nedpool **) nedmalloc(bytes);
	if(ret)
		memcpy(ret, poollist->list, bytes);
	RELEASE_LOCK(&poollistlock);
	return ret;
}
|
|
|
|
/* Stores the user value v in pool p. A null p selects the system pool,
   which is initialised first if it has not been yet. */
void nedpsetvalue(nedpool *p, void *v) THROWSPEC
{
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads)
			InitPool(&syspool, 0, -1);
	}
	p->uservalue=v;
}
|
|
/* Looks up the pool owning the block mem and returns that pool's user
   value. If p is non-null, the owning pool is also written to *p.
   Returns 0 when no mspace is found for mem or the mspace has no
   owner pool recorded in extp. */
void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
{
	nedpool *owner=0;
	mstate ms=nedblkmstate(mem);
	if(!ms) return 0;
	if(!ms->extp) return 0;
	owner=(nedpool *) ms->extp;
	if(p)
		*p=owner;
	return owner->uservalue;
}
|
|
|
|
/* Empties the calling thread's cache for pool p, and if disable is
   non-zero also frees the cache and marks caching as disabled for this
   thread. A null p selects the system pool (initialised on demand).

   The per-thread TLS value is encoded as follows:
     >0  index+1 of the thread's cache in p->caches
      0  no cache allocated yet
     <0  cache disabled; the thread's mspace index n is stored as -(n+1)
*/
void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC
{
	int mycache;
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	mycache=(int)(size_t) TLSGET(p->mycache);
	if(!mycache)
	{	/* No cache yet: disabling pins the thread to mspace 0 */
		if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort();
	}
	else if(mycache>0)
	{	/* Live cache: disabling pins the thread to its last used mspace */
		threadcache *tc=p->caches[mycache-1];
#if defined(DEBUG)
		printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
			100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
#endif
		/* Store -(mspace+1). Plain -mspace would give 0 for mspace 0,
		   which means "no cache yet" and causes a new cache to be
		   allocated on the next call instead of leaving caching off. */
		if(disable && TLSSET(p->mycache, (void *)(size_t)(-(tc->mymspace+1)))) abort();
		tc->frees++;
		RemoveCacheEntries(p, tc, 0);
		assert(!tc->freeInCache);
		if(disable)
		{	/* Release the cache structure and its table slot */
			tc->mymspace=-1;
			tc->threadid=0;
			CallFree(0, p->caches[mycache-1], 0);
			p->caches[mycache-1]=0;
		}
	}
}
|
|
/* Flushes and disables the calling thread's cache for pool p. This is
   nedtrimthreadcache() with its disable flag set. */
void neddisablethreadcache(nedpool *p) THROWSPEC
{
	nedtrimthreadcache(p, 1 /* disable */);
}
|
|
|
|
/* Runs `action` with a locked mspace bound to a local variable named by
   the first argument. The mspace comes from GetMSpace((p),(tc),(ms),(s))
   and its mutex is released afterwards when USE_ALLOCATOR==1. */
#define GETMSPACE(m,p,tc,ms,s,action)								\
	do {															\
		mstate m = GetMSpace((p),(tc),(ms),(s));					\
		action;														\
		if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); }			\
	} while (0)
|
|
|
|
static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC
{	/* Returns a locked and ready for use mspace. The thread's preferred
	   mspace is tried first; if it cannot be locked without blocking,
	   FindMSpace() picks or creates another one. */
	mstate preferred=p->m[mymspace];
	assert(preferred);
#if USE_ALLOCATOR==1
	if(TRY_LOCK(&p->m[mymspace]->mutex))
		return preferred;
	return FindMSpace(p, tc, &mymspace, size);
#else
	return preferred;
#endif
}
|
|
/* Out-of-line part of GetThreadCache() for a null pool: makes sure the
   system pool is initialised and points *p at it. */
static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC
{
	if(!syspool.threads)
		InitPool(&syspool, 0, -1);
	*p=&syspool;
}
|
|
/* Out-of-line part of GetThreadCache() for a thread without a live
   cache (mycache<=0). For mycache==0 it tries to allocate a cache and,
   if that fails, disables caching for the thread on mspace 0. For a
   negative mycache the cache is disabled and the value decodes to the
   thread's mspace index. */
static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC
{
	if(mycache)
	{	/* Cache disabled, but we do have an assigned thread pool */
		*tc=0;
		*mymspace=-mycache-1;
		return;
	}
	/* Need to allocate a new cache */
	*tc=AllocCache(*p);
	if(*tc)
	{
		*mymspace=(*tc)->mymspace;
		return;
	}
	/* Allocation failed: disable caching for this thread */
	if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort();
	*mymspace=0;
}
|
|
/* Resolves the calling thread's state for a pool operation.
   - If size is non-null, *size is raised to at least
     sizeof(threadcacheblk).
   - A null *p is replaced by the system pool.
   - *tc receives the thread's cache (or 0 when it has none) and
     *mymspace the index of the mspace the thread should use. */
static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC
{
	int slot;
	if(size)
	{
		if(*size<sizeof(threadcacheblk))
			*size=sizeof(threadcacheblk);
	}
	if(!*p)
		GetThreadCache_cold1(p);
	slot=(int)(size_t) TLSGET((*p)->mycache);
	if(slot<=0)
		GetThreadCache_cold2(p, tc, mymspace, slot);
	else
	{	/* Already have a cache */
		*tc=(*p)->caches[slot-1];
		*mymspace=(*tc)->mymspace;
	}
	assert(*mymspace>=0);
	assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
#ifdef FULLSANITYCHECKS
	/* Check the cache's two magic markers */
	if(*tc && (*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2))
		abort();
#endif
}
|
|
|
|
/* Allocates size bytes from pool p (null p means the system pool).
   Requests up to THREADCACHEMAX are first tried against the thread
   cache; anything else, or a cache miss, goes to the thread's mspace.
   Returns 0 on failure. */
NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
{
	threadcache *tc;
	int mymspace;
	void *ret=0;
	GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
	if(tc && size<=THREADCACHEMAX)
		ret=threadcache_malloc(p, tc, &size);	/* Use the thread cache */
#endif
	if(ret)
		return ret;
	/* Use this thread's mspace */
	GETMSPACE(m, p, tc, mymspace, size,
			  ret=CallMalloc(m, size, 0));
	return ret;
}
|
|
/* Allocates zero-filled storage for no elements of size bytes each from
   pool p (null p means the system pool).

   Returns 0 if no*size does not fit in a size_t or if the allocation
   fails. Without the overflow check the product would wrap and a block
   far smaller than requested would be returned. */
NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
{
	size_t rsize;
	void *ret=0;
	threadcache *tc;
	int mymspace;
	/* Reject element counts whose total byte size wraps around */
	if(size && no>((size_t)-1)/size) return 0;
	rsize=size*no;
	GetThreadCache(&p, &tc, &mymspace, &rsize);
#if THREADCACHEMAX
	if(tc && rsize<=THREADCACHEMAX)
	{	/* Use the thread cache; blocks from it must be cleared here */
		if((ret=threadcache_malloc(p, tc, &rsize)))
			memset(ret, 0, rsize);
	}
#endif
	if(!ret)
	{	/* Use this thread's mspace */
		GETMSPACE(m, p, tc, mymspace, rsize,
				  ret=CallCalloc(m, rsize, 0));
	}
	return ret;
}
|
|
/* Resizes the block mem to size bytes within pool p (null p means the
   system pool).
   - A null mem behaves like nedpmalloc(p, size).
   - Aborts if nedblksize() reports a size of zero for mem.
   - If the new size is no larger than the current block and less than
     a small slack below it (32 bytes with DEBUG, 1024 otherwise), the
     block is returned unchanged.
   - Otherwise a thread-cache allocation is tried for small sizes, with
     the data copied and the old block released; failing that, the
     request is handed to CallRealloc(). */
NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
{
	const size_t slack=
#ifdef DEBUG
		32
#else
		1024
#endif
		;
	void *ret=0;
	threadcache *tc;
	int mymspace, isforeign=1;
	size_t memsize;
	if(!mem)
		return nedpmalloc(p, size);
	memsize=nedblksize(&isforeign, mem);
	assert(memsize);
	if(!memsize)
	{
		fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n");
		abort();
	}
	else if(size<=memsize && memsize-size<slack)
	{	/* New size is within the slack below the existing size: no-op */
		return mem;
	}
	GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
	if(tc && size && size<=THREADCACHEMAX)
	{	/* Use the thread cache */
		ret=threadcache_malloc(p, tc, &size);
		if(ret)
		{
			size_t tocopy=memsize<size ? memsize : size;
			memcpy(ret, mem, tocopy);
			/* Release the old block by the same size rule nedpfree() uses */
			if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
				threadcache_free(p, tc, mymspace, mem, memsize);
			else
				CallFree(0, mem, isforeign);
			return ret;
		}
	}
#endif
	/* Reallocs always happen in the mspace they happened in, so skip
	   locking the preferred mspace for this thread */
	return CallRealloc(p->m[mymspace], mem, isforeign, memsize, size);
}
|
|
/* Frees the block mem. A null mem is ignored (with a warning on stderr
   in DEBUG builds). Aborts if nedblksize() reports a size of zero for
   mem. Blocks whose size lies between sizeof(threadcacheblk) and
   THREADCACHEMAX+CHUNK_OVERHEAD go into the thread cache when the
   thread has one; all others are released through CallFree(). */
void nedpfree(nedpool *p, void *mem) THROWSPEC
{	/* Frees always happen in the mspace they happened in, so skip
	   locking the preferred mspace for this thread */
	threadcache *tc;
	int mymspace, isforeign=1;
	size_t memsize;
	if(!mem)
	{	/* If you tried this on FreeBSD you'd be sorry! */
#ifdef DEBUG
		fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. This is not portable behaviour!\n");
#endif
		return;
	}
	memsize=nedblksize(&isforeign, mem);
	assert(memsize);
	if(!memsize)
	{
		fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n");
		abort();
	}
	GetThreadCache(&p, &tc, &mymspace, 0);
#if THREADCACHEMAX
	/* mem is known to be non-null at this point */
	if(tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
	{
		threadcache_free(p, tc, mymspace, mem, memsize);
		return;
	}
#endif
	CallFree(0, mem, isforeign);
}
|
|
/* Allocates bytes from pool p (null p means the system pool), passing
   alignment on to CallMalloc(). The thread cache is never used; the
   request always goes to the thread's mspace. */
NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
{
	threadcache *tc;
	int mymspace;
	void *ret;
	GetThreadCache(&p, &tc, &mymspace, &bytes);
	/* Use this thread's mspace */
	GETMSPACE(m, p, tc, mymspace, bytes,
			  ret=CallMalloc(m, bytes, alignment));
	return ret;
}
|
|
/* Returns allocation statistics for pool p (null p means the system
   pool, initialised on demand), summed over all of its mspaces. The
   result stays all zero unless USE_ALLOCATOR==1 and NO_MALLINFO is
   unset. */
struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC
{
	struct nedmallinfo ret={0};
	int n=0;
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	while(p->m[n])
	{
#if USE_ALLOCATOR==1 && !NO_MALLINFO
		struct mallinfo t=mspace_mallinfo(p->m[n]);
		ret.arena   +=t.arena;
		ret.ordblks +=t.ordblks;
		ret.hblkhd  +=t.hblkhd;
		ret.usmblks +=t.usmblks;
		ret.uordblks+=t.uordblks;
		ret.fordblks+=t.fordblks;
		ret.keepcost+=t.keepcost;
#endif
		n++;
	}
	return ret;
}
|
|
/* Forwards parno and value to mspace_mallopt() when USE_ALLOCATOR==1
   and returns its result; otherwise returns 0. The pool argument is
   not used. */
int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
{
	int result=0;
#if USE_ALLOCATOR==1
	result=mspace_mallopt(parno, value);
#endif
	return result;
}
|
|
/* Reports allocator internals. When USE_ALLOCATOR==1, *granularity and
   *magic (each optional) receive the matching mparams fields and the
   address of the system pool is returned. Otherwise both outputs are
   set to 0 and 0 is returned. */
NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC
{
	size_t gran=0, mag=0;
	void *pool=0;
#if USE_ALLOCATOR==1
	gran=mparams.granularity;
	mag=mparams.magic;
	pool=(void *) &syspool;
#endif
	if(granularity)
		*granularity=gran;
	if(magic)
		*magic=mag;
	return pool;
}
|
|
/* Calls mspace_trim(…, pad) on every mspace of pool p (null p means
   the system pool, initialised on demand) and returns the sum of the
   results. Returns 0 when USE_ALLOCATOR!=1. */
int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
{
	int total=0, n=0;
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	while(p->m[n])
	{
#if USE_ALLOCATOR==1
		total+=mspace_trim(p->m[n], pad);
#endif
		n++;
	}
	return total;
}
|
|
/* Calls mspace_malloc_stats() on every mspace of pool p (null p means
   the system pool, initialised on demand). Does nothing per mspace
   when USE_ALLOCATOR!=1. */
void nedpmalloc_stats(nedpool *p) THROWSPEC
{
	int n=0;
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	while(p->m[n])
	{
#if USE_ALLOCATOR==1
		mspace_malloc_stats(p->m[n]);
#endif
		n++;
	}
}
|
|
/* Returns the sum of mspace_footprint() over every mspace of pool p
   (null p means the system pool, initialised on demand). Returns 0
   when USE_ALLOCATOR!=1. */
size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
{
	size_t total=0;
	int n=0;
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	while(p->m[n])
	{
#if USE_ALLOCATOR==1
		total+=mspace_footprint(p->m[n]);
#endif
		n++;
	}
	return total;
}
|
|
/* Pool-aware wrapper around mspace_independent_calloc(): requests
   elemsno elements of elemsize bytes (elemsize is first raised to at
   least sizeof(threadcacheblk)) from the thread's mspace. With
   USE_ALLOCATOR==0 the call is routed to unsupported_operation(). */
NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
{
	threadcache *tc;
	int mymspace;
	void **ret;
	GetThreadCache(&p, &tc, &mymspace, &elemsize);
#if USE_ALLOCATOR==0
	GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
			  ret=unsupported_operation("independent_calloc"));
#elif USE_ALLOCATOR==1
	GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
			  ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
#endif
	return ret;
}
|
|
/* Pool-aware wrapper around mspace_independent_comalloc(). Each of the
   elems requested sizes is copied into a stack array (alloca) and
   raised to at least sizeof(threadcacheblk) before the call; the
   caller's sizes array is not modified. With USE_ALLOCATOR==0 the call
   is routed to unsupported_operation(). */
NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
{
	threadcache *tc;
	int mymspace;
	void **ret;
	size_t i;
	size_t *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
	if(!adjustedsizes) return 0;
	for(i=0; i<elems; i++)
	{
		size_t want=sizes[i];
		if(want<sizeof(threadcacheblk))
			want=sizeof(threadcacheblk);
		adjustedsizes[i]=want;
	}
	GetThreadCache(&p, &tc, &mymspace, 0);
#if USE_ALLOCATOR==0
	GETMSPACE(m, p, tc, mymspace, 0,
			  ret=unsupported_operation("independent_comalloc"));
#elif USE_ALLOCATOR==1
	GETMSPACE(m, p, tc, mymspace, 0,
			  ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
#endif
	return ret;
}
|
|
|
|
#if defined(__cplusplus)
|
|
}
|
|
#endif
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning(pop)
|
|
#endif
|
|
|
|
#endif
|