godot/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h

307 lines
6.5 KiB
C++

#ifndef B3_OPENCL_ARRAY_H
#define B3_OPENCL_ARRAY_H
#include "Bullet3Common/b3AlignedObjectArray.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
template <typename T>
class b3OpenCLArray
{
size_t m_size;
size_t m_capacity;
cl_mem m_clBuffer;
cl_context m_clContext;
cl_command_queue m_commandQueue;
bool m_ownsMemory;
bool m_allowGrowingCapacity;
void deallocate()
{
if (m_clBuffer && m_ownsMemory)
{
clReleaseMemObject(m_clBuffer);
}
m_clBuffer = 0;
m_capacity=0;
}
b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src);
B3_FORCE_INLINE size_t allocSize(size_t size)
{
return (size ? size*2 : 1);
}
public:
b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity=0, bool allowGrowingCapacity=true)
:m_size(0), m_capacity(0),m_clBuffer(0),
m_clContext(ctx),m_commandQueue(queue),
m_ownsMemory(true),m_allowGrowingCapacity(true)
{
if (initialCapacity)
{
reserve(initialCapacity);
}
m_allowGrowingCapacity = allowGrowingCapacity;
}
///this is an error-prone method with no error checking, be careful!
void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements)
{
deallocate();
m_ownsMemory = false;
m_allowGrowingCapacity = false;
m_clBuffer = buffer;
m_size = sizeInElements;
m_capacity = sizeInElements;
}
// we could enable this assignment, but need to make sure to avoid accidental deep copies
// b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src)
// {
// copyFromArray(src);
// return *this;
// }
cl_mem getBufferCL() const
{
return m_clBuffer;
}
virtual ~b3OpenCLArray()
{
deallocate();
m_size=0;
m_capacity=0;
}
B3_FORCE_INLINE bool push_back(const T& _Val,bool waitForCompletion=true)
{
bool result = true;
size_t sz = size();
if( sz == capacity() )
{
result = reserve( allocSize(size()) );
}
copyFromHostPointer(&_Val, 1, sz, waitForCompletion);
m_size++;
return result;
}
B3_FORCE_INLINE T forcedAt(size_t n) const
{
b3Assert(n>=0);
b3Assert(n<capacity());
T elem;
copyToHostPointer(&elem,1,n,true);
return elem;
}
B3_FORCE_INLINE T at(size_t n) const
{
b3Assert(n>=0);
b3Assert(n<size());
T elem;
copyToHostPointer(&elem,1,n,true);
return elem;
}
B3_FORCE_INLINE bool resize(size_t newsize, bool copyOldContents=true)
{
bool result = true;
size_t curSize = size();
if (newsize < curSize)
{
//leave the OpenCL memory for now
} else
{
if (newsize > size())
{
result = reserve(newsize,copyOldContents);
}
//leave new data uninitialized (init in debug mode?)
//for (size_t i=curSize;i<newsize;i++) ...
}
if (result)
{
m_size = newsize;
} else
{
m_size = 0;
}
return result;
}
B3_FORCE_INLINE size_t size() const
{
return m_size;
}
B3_FORCE_INLINE size_t capacity() const
{
return m_capacity;
}
B3_FORCE_INLINE bool reserve(size_t _Count, bool copyOldContents=true)
{
bool result=true;
// determine new minimum length of allocated storage
if (capacity() < _Count)
{ // not enough room, reallocate
if (m_allowGrowingCapacity)
{
cl_int ciErrNum;
//create a new OpenCL buffer
size_t memSizeInBytes = sizeof(T)*_Count;
cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum);
if (ciErrNum!=CL_SUCCESS)
{
b3Error("OpenCL out-of-memory\n");
_Count = 0;
result = false;
}
//#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
#ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
unsigned char* src = (unsigned char*)malloc(memSizeInBytes);
for (size_t i=0;i<memSizeInBytes;i++)
src[i] = 0xbb;
ciErrNum = clEnqueueWriteBuffer( m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0,0,0 );
b3Assert(ciErrNum==CL_SUCCESS);
clFinish(m_commandQueue);
free(src);
#endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
if (result)
{
if (copyOldContents)
copyToCL(buf, size());
}
//deallocate the old buffer
deallocate();
m_clBuffer = buf;
m_capacity = _Count;
} else
{
//fail: assert and
b3Assert(0);
deallocate();
result=false;
}
}
return result;
}
void copyToCL(cl_mem destination, size_t numElements, size_t firstElem=0, size_t dstOffsetInElems=0) const
{
if (numElements<=0)
return;
b3Assert(m_clBuffer);
b3Assert(destination);
//likely some error, destination is same as source
b3Assert(m_clBuffer != destination);
b3Assert((firstElem+numElements)<=m_size);
cl_int status = 0;
b3Assert(numElements>0);
b3Assert(numElements<=m_size);
size_t srcOffsetBytes = sizeof(T)*firstElem;
size_t dstOffsetInBytes = sizeof(T)*dstOffsetInElems;
status = clEnqueueCopyBuffer( m_commandQueue, m_clBuffer, destination,
srcOffsetBytes, dstOffsetInBytes, sizeof(T)*numElements, 0, 0, 0 );
b3Assert( status == CL_SUCCESS );
}
void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion=true)
{
size_t newSize = srcArray.size();
bool copyOldContents = false;
resize (newSize,copyOldContents);
if (newSize)
copyFromHostPointer(&srcArray[0],newSize,0,waitForCompletion);
}
void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem= 0, bool waitForCompletion=true)
{
b3Assert(numElems+destFirstElem <= capacity());
if (numElems+destFirstElem)
{
cl_int status = 0;
size_t sizeInBytes=sizeof(T)*numElems;
status = clEnqueueWriteBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*destFirstElem, sizeInBytes,
src, 0,0,0 );
b3Assert(status == CL_SUCCESS );
if (waitForCompletion)
clFinish(m_commandQueue);
} else
{
b3Error("copyFromHostPointer invalid range\n");
}
}
void copyToHost(b3AlignedObjectArray<T>& destArray, bool waitForCompletion=true) const
{
destArray.resize(this->size());
if (size())
copyToHostPointer(&destArray[0], size(),0,waitForCompletion);
}
void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem=0, bool waitForCompletion=true) const
{
b3Assert(numElem+srcFirstElem <= capacity());
if(numElem+srcFirstElem <= capacity())
{
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*srcFirstElem, sizeof(T)*numElem,
destPtr, 0,0,0 );
b3Assert( status==CL_SUCCESS );
if (waitForCompletion)
clFinish(m_commandQueue);
} else
{
b3Error("copyToHostPointer invalid range\n");
}
}
void copyFromOpenCLArray(const b3OpenCLArray& src)
{
size_t newSize = src.size();
resize(newSize);
if (size())
{
src.copyToCL(m_clBuffer,size());
}
}
};
#endif //B3_OPENCL_ARRAY_H