godot/thirdparty/embree/kernels/builders/bvh_builder_msmblur_hair.h
jfons 767e374dce Upgrade Embree to the latest official release.
Since Embree v3.13.0 supports AARCH64, switch back to the
official repo instead of using Embree-aarch64.

`thirdparty/embree/patches/godot-changes.patch` should now contain
an accurate diff of the changes done to the library.
2021-05-21 17:00:24 +02:00

527 lines
22 KiB
C++

// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../bvh/bvh.h"
#include "../geometry/primitive.h"
#include "../builders/bvh_builder_msmblur.h"
#include "../builders/heuristic_binning_array_aligned.h"
#include "../builders/heuristic_binning_array_unaligned.h"
#include "../builders/heuristic_timesplit_array.h"
namespace embree
{
namespace isa
{
struct BVHBuilderHairMSMBlur
{
/*! settings for msmblur builder */
struct Settings
{
/*! default settings */
Settings ()
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {}
public:
size_t branchingFactor; //!< branching factor of BVH to build
size_t maxDepth; //!< maximum depth of BVH to build
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
size_t minLeafSize; //!< minimum size of a leaf
size_t maxLeafSize; //!< maximum size of a leaf
};
struct BuildRecord
{
public:
__forceinline BuildRecord () {}
__forceinline BuildRecord (size_t depth)
: depth(depth) {}
__forceinline BuildRecord (const SetMB& prims, size_t depth)
: depth(depth), prims(prims) {}
__forceinline size_t size() const {
return prims.size();
}
public:
size_t depth; //!< depth of the root of this subtree
SetMB prims; //!< the list of primitives
};
template<typename NodeRef,
typename RecalculatePrimRef,
typename CreateAllocFunc,
typename CreateAABBNodeMBFunc,
typename SetAABBNodeMBFunc,
typename CreateOBBNodeMBFunc,
typename SetOBBNodeMBFunc,
typename CreateLeafFunc,
typename ProgressMonitor>
class BuilderT
{
ALIGNED_CLASS_(16);
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
typedef FastAllocator::CachedAllocator Allocator;
typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;
typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;
typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;
public:
BuilderT (Scene* scene,
const RecalculatePrimRef& recalculatePrimRef,
const CreateAllocFunc& createAlloc,
const CreateAABBNodeMBFunc& createAABBNodeMB,
const SetAABBNodeMBFunc& setAABBNodeMB,
const CreateOBBNodeMBFunc& createOBBNodeMB,
const SetOBBNodeMBFunc& setOBBNodeMB,
const CreateLeafFunc& createLeaf,
const ProgressMonitor& progressMonitor,
const Settings settings)
: cfg(settings),
scene(scene),
recalculatePrimRef(recalculatePrimRef),
createAlloc(createAlloc),
createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),
createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),
createLeaf(createLeaf),
progressMonitor(progressMonitor),
unalignedHeuristic(scene),
temporalSplitHeuristic(scene->device,recalculatePrimRef) {}
private:
/*! checks if all primitives are from the same geometry */
__forceinline bool sameGeometry(const SetMB& set)
{
mvector<PrimRefMB>& prims = *set.prims;
unsigned int firstGeomID = prims[set.begin()].geomID();
for (size_t i=set.begin()+1; i<set.end(); i++) {
if (prims[i].geomID() != firstGeomID){
return false;
}
}
return true;
}
/*! performs some split if SAH approaches fail */
void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
{
mvector<PrimRefMB>& prims = *set.prims;
const size_t begin = set.begin();
const size_t end = set.end();
const size_t center = (begin + end)/2;
PrimInfoMB linfo = empty;
for (size_t i=begin; i<center; i++)
linfo.add_primref(prims[i]);
PrimInfoMB rinfo = empty;
for (size_t i=center; i<end; i++)
rinfo.add_primref(prims[i]);
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
}
void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
{
assert(set.size() > 1);
const size_t begin = set.begin();
const size_t end = set.end();
PrimInfoMB linfo(empty);
PrimInfoMB rinfo(empty);
unsigned int geomID = (*set.prims)[begin].geomID();
size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo,
[&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
[ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); });
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
}
/*! creates a large leaf that could be larger than supported by the BVH */
NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)
{
/* this should never occur but is a fatal error */
if (current.depth > cfg.maxDepth)
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
/* special case when directly creating leaf without any splits that could shrink time_range */
bool force_split = false;
if (current.depth == 1 && current.size() > 0)
{
BBox1f c = empty;
BBox1f p = current.prims.time_range;
for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
mvector<PrimRefMB>& prims = *current.prims.prims;
c.extend(prims[i].time_range);
}
force_split = c.lower > p.lower || c.upper < p.upper;
}
/* create leaf for few primitives */
if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)
return createLeaf(current.prims,alloc);
/* fill all children by always splitting the largest one */
LocalChildList children(current);
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
do {
/* find best child with largest bounding box area */
int bestChild = -1;
size_t bestSize = 0;
for (unsigned i=0; i<children.size(); i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)
continue;
force_split = false;
/* remember child with largest size */
if (children[i].size() > bestSize) {
bestSize = children[i].size();
bestChild = i;
}
}
if (bestChild == -1) break;
/*! split best child into left and right child */
BuildRecord left(current.depth+1);
BuildRecord right(current.depth+1);
if (!sameGeometry(children[bestChild].prims)) {
splitByGeometry(children[bestChild].prims,left.prims,right.prims);
} else {
splitFallback(children[bestChild].prims,left.prims,right.prims);
}
children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>());
} while (children.size() < cfg.branchingFactor);
/* detect time_ranges that have shrunken */
bool timesplit = false;
for (size_t i=0; i<children.size(); i++) {
const BBox1f c = children[i].prims.time_range;
const BBox1f p = current.prims.time_range;
timesplit |= c.lower > p.lower || c.upper < p.upper;
}
/* create node */
NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);
LBBox3fa bounds = empty;
for (size_t i=0; i<children.size(); i++) {
values[i] = createLargeLeaf(children[i],alloc);
bounds.extend(values[i].lbounds);
}
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
if (timesplit)
bounds = current.prims.linearBounds(recalculatePrimRef);
return NodeRecordMB4D(node,bounds,current.prims.time_range);
}
/*! performs split */
std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)
{
/* variable to track the SAH of the best splitting approach */
float bestSAH = inf;
const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);
/* perform standard binning in aligned space */
HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);
float alignedObjectSAH = alignedObjectSplit.splitSAH();
bestSAH = min(alignedObjectSAH,bestSAH);
/* perform standard binning in unaligned space */
UnalignedHeuristicBinning::Split unalignedObjectSplit;
LinearSpace3fa uspace;
float unalignedObjectSAH = inf;
if (alignedObjectSAH > 0.7f*leafSAH) {
uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);
const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);
unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);
unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive
bestSAH = min(unalignedObjectSAH,bestSAH);
}
/* do temporal splits only if previous approaches failed to produce good SAH and the the time range is large enough */
float temporal_split_sah = inf;
typename HeuristicTemporal::Split temporal_split;
if (bestSAH > 0.5f*leafSAH) {
if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) {
temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);
temporal_split_sah = temporal_split.splitSAH();
bestSAH = min(temporal_split_sah,bestSAH);
}
}
/* perform fallback split if SAH heuristics failed */
if (unlikely(!std::isfinite(bestSAH))) {
current.prims.deterministic_order();
splitFallback(current.prims,lrecord.prims,rrecord.prims);
}
/* perform aligned split if this is best */
else if (likely(bestSAH == alignedObjectSAH)) {
alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);
}
/* perform unaligned split if this is best */
else if (likely(bestSAH == unalignedObjectSAH)) {
unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);
aligned = false;
}
/* perform temporal split if this is best */
else if (likely(bestSAH == temporal_split_sah)) {
timesplit = true;
return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);
}
else
assert(false);
return std::unique_ptr<mvector<PrimRefMB>>();
}
/*! recursive build */
NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)
{
/* get thread local allocator */
if (!alloc)
alloc = createAlloc();
/* call memory monitor function to signal progress */
if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)
progressMonitor(current.size());
/* create leaf node */
if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {
current.prims.deterministic_order();
return createLargeLeaf(current,alloc);
}
/* fill all children by always splitting the one with the largest surface area */
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
LocalChildList children(current);
bool aligned = true;
bool timesplit = false;
do {
/* find best child with largest bounding box area */
ssize_t bestChild = -1;
float bestArea = neg_inf;
for (size_t i=0; i<children.size(); i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.minLeafSize)
continue;
/* remember child with largest area */
const float A = children[i].prims.halfArea();
if (A > bestArea) {
bestArea = children[i].prims.halfArea();
bestChild = i;
}
}
if (bestChild == -1) break;
/*! split best child into left and right child */
BuildRecord left(current.depth+1);
BuildRecord right(current.depth+1);
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);
children.split(bestChild,left,right,std::move(new_vector));
} while (children.size() < cfg.branchingFactor);
/* detect time_ranges that have shrunken */
for (size_t i=0; i<children.size(); i++) {
const BBox1f c = children[i].prims.time_range;
const BBox1f p = current.prims.time_range;
timesplit |= c.lower > p.lower || c.upper < p.upper;
}
/* create time split node */
if (timesplit)
{
const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
/* spawn tasks or ... */
if (current.size() > SINGLE_THREADED_THRESHOLD)
{
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
values[i] = recurse(children[i],nullptr,true);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* ... continue sequential */
else {
for (size_t i=0; i<children.size(); i++) {
values[i] = recurse(children[i],alloc,false);
}
}
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
return NodeRecordMB4D(node,bounds,current.prims.time_range);
}
/* create aligned node */
else if (aligned)
{
const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
/* spawn tasks or ... */
if (current.size() > SINGLE_THREADED_THRESHOLD)
{
LBBox3fa cbounds[MAX_BRANCHING_FACTOR];
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
values[i] = recurse(children[i],nullptr,true);
cbounds[i] = values[i].lbounds;
_mm_mfence(); // to allow non-temporal stores during build
}
});
LBBox3fa bounds = empty;
for (size_t i=0; i<children.size(); i++)
bounds.extend(cbounds[i]);
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
return NodeRecordMB4D(node,bounds,current.prims.time_range);
}
/* ... continue sequentially */
else
{
LBBox3fa bounds = empty;
for (size_t i=0; i<children.size(); i++) {
values[i] = recurse(children[i],alloc,false);
bounds.extend(values[i].lbounds);
}
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
return NodeRecordMB4D(node,bounds,current.prims.time_range);
}
}
/* create unaligned node */
else
{
const NodeRef node = createOBBNodeMB(alloc);
/* spawn tasks or ... */
if (current.size() > SINGLE_THREADED_THRESHOLD)
{
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
const auto child = recurse(children[i],nullptr,true);
setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* ... continue sequentially */
else
{
for (size_t i=0; i<children.size(); i++) {
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
const auto child = recurse(children[i],alloc,false);
setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
}
}
const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
return NodeRecordMB4D(node,bounds,current.prims.time_range);
}
}
public:
/*! entry point into builder */
NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
{
BuildRecord record(SetMB(pinfo,&prims),1);
auto root = recurse(record,nullptr,true);
_mm_mfence(); // to allow non-temporal stores during build
return root;
}
private:
Settings cfg;
Scene* scene;
const RecalculatePrimRef& recalculatePrimRef;
const CreateAllocFunc& createAlloc;
const CreateAABBNodeMBFunc& createAABBNodeMB;
const SetAABBNodeMBFunc& setAABBNodeMB;
const CreateOBBNodeMBFunc& createOBBNodeMB;
const SetOBBNodeMBFunc& setOBBNodeMB;
const CreateLeafFunc& createLeaf;
const ProgressMonitor& progressMonitor;
private:
HeuristicBinning alignedHeuristic;
UnalignedHeuristicBinning unalignedHeuristic;
HeuristicTemporal temporalSplitHeuristic;
};
template<typename NodeRef,
typename RecalculatePrimRef,
typename CreateAllocFunc,
typename CreateAABBNodeMBFunc,
typename SetAABBNodeMBFunc,
typename CreateOBBNodeMBFunc,
typename SetOBBNodeMBFunc,
typename CreateLeafFunc,
typename ProgressMonitor>
static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,
const RecalculatePrimRef& recalculatePrimRef,
const CreateAllocFunc& createAlloc,
const CreateAABBNodeMBFunc& createAABBNodeMB,
const SetAABBNodeMBFunc& setAABBNodeMB,
const CreateOBBNodeMBFunc& createOBBNodeMB,
const SetOBBNodeMBFunc& setOBBNodeMB,
const CreateLeafFunc& createLeaf,
const ProgressMonitor& progressMonitor,
const Settings settings)
{
typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,
CreateAABBNodeMBFunc,SetAABBNodeMBFunc,
CreateOBBNodeMBFunc,SetOBBNodeMBFunc,
CreateLeafFunc,ProgressMonitor> Builder;
Builder builder(scene,recalculatePrimRef,createAlloc,
createAABBNodeMB,setAABBNodeMB,
createOBBNodeMB,setOBBNodeMB,
createLeaf,progressMonitor,settings);
return builder(prims,pinfo);
}
};
}
}