// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../bvh/bvh.h" #include "../geometry/primitive.h" #include "../builders/bvh_builder_sah.h" #include "../builders/heuristic_binning_array_aligned.h" #include "../builders/heuristic_binning_array_unaligned.h" #include "../builders/heuristic_strand_array.h" #define NUM_HAIR_OBJECT_BINS 32 namespace embree { namespace isa { struct BVHBuilderHair { /*! settings for builder */ struct Settings { /*! default settings */ Settings () : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {} public: size_t branchingFactor; //!< branching factor of BVH to build size_t maxDepth; //!< maximum depth of BVH to build size_t logBlockSize; //!< log2 of blocksize for SAH heuristic size_t minLeafSize; //!< minimum size of a leaf size_t maxLeafSize; //!< maximum size of a leaf size_t finished_range_threshold; //!< finished range threshold }; template<typename NodeRef, typename CreateAllocFunc, typename CreateAABBNodeFunc, typename SetAABBNodeFunc, typename CreateOBBNodeFunc, typename SetOBBNodeFunc, typename CreateLeafFunc, typename ProgressMonitor, typename ReportFinishedRangeFunc> class BuilderT { ALIGNED_CLASS_(16); friend struct BVHBuilderHair; typedef FastAllocator::CachedAllocator Allocator; typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH; typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH; typedef HeuristicStrandSplit HeuristicStrandSplitSAH; static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build static const size_t travCostAligned = 1; static const size_t travCostUnaligned = 5; static const size_t intCost = 6; BuilderT (Scene* scene, PrimRef* prims, const CreateAllocFunc& createAlloc, const CreateAABBNodeFunc& createAABBNode, const SetAABBNodeFunc& setAABBNode, const CreateOBBNodeFunc& createOBBNode, const SetOBBNodeFunc& setOBBNode, const CreateLeafFunc& createLeaf, const ProgressMonitor& progressMonitor, const ReportFinishedRangeFunc& reportFinishedRange, const Settings settings) : cfg(settings), prims(prims), createAlloc(createAlloc), createAABBNode(createAABBNode), setAABBNode(setAABBNode), createOBBNode(createOBBNode), setOBBNode(setOBBNode), createLeaf(createLeaf), progressMonitor(progressMonitor), reportFinishedRange(reportFinishedRange), alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {} /*! checks if all primitives are from the same geometry */ __forceinline bool sameGeometry(const PrimInfoRange& range) { if (range.size() == 0) return true; unsigned int firstGeomID = prims[range.begin()].geomID(); for (size_t i=range.begin()+1; i<range.end(); i++) { if (prims[i].geomID() != firstGeomID){ return false; } } return true; } /*! creates a large leaf that could be larger than supported by the BVH */ NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc) { /* this should never occur but is a fatal error */ if (depth > cfg.maxDepth) throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached"); /* create leaf for few primitives */ if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo)) return createLeaf(prims,pinfo,alloc); /* fill all children by always splitting the largest one */ PrimInfoRange children[MAX_BRANCHING_FACTOR]; unsigned numChildren = 1; children[0] = pinfo; do { /* find best child with largest bounding box area */ int bestChild = -1; size_t bestSize = 0; for (unsigned i=0; i<numChildren; i++) { /* ignore leaves as they cannot get split */ if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i])) continue; /* remember child with largest size */ if (children[i].size() > bestSize) { bestSize = children[i].size(); bestChild = i; } } if (bestChild == -1) break; /*! split best child into left and right child */ __aligned(64) PrimInfoRange left, right; if (!sameGeometry(children[bestChild])) { alignedHeuristic.splitByGeometry(children[bestChild],left,right); } else { alignedHeuristic.splitFallback(children[bestChild],left,right); } /* add new children left and right */ children[bestChild] = children[numChildren-1]; children[numChildren-1] = left; children[numChildren+0] = right; numChildren++; } while (numChildren < cfg.branchingFactor); /* create node */ auto node = createAABBNode(alloc); for (size_t i=0; i<numChildren; i++) { const NodeRef child = createLargeLeaf(depth+1,children[i],alloc); setAABBNode(node,i,child,children[i].geomBounds); } return node; } /*! performs split */ __noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack { /* variable to track the SAH of the best splitting approach */ float bestSAH = inf; const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize; const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds); /* try standard binning in aligned space */ float alignedObjectSAH = inf; HeuristicBinningSAH::Split alignedObjectSplit; if (aligned) { alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize); alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH(); bestSAH = min(alignedObjectSAH,bestSAH); } /* try standard binning in unaligned space */ UnalignedHeuristicBinningSAH::Split unalignedObjectSplit; LinearSpace3fa uspace; float unalignedObjectSAH = inf; if (bestSAH > 0.7f*leafSAH) { uspace = unalignedHeuristic.computeAlignedSpace(pinfo); const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace); unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace); unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH(); bestSAH = min(unalignedObjectSAH,bestSAH); } /* try splitting into two strands */ HeuristicStrandSplitSAH::Split strandSplit; float strandSAH = inf; if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) { strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize); strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH(); bestSAH = min(strandSAH,bestSAH); } /* fallback if SAH heuristics failed */ if (unlikely(!std::isfinite(bestSAH))) { alignedHeuristic.deterministic_order(pinfo); alignedHeuristic.splitFallback(pinfo,linfo,rinfo); } /* perform aligned split if this is best */ else if (bestSAH == alignedObjectSAH) { alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo); } /* perform unaligned split if this is best */ else if (bestSAH == unalignedObjectSAH) { unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo); aligned = false; } /* perform strand split if this is best */ else if (bestSAH == strandSAH) { strandHeuristic.split(strandSplit,pinfo,linfo,rinfo); aligned = false; } /* can never happen */ else assert(false); } /*! recursive build */ NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier) { /* get thread local allocator */ if (!alloc) alloc = createAlloc(); /* call memory monitor function to signal progress */ if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD) progressMonitor(pinfo.size()); PrimInfoRange children[MAX_BRANCHING_FACTOR]; /* create leaf node */ if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) { alignedHeuristic.deterministic_order(pinfo); return createLargeLeaf(depth,pinfo,alloc); } /* fill all children by always splitting the one with the largest surface area */ size_t numChildren = 1; children[0] = pinfo; bool aligned = true; do { /* find best child with largest bounding box area */ ssize_t bestChild = -1; float bestArea = neg_inf; for (size_t i=0; i<numChildren; i++) { /* ignore leaves as they cannot get split */ if (children[i].size() <= cfg.minLeafSize) continue; /* remember child with largest area */ if (area(children[i].geomBounds) > bestArea) { bestArea = area(children[i].geomBounds); bestChild = i; } } if (bestChild == -1) break; /*! split best child into left and right child */ PrimInfoRange left, right; split(children[bestChild],left,right,aligned); /* add new children left and right */ children[bestChild] = children[numChildren-1]; children[numChildren-1] = left; children[numChildren+0] = right; numChildren++; } while (numChildren < cfg.branchingFactor); NodeRef node; /* create aligned node */ if (aligned) { node = createAABBNode(alloc); /* spawn tasks or ... */ if (pinfo.size() > SINGLE_THREADED_THRESHOLD) { parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { for (size_t i=r.begin(); i<r.end(); i++) { const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds); _mm_mfence(); // to allow non-temporal stores during build } }); } /* ... continue sequentially */ else { for (size_t i=0; i<numChildren; i++) { const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds); } } } /* create unaligned node */ else { node = createOBBNode(alloc); /* spawn tasks or ... */ if (pinfo.size() > SINGLE_THREADED_THRESHOLD) { parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { for (size_t i=r.begin(); i<r.end(); i++) { const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]); const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space); const OBBox3fa obounds(space,sinfo.geomBounds); const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds); _mm_mfence(); // to allow non-temporal stores during build } }); } /* ... continue sequentially */ else { for (size_t i=0; i<numChildren; i++) { const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]); const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space); const OBBox3fa obounds(space,sinfo.geomBounds); const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold; setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds); } } } /* reports a finished range of primrefs */ if (unlikely(alloc_barrier)) reportFinishedRange(pinfo); return node; } private: Settings cfg; PrimRef* prims; const CreateAllocFunc& createAlloc; const CreateAABBNodeFunc& createAABBNode; const SetAABBNodeFunc& setAABBNode; const CreateOBBNodeFunc& createOBBNode; const SetOBBNodeFunc& setOBBNode; const CreateLeafFunc& createLeaf; const ProgressMonitor& progressMonitor; const ReportFinishedRangeFunc& reportFinishedRange; private: HeuristicBinningSAH alignedHeuristic; UnalignedHeuristicBinningSAH unalignedHeuristic; HeuristicStrandSplitSAH strandHeuristic; }; template<typename NodeRef, typename CreateAllocFunc, typename CreateAABBNodeFunc, typename SetAABBNodeFunc, typename CreateOBBNodeFunc, typename SetOBBNodeFunc, typename CreateLeafFunc, typename ProgressMonitor, typename ReportFinishedRangeFunc> static NodeRef build (const CreateAllocFunc& createAlloc, const CreateAABBNodeFunc& createAABBNode, const SetAABBNodeFunc& setAABBNode, const CreateOBBNodeFunc& createOBBNode, const SetOBBNodeFunc& setOBBNode, const CreateLeafFunc& createLeaf, const ProgressMonitor& progressMonitor, const ReportFinishedRangeFunc& reportFinishedRange, Scene* scene, PrimRef* prims, const PrimInfo& pinfo, const Settings settings) { typedef BuilderT<NodeRef, CreateAllocFunc, CreateAABBNodeFunc,SetAABBNodeFunc, CreateOBBNodeFunc,SetOBBNodeFunc, CreateLeafFunc,ProgressMonitor, ReportFinishedRangeFunc> Builder; Builder builder(scene,prims,createAlloc, createAABBNode,setAABBNode, createOBBNode,setOBBNode, createLeaf,progressMonitor,reportFinishedRange,settings); NodeRef root = builder.recurse(1,pinfo,nullptr,true,false); _mm_mfence(); // to allow non-temporal stores during build return root; } }; } }