2021-05-20 10:49:33 +00:00
// Copyright 2009-2021 Intel Corporation
2020-12-19 13:50:20 +00:00
// SPDX-License-Identifier: Apache-2.0
# pragma once
# include "../bvh/bvh.h"
# include "../geometry/primitive.h"
# include "../builders/bvh_builder_sah.h"
# include "../builders/heuristic_binning_array_aligned.h"
# include "../builders/heuristic_binning_array_unaligned.h"
# include "../builders/heuristic_strand_array.h"
# define NUM_HAIR_OBJECT_BINS 32
namespace embree
{
namespace isa
{
struct BVHBuilderHair
{
/*! settings for builder */
struct Settings
{
/*! default settings */
Settings ( )
: branchingFactor ( 2 ) , maxDepth ( 32 ) , logBlockSize ( 0 ) , minLeafSize ( 1 ) , maxLeafSize ( 7 ) , finished_range_threshold ( inf ) { }
public :
size_t branchingFactor ; //!< branching factor of BVH to build
size_t maxDepth ; //!< maximum depth of BVH to build
size_t logBlockSize ; //!< log2 of blocksize for SAH heuristic
size_t minLeafSize ; //!< minimum size of a leaf
size_t maxLeafSize ; //!< maximum size of a leaf
size_t finished_range_threshold ; //!< finished range threshold
} ;
template < typename NodeRef ,
typename CreateAllocFunc ,
typename CreateAABBNodeFunc ,
typename SetAABBNodeFunc ,
typename CreateOBBNodeFunc ,
typename SetOBBNodeFunc ,
typename CreateLeafFunc ,
typename ProgressMonitor ,
typename ReportFinishedRangeFunc >
class BuilderT
{
ALIGNED_CLASS_ ( 16 ) ;
friend struct BVHBuilderHair ;
typedef FastAllocator : : CachedAllocator Allocator ;
typedef HeuristicArrayBinningSAH < PrimRef , NUM_HAIR_OBJECT_BINS > HeuristicBinningSAH ;
typedef UnalignedHeuristicArrayBinningSAH < PrimRef , NUM_HAIR_OBJECT_BINS > UnalignedHeuristicBinningSAH ;
typedef HeuristicStrandSplit HeuristicStrandSplitSAH ;
static const size_t MAX_BRANCHING_FACTOR = 8 ; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8 ; //!< create balanced tree if we are that many levels before the maximum tree depth
static const size_t SINGLE_THREADED_THRESHOLD = 4096 ; //!< threshold to switch to single threaded build
static const size_t travCostAligned = 1 ;
static const size_t travCostUnaligned = 5 ;
static const size_t intCost = 6 ;
BuilderT ( Scene * scene ,
PrimRef * prims ,
const CreateAllocFunc & createAlloc ,
const CreateAABBNodeFunc & createAABBNode ,
const SetAABBNodeFunc & setAABBNode ,
const CreateOBBNodeFunc & createOBBNode ,
const SetOBBNodeFunc & setOBBNode ,
const CreateLeafFunc & createLeaf ,
const ProgressMonitor & progressMonitor ,
const ReportFinishedRangeFunc & reportFinishedRange ,
const Settings settings )
: cfg ( settings ) ,
prims ( prims ) ,
createAlloc ( createAlloc ) ,
createAABBNode ( createAABBNode ) ,
setAABBNode ( setAABBNode ) ,
createOBBNode ( createOBBNode ) ,
setOBBNode ( setOBBNode ) ,
createLeaf ( createLeaf ) ,
progressMonitor ( progressMonitor ) ,
reportFinishedRange ( reportFinishedRange ) ,
alignedHeuristic ( prims ) , unalignedHeuristic ( scene , prims ) , strandHeuristic ( scene , prims ) { }
/*! checks if all primitives are from the same geometry */
__forceinline bool sameGeometry ( const PrimInfoRange & range )
{
if ( range . size ( ) = = 0 ) return true ;
unsigned int firstGeomID = prims [ range . begin ( ) ] . geomID ( ) ;
for ( size_t i = range . begin ( ) + 1 ; i < range . end ( ) ; i + + ) {
if ( prims [ i ] . geomID ( ) ! = firstGeomID ) {
return false ;
}
}
return true ;
}
/*! creates a large leaf that could be larger than supported by the BVH */
NodeRef createLargeLeaf ( size_t depth , const PrimInfoRange & pinfo , Allocator alloc )
{
/* this should never occur but is a fatal error */
if ( depth > cfg . maxDepth )
throw_RTCError ( RTC_ERROR_UNKNOWN , " depth limit reached " ) ;
/* create leaf for few primitives */
if ( pinfo . size ( ) < = cfg . maxLeafSize & & sameGeometry ( pinfo ) )
return createLeaf ( prims , pinfo , alloc ) ;
/* fill all children by always splitting the largest one */
PrimInfoRange children [ MAX_BRANCHING_FACTOR ] ;
unsigned numChildren = 1 ;
children [ 0 ] = pinfo ;
do {
/* find best child with largest bounding box area */
int bestChild = - 1 ;
size_t bestSize = 0 ;
for ( unsigned i = 0 ; i < numChildren ; i + + )
{
/* ignore leaves as they cannot get split */
if ( children [ i ] . size ( ) < = cfg . maxLeafSize & & sameGeometry ( children [ i ] ) )
continue ;
/* remember child with largest size */
if ( children [ i ] . size ( ) > bestSize ) {
bestSize = children [ i ] . size ( ) ;
bestChild = i ;
}
}
if ( bestChild = = - 1 ) break ;
/*! split best child into left and right child */
__aligned ( 64 ) PrimInfoRange left , right ;
if ( ! sameGeometry ( children [ bestChild ] ) ) {
alignedHeuristic . splitByGeometry ( children [ bestChild ] , left , right ) ;
} else {
alignedHeuristic . splitFallback ( children [ bestChild ] , left , right ) ;
}
/* add new children left and right */
children [ bestChild ] = children [ numChildren - 1 ] ;
children [ numChildren - 1 ] = left ;
children [ numChildren + 0 ] = right ;
numChildren + + ;
} while ( numChildren < cfg . branchingFactor ) ;
/* create node */
auto node = createAABBNode ( alloc ) ;
for ( size_t i = 0 ; i < numChildren ; i + + ) {
const NodeRef child = createLargeLeaf ( depth + 1 , children [ i ] , alloc ) ;
setAABBNode ( node , i , child , children [ i ] . geomBounds ) ;
}
return node ;
}
/*! performs split */
__noinline void split ( const PrimInfoRange & pinfo , PrimInfoRange & linfo , PrimInfoRange & rinfo , bool & aligned ) // FIXME: not inlined as ICC otherwise uses much stack
{
/* variable to track the SAH of the best splitting approach */
float bestSAH = inf ;
const size_t blocks = ( pinfo . size ( ) + ( 1ull < < cfg . logBlockSize ) - 1ull ) > > cfg . logBlockSize ;
const float leafSAH = intCost * float ( blocks ) * halfArea ( pinfo . geomBounds ) ;
/* try standard binning in aligned space */
float alignedObjectSAH = inf ;
HeuristicBinningSAH : : Split alignedObjectSplit ;
if ( aligned ) {
alignedObjectSplit = alignedHeuristic . find ( pinfo , cfg . logBlockSize ) ;
alignedObjectSAH = travCostAligned * halfArea ( pinfo . geomBounds ) + intCost * alignedObjectSplit . splitSAH ( ) ;
bestSAH = min ( alignedObjectSAH , bestSAH ) ;
}
/* try standard binning in unaligned space */
UnalignedHeuristicBinningSAH : : Split unalignedObjectSplit ;
LinearSpace3fa uspace ;
float unalignedObjectSAH = inf ;
if ( bestSAH > 0.7f * leafSAH ) {
uspace = unalignedHeuristic . computeAlignedSpace ( pinfo ) ;
const PrimInfoRange sinfo = unalignedHeuristic . computePrimInfo ( pinfo , uspace ) ;
unalignedObjectSplit = unalignedHeuristic . find ( sinfo , cfg . logBlockSize , uspace ) ;
unalignedObjectSAH = travCostUnaligned * halfArea ( pinfo . geomBounds ) + intCost * unalignedObjectSplit . splitSAH ( ) ;
bestSAH = min ( unalignedObjectSAH , bestSAH ) ;
}
/* try splitting into two strands */
HeuristicStrandSplitSAH : : Split strandSplit ;
float strandSAH = inf ;
if ( bestSAH > 0.7f * leafSAH & & pinfo . size ( ) < = 256 ) {
strandSplit = strandHeuristic . find ( pinfo , cfg . logBlockSize ) ;
strandSAH = travCostUnaligned * halfArea ( pinfo . geomBounds ) + intCost * strandSplit . splitSAH ( ) ;
bestSAH = min ( strandSAH , bestSAH ) ;
}
/* fallback if SAH heuristics failed */
if ( unlikely ( ! std : : isfinite ( bestSAH ) ) )
{
alignedHeuristic . deterministic_order ( pinfo ) ;
alignedHeuristic . splitFallback ( pinfo , linfo , rinfo ) ;
}
/* perform aligned split if this is best */
else if ( bestSAH = = alignedObjectSAH ) {
alignedHeuristic . split ( alignedObjectSplit , pinfo , linfo , rinfo ) ;
}
/* perform unaligned split if this is best */
else if ( bestSAH = = unalignedObjectSAH ) {
unalignedHeuristic . split ( unalignedObjectSplit , uspace , pinfo , linfo , rinfo ) ;
aligned = false ;
}
/* perform strand split if this is best */
else if ( bestSAH = = strandSAH ) {
strandHeuristic . split ( strandSplit , pinfo , linfo , rinfo ) ;
aligned = false ;
}
/* can never happen */
else
assert ( false ) ;
}
/*! recursive build */
NodeRef recurse ( size_t depth , const PrimInfoRange & pinfo , Allocator alloc , bool toplevel , bool alloc_barrier )
{
/* get thread local allocator */
if ( ! alloc )
alloc = createAlloc ( ) ;
/* call memory monitor function to signal progress */
if ( toplevel & & pinfo . size ( ) < = SINGLE_THREADED_THRESHOLD )
progressMonitor ( pinfo . size ( ) ) ;
PrimInfoRange children [ MAX_BRANCHING_FACTOR ] ;
/* create leaf node */
if ( depth + MIN_LARGE_LEAF_LEVELS > = cfg . maxDepth | | pinfo . size ( ) < = cfg . minLeafSize ) {
alignedHeuristic . deterministic_order ( pinfo ) ;
return createLargeLeaf ( depth , pinfo , alloc ) ;
}
/* fill all children by always splitting the one with the largest surface area */
size_t numChildren = 1 ;
children [ 0 ] = pinfo ;
bool aligned = true ;
do {
/* find best child with largest bounding box area */
ssize_t bestChild = - 1 ;
float bestArea = neg_inf ;
for ( size_t i = 0 ; i < numChildren ; i + + )
{
/* ignore leaves as they cannot get split */
if ( children [ i ] . size ( ) < = cfg . minLeafSize )
continue ;
/* remember child with largest area */
if ( area ( children [ i ] . geomBounds ) > bestArea ) {
bestArea = area ( children [ i ] . geomBounds ) ;
bestChild = i ;
}
}
if ( bestChild = = - 1 ) break ;
/*! split best child into left and right child */
PrimInfoRange left , right ;
split ( children [ bestChild ] , left , right , aligned ) ;
/* add new children left and right */
children [ bestChild ] = children [ numChildren - 1 ] ;
children [ numChildren - 1 ] = left ;
children [ numChildren + 0 ] = right ;
numChildren + + ;
} while ( numChildren < cfg . branchingFactor ) ;
NodeRef node ;
/* create aligned node */
if ( aligned )
{
node = createAABBNode ( alloc ) ;
/* spawn tasks or ... */
if ( pinfo . size ( ) > SINGLE_THREADED_THRESHOLD )
{
parallel_for ( size_t ( 0 ) , numChildren , [ & ] ( const range < size_t > & r ) {
for ( size_t i = r . begin ( ) ; i < r . end ( ) ; i + + ) {
const bool child_alloc_barrier = pinfo . size ( ) > cfg . finished_range_threshold & & children [ i ] . size ( ) < = cfg . finished_range_threshold ;
setAABBNode ( node , i , recurse ( depth + 1 , children [ i ] , nullptr , true , child_alloc_barrier ) , children [ i ] . geomBounds ) ;
_mm_mfence ( ) ; // to allow non-temporal stores during build
}
} ) ;
}
/* ... continue sequentially */
else {
for ( size_t i = 0 ; i < numChildren ; i + + ) {
const bool child_alloc_barrier = pinfo . size ( ) > cfg . finished_range_threshold & & children [ i ] . size ( ) < = cfg . finished_range_threshold ;
setAABBNode ( node , i , recurse ( depth + 1 , children [ i ] , alloc , false , child_alloc_barrier ) , children [ i ] . geomBounds ) ;
}
}
}
/* create unaligned node */
else
{
node = createOBBNode ( alloc ) ;
/* spawn tasks or ... */
if ( pinfo . size ( ) > SINGLE_THREADED_THRESHOLD )
{
parallel_for ( size_t ( 0 ) , numChildren , [ & ] ( const range < size_t > & r ) {
for ( size_t i = r . begin ( ) ; i < r . end ( ) ; i + + ) {
const LinearSpace3fa space = unalignedHeuristic . computeAlignedSpace ( children [ i ] ) ;
const PrimInfoRange sinfo = unalignedHeuristic . computePrimInfo ( children [ i ] , space ) ;
const OBBox3fa obounds ( space , sinfo . geomBounds ) ;
const bool child_alloc_barrier = pinfo . size ( ) > cfg . finished_range_threshold & & children [ i ] . size ( ) < = cfg . finished_range_threshold ;
setOBBNode ( node , i , recurse ( depth + 1 , children [ i ] , nullptr , true , child_alloc_barrier ) , obounds ) ;
_mm_mfence ( ) ; // to allow non-temporal stores during build
}
} ) ;
}
/* ... continue sequentially */
else
{
for ( size_t i = 0 ; i < numChildren ; i + + ) {
const LinearSpace3fa space = unalignedHeuristic . computeAlignedSpace ( children [ i ] ) ;
const PrimInfoRange sinfo = unalignedHeuristic . computePrimInfo ( children [ i ] , space ) ;
const OBBox3fa obounds ( space , sinfo . geomBounds ) ;
const bool child_alloc_barrier = pinfo . size ( ) > cfg . finished_range_threshold & & children [ i ] . size ( ) < = cfg . finished_range_threshold ;
setOBBNode ( node , i , recurse ( depth + 1 , children [ i ] , alloc , false , child_alloc_barrier ) , obounds ) ;
}
}
}
/* reports a finished range of primrefs */
if ( unlikely ( alloc_barrier ) )
reportFinishedRange ( pinfo ) ;
return node ;
}
private :
Settings cfg ;
PrimRef * prims ;
const CreateAllocFunc & createAlloc ;
const CreateAABBNodeFunc & createAABBNode ;
const SetAABBNodeFunc & setAABBNode ;
const CreateOBBNodeFunc & createOBBNode ;
const SetOBBNodeFunc & setOBBNode ;
const CreateLeafFunc & createLeaf ;
const ProgressMonitor & progressMonitor ;
const ReportFinishedRangeFunc & reportFinishedRange ;
private :
HeuristicBinningSAH alignedHeuristic ;
UnalignedHeuristicBinningSAH unalignedHeuristic ;
HeuristicStrandSplitSAH strandHeuristic ;
} ;
template < typename NodeRef ,
typename CreateAllocFunc ,
typename CreateAABBNodeFunc ,
typename SetAABBNodeFunc ,
typename CreateOBBNodeFunc ,
typename SetOBBNodeFunc ,
typename CreateLeafFunc ,
typename ProgressMonitor ,
typename ReportFinishedRangeFunc >
static NodeRef build ( const CreateAllocFunc & createAlloc ,
const CreateAABBNodeFunc & createAABBNode ,
const SetAABBNodeFunc & setAABBNode ,
const CreateOBBNodeFunc & createOBBNode ,
const SetOBBNodeFunc & setOBBNode ,
const CreateLeafFunc & createLeaf ,
const ProgressMonitor & progressMonitor ,
const ReportFinishedRangeFunc & reportFinishedRange ,
Scene * scene ,
PrimRef * prims ,
const PrimInfo & pinfo ,
const Settings settings )
{
typedef BuilderT < NodeRef ,
CreateAllocFunc ,
CreateAABBNodeFunc , SetAABBNodeFunc ,
CreateOBBNodeFunc , SetOBBNodeFunc ,
CreateLeafFunc , ProgressMonitor ,
ReportFinishedRangeFunc > Builder ;
Builder builder ( scene , prims , createAlloc ,
createAABBNode , setAABBNode ,
createOBBNode , setOBBNode ,
createLeaf , progressMonitor , reportFinishedRange , settings ) ;
NodeRef root = builder . recurse ( 1 , pinfo , nullptr , true , false ) ;
_mm_mfence ( ) ; // to allow non-temporal stores during build
return root ;
}
} ;
}
}