// 2020-12-19 13:50:20 +00:00
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
# pragma once
# include "../common/ray.h"
# include "../common/scene_subdiv_mesh.h"
# include "../bvh/bvh.h"
# include "../subdiv/tessellation.h"
# include "../subdiv/tessellation_cache.h"
# include "subdivpatch1.h"
namespace embree
{
namespace isa
{
class GridSOA
{
public :
/*! GridSOA constructor (only declared here; the definition is not part of this header).
 *
 *  The create() factories in this class invoke it through placement new on a
 *  buffer that is large enough for the header, the BVH, the grids and the roots.
 *
 *  \param patches        patch the grid is generated from
 *  \param time_steps     number of time steps stored
 *  \param x0,x1,y0,y1    inclusive sub-grid range inside the patch grid
 *  \param swidth,sheight create() passes patches->grid_u_res / patches->grid_v_res here
 *  \param geom           subdivision mesh the patch belongs to
 *  \param totalBvhBytes  bytes reserved for the BVH (as computed by create())
 *  \param gridBytes      bytes reserved for the grid of a single time step
 *  \param bounds_o       optional output pointer; presumably receives the grid bounds -- TODO confirm
 */
GridSOA(const SubdivPatch1Base* patches,
        const unsigned time_steps,
        const unsigned x0,
        const unsigned x1,
        const unsigned y0,
        const unsigned y1,
        const unsigned swidth,
        const unsigned sheight,
        const SubdivMesh* const geom,
        const size_t totalBvhBytes,
        const size_t gridBytes,
        BBox3fa* bounds_o = nullptr);
/*! Subgrid creation.
 *
 *  Requests one contiguous buffer from \p alloc and constructs the GridSOA in it
 *  with placement new. The buffer holds the GridSOA header (up to the `data`
 *  member), followed by the BVH, one grid per time step and one root reference
 *  per time step.
 *
 *  \param patches     patch the grid is generated from
 *  \param time_steps  number of time steps, has to be at least 1
 *  \param x0,x1,y0,y1 inclusive sub-grid range, requires x0 <= x1 and y0 <= y1
 *  \param scene       scene used to look up the SubdivMesh of the patch
 *  \param alloc       callable invoked with the number of bytes to allocate
 *  \param bounds_o    optional output pointer that is forwarded to the constructor
 *  \return pointer to the GridSOA constructed inside the allocated buffer
 */
template<typename Allocator>
static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,
                       unsigned x0, unsigned x1, unsigned y0, unsigned y1,
                       const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
{
  /* all size computations below are unsigned and would silently wrap around
     for zero time steps or an inverted range */
  assert(time_steps >= 1);
  assert(x0 <= x1 && y0 <= y1);

  const unsigned width  = x1 - x0 + 1;
  const unsigned height = y1 - y0 + 1;
  const GridRange range(0, width - 1, 0, height - 1);

  /* a single time step needs one static BVH, multiple time steps need one
     motion blur BVH per time segment plus a temporal BVH on top of them */
  size_t bvhBytes = 0;
  if (time_steps == 1)
    bvhBytes = getBVHBytes(range, sizeof(BVH4::AABBNode), 0);
  else {
    bvhBytes  = (time_steps - 1) * getBVHBytes(range, sizeof(BVH4::AABBNodeMB), 0);
    bvhBytes += getTemporalBVHBytes(make_range(0, int(time_steps - 1)), sizeof(BVH4::AABBNodeMB4D));
  }

  /* four floats are stored per grid point */
  const size_t gridBytes = 4 * size_t(width) * size_t(height) * sizeof(float);
  size_t rootBytes = time_steps * sizeof(BVH4::NodeRef);
#if !defined(__X86_64__) && !defined(__aarch64__)
  rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
#endif

  void* data = alloc(offsetof(GridSOA, data) + bvhBytes + time_steps * gridBytes + rootBytes);
  assert(data);
  return new (data) GridSOA(patches, time_steps, x0, x1, y0, y1,
                            patches->grid_u_res, patches->grid_v_res,
                            scene->get<SubdivMesh>(patches->geomID()),
                            bvhBytes, gridBytes, bounds_o);
}
/*! Grid creation: builds a GridSOA over the complete grid of the patch by
 *  forwarding the full range [0, grid_u_res-1] x [0, grid_v_res-1] to the
 *  sub-grid overload of create(). */
template<typename Allocator>
static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,
                       const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
{
  const auto last_u = patches->grid_u_res - 1;
  const auto last_v = patches->grid_v_res - 1;
  return create(patches, time_steps, 0, last_u, 0, last_v, scene, alloc, bounds_o);
}
/*! returns reference to the root stored for time step t; the roots are an array
 *  of BVH4::NodeRef located rootOffset bytes into the trailing data */
__forceinline BVH4::NodeRef& root(size_t t = 0)
{
  const size_t byteOffset = rootOffset + t * sizeof(BVH4::NodeRef);
  return reinterpret_cast<BVH4::NodeRef&>(data[byteOffset]);
}

/*! read-only variant of root() */
__forceinline const BVH4::NodeRef& root(size_t t = 0) const
{
  const size_t byteOffset = rootOffset + t * sizeof(BVH4::NodeRef);
  return reinterpret_cast<const BVH4::NodeRef&>(data[byteOffset]);
}
/*! returns pointer to BVH array, which starts at the first byte of the trailing data */
__forceinline int8_t* bvhData() { return &data[0]; }

/*! read-only variant of bvhData() */
__forceinline const int8_t* bvhData() const { return &data[0]; }
/*! returns pointer to Grid array of time step t; the grids start gridOffset bytes
 *  into the trailing data and each time step occupies gridBytes bytes */
__forceinline float* gridData(size_t t = 0)
{
  const size_t byteOffset = gridOffset + t * gridBytes;
  return reinterpret_cast<float*>(&data[byteOffset]);
}

/*! read-only variant of gridData() */
__forceinline const float* gridData(size_t t = 0) const
{
  const size_t byteOffset = gridOffset + t * gridBytes;
  return reinterpret_cast<const float*>(&data[byteOffset]);
}
/*! Encodes the grid position (u,v) as a pointer-sized leaf value.
 *  The linear index v*width+u is incremented by one and multiplied by 16, which
 *  leaves the low 4 bits of the result zero. decodeLeaf() is the inverse. */
__forceinline void* encodeLeaf(size_t u, size_t v)
{
  const size_t cell = v * width + u + 1; // +1 to not create empty leaf
  return reinterpret_cast<void*>(16 * cell);
}
/*! Turns a value produced by encodeLeaf() back into a pointer into the grid
 *  of time step t: the value is shifted right by 4 bits and the +1 bias that
 *  encodeLeaf() added is removed to obtain the float index. */
__forceinline float* decodeLeaf(size_t t, const void* ptr)
{
  const size_t cell = (reinterpret_cast<size_t>(ptr) >> 4) - 1;
  return gridData(t) + cell;
}
/*! returns the size of the BVH over the grid in bytes
 *  \param range      grid range the BVH is built over
 *  \param nodeBytes  size of one inner node; create() passes sizeof(BVH4::AABBNode)
 *                    or sizeof(BVH4::AABBNodeMB)
 *  \param leafBytes  size of one leaf; create() passes 0 */
static size_t getBVHBytes(const GridRange& range,
                          const size_t nodeBytes,
                          const size_t leafBytes);

/*! returns the size of the temporal BVH over the time range BVHs
 *  \param time_range range of time steps; create() passes [0, time_steps-1]
 *  \param nodeBytes  size of one inner node; create() passes sizeof(BVH4::AABBNodeMB4D) */
static size_t getTemporalBVHBytes(const range<int> time_range,
                                  const size_t nodeBytes);
/*! calculates bounding box of grid range
 *  \param time   time step whose grid is read
 *  \param range  inclusive u/v range of grid points to bound
 *  \return box enclosing all grid points of the range (asserted to be finite) */
__forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
{
  /* the x, y and z coordinates live in separate arrays that are dim_offset floats apart */
  const float* const base = gridData(time);
  const float* const xs = base;
  const float* const ys = base + dim_offset;
  const float* const zs = base + 2 * dim_offset;

  /* only the points inside the requested range contribute */
  BBox3fa box(empty);
  for (unsigned v = range.v_start; v <= range.v_end; v++)
  {
    const unsigned row = v * width;
    for (unsigned u = range.u_start; u <= range.u_end; u++)
    {
      const unsigned idx = row + u;
      box.extend(Vec3fa(xs[idx], ys[idx], zs[idx]));
    }
  }
  assert(is_finite(box));
  return box;
}
/*! Evaluates grid over patch and builds BVH4 tree over the grid. */
std : : pair < BVH4 : : NodeRef , BBox3fa > buildBVH ( BBox3fa * bounds_o ) ;
/*! Create BVH4 tree over grid. */
std : : pair < BVH4 : : NodeRef , BBox3fa > buildBVH ( const GridRange & range , size_t & allocator ) ;
/*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */
std : : pair < BVH4 : : NodeRef , LBBox3fa > buildMSMBlurBVH ( const range < int > time_range , BBox3fa * bounds_o ) ;
/*! Create MBlur BVH4 tree over grid. */
std : : pair < BVH4 : : NodeRef , LBBox3fa > buildMBlurBVH ( size_t time , const GridRange & range , size_t & allocator ) ;
/*! Create MSMBlur BVH4 tree over grid. */
std : : pair < BVH4 : : NodeRef , LBBox3fa > buildMSMBlurBVH ( const range < int > time_range , size_t & allocator , BBox3fa * bounds_o ) ;
/*! Functor that replaces per-triangle barycentric coordinates (u,v) by the
 *  interpolated uv values stored in the grid.
 *
 *  The packed uv values of the three triangle vertices are fetched with
 *  Loader::gather, unpacked with GridSOA::decodeUV and blended using the
 *  weights (1-u-v, u, v). */
template<typename Loader>
struct MapUV
{
  typedef typename Loader::vfloat vfloat;

  const float* const grid_uv; //!< packed uv values of the grid
  size_t line_offset;         //!< forwarded to Loader::gather
  size_t lines;               //!< forwarded to Loader::gather

  __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
    : grid_uv(grid_uv)
    , line_offset(line_offset)
    , lines(lines)
  {}

  /*! overwrites u and v in place with the interpolated values */
  __forceinline void operator()(vfloat& u, vfloat& v) const
  {
    const Vec3<vfloat> packed = Loader::gather(grid_uv, line_offset, lines);
    const Vec2<vfloat> uv0 = GridSOA::decodeUV(packed[0]);
    const Vec2<vfloat> uv1 = GridSOA::decodeUV(packed[1]);
    const Vec2<vfloat> uv2 = GridSOA::decodeUV(packed[2]);

    const vfloat w = 1.0f - u - v;
    const Vec2<vfloat> uv = u * uv1 + v * uv2 + w * uv0;
    u = uv[0];
    v = uv[1];
  }
};
/*! Loader that reads a block of 2 grid rows and arranges the values as the
 *  three vertices of 4 triangles (one triangle per SIMD lane). */
struct Gather2x3
{
  enum { M = 4 };
  typedef vbool4 vbool;
  typedef vint4 vint;
  typedef vfloat4 vfloat;

  /*! Gathers one component. Element i of the result holds vertex i of every triangle.
   *  \param grid         first value of the upper row
   *  \param line_offset  distance in floats between two consecutive rows
   *  \param lines        not used by this loader */
  static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines)
  {
    vfloat4 row0 = vfloat4::loadu(grid);
    vfloat4 row1 = vfloat4::loadu(grid + line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid

    /* rows with only 2 entries: replicate the second entry into the remaining lanes */
    if (unlikely(line_offset == 2))
    {
      row0 = shuffle<0, 1, 1, 1>(row0);
      row1 = shuffle<0, 1, 1, 1>(row1);
    }

    const vfloat4 vtx0 = unpacklo(row0, row1);         // r00, r10, r01, r11
    const vfloat4 vtx1 = shuffle<1, 1, 2, 2>(row0);    // r01, r01, r02, r02
    const vfloat4 vtx2 = shuffle<0, 1, 1, 2>(row1);    // r10, r11, r11, r12
    return Vec3vf4(vtx0, vtx1, vtx2);
  }

  /*! Gathers the x, y and z components and regroups them into the three
   *  triangle vertices v0_o, v1_o and v2_o. */
  static __forceinline void gather(const float* const grid_x,
                                   const float* const grid_y,
                                   const float* const grid_z,
                                   const size_t line_offset,
                                   const size_t lines,
                                   Vec3vf4& v0_o,
                                   Vec3vf4& v1_o,
                                   Vec3vf4& v2_o)
  {
    const Vec3vf4 xs = gather(grid_x, line_offset, lines);
    const Vec3vf4 ys = gather(grid_y, line_offset, lines);
    const Vec3vf4 zs = gather(grid_z, line_offset, lines);

    v0_o = Vec3vf4(xs[0], ys[0], zs[0]);
    v1_o = Vec3vf4(xs[1], ys[1], zs[1]);
    v2_o = Vec3vf4(xs[2], ys[2], zs[2]);
  }
};
#if defined(__AVX__)
/*! Loader that reads a block of up to 3 grid rows and arranges the values as
 *  the three vertices of 8 triangles (one triangle per SIMD lane). Only
 *  available when compiling with AVX. */
struct Gather3x3
{
  enum { M = 8 };
  typedef vbool8 vbool;
  typedef vint8 vint;
  typedef vfloat8 vfloat;

  /*! Gathers one component. Element i of the result holds vertex i of every triangle.
   *  \param grid         first value of the top row
   *  \param line_offset  distance in floats between two consecutive rows
   *  \param lines        number of rows; the third row is only loaded if lines > 2,
   *                      otherwise the second row is reused in its place */
  static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)
  {
    vfloat4 top = vfloat4::loadu(grid);
    vfloat4 mid = vfloat4::loadu(grid + line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
    vfloat4 bot = likely(lines > 2) ? vfloat4::loadu(grid + 2 * line_offset) : mid;

    /* rows with only 2 entries: replicate the second entry into the remaining lanes */
    if (unlikely(line_offset == 2))
    {
      top = shuffle<0, 1, 1, 1>(top);
      mid = shuffle<0, 1, 1, 1>(mid);
      bot = shuffle<0, 1, 1, 1>(bot);
    }

    /* lower half handles rows (top,mid), upper half handles rows (mid,bot) */
    const vfloat8 upperRows = vfloat8(top, mid);
    const vfloat8 lowerRows = vfloat8(mid, bot);

    const vfloat8 vtx0 = unpacklo(upperRows, lowerRows);     // r00, r10, r01, r11, r10, r20, r11, r21
    const vfloat8 vtx1 = shuffle<1, 1, 2, 2>(upperRows);     // r01, r01, r02, r02, r11, r11, r12, r12
    const vfloat8 vtx2 = shuffle<0, 1, 1, 2>(lowerRows);     // r10, r11, r11, r12, r20, r21, r21, r22
    return Vec3vf8(vtx0, vtx1, vtx2);
  }

  /*! Gathers the x, y and z components and regroups them into the three
   *  triangle vertices v0_o, v1_o and v2_o. */
  static __forceinline void gather(const float* const grid_x,
                                   const float* const grid_y,
                                   const float* const grid_z,
                                   const size_t line_offset,
                                   const size_t lines,
                                   Vec3vf8& v0_o,
                                   Vec3vf8& v1_o,
                                   Vec3vf8& v2_o)
  {
    const Vec3vf8 xs = gather(grid_x, line_offset, lines);
    const Vec3vf8 ys = gather(grid_y, line_offset, lines);
    const Vec3vf8 zs = gather(grid_z, line_offset, lines);

    v0_o = Vec3vf8(xs[0], ys[0], zs[0]);
    v1_o = Vec3vf8(xs[1], ys[1], zs[1]);
    v2_o = Vec3vf8(xs[2], ys[2], zs[2]);
  }
};
#endif
/*! Unpacks uv coordinates that are stored in the bit pattern of a float:
 *  the low 16 bits hold u, the bits above hold v (extracted with srl by 16).
 *  Both integers are converted to float and scaled by 8/65536. */
template<typename vfloat>
static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
{
  typedef typename vfloat::Int vint;

  const vint bits = asInt(uv);
  const vint iu = bits & 0xffff;
  const vint iv = srl(bits, 16);

  const vfloat scale(8.0f / 0x10000);
  const vfloat u = vfloat(iu) * scale;
  const vfloat v = vfloat(iv) * scale;
  return Vec2<vfloat>(u, v);
}
/*! returns the geometry ID stored in this grid */
__forceinline unsigned int geomID() const { return _geomID; }
/*! returns the primitive ID stored in this grid */
__forceinline unsigned int primID() const { return _primID; }
public:
  /* Header of the in-place object. create() computes the allocation size from
     offsetof(GridSOA, data), so order and types of these members must not change. */
  BVH4::NodeRef troot;   //!< node reference; not used inside this header (presumably the top-level root -- TODO confirm)
#if !defined(__X86_64__) && !defined(__aarch64__)
  unsigned align1;       //!< only present on non-64-bit targets; presumably padding after troot
#endif
  unsigned time_steps;   //!< number of time steps
  unsigned width;        //!< floats per grid row, used for indexing in calculateBounds() and encodeLeaf()
  unsigned height;       //!< grid height
  unsigned dim_offset;   //!< distance in floats between the x, y and z arrays of a grid
  unsigned _geomID;      //!< returned by geomID()
  unsigned _primID;      //!< returned by primID()
  unsigned align2;       //!< not referenced in this header; presumably padding
  unsigned gridOffset;   //!< byte offset of the first grid inside data
  unsigned gridBytes;    //!< byte size of the grid of one time step
  unsigned rootOffset;   //!< byte offset of the root array inside data
  int8_t data[1];        //!< after the struct we first store the BVH, then the grid, and finally the roots
} ;
}
}