Merge pull request #52630 from JFonS/ebr_packets
Upgrade Embree and enable ray packets
This commit is contained in:
commit
a9b600bac0
@ -55,6 +55,9 @@ if env["builtin_embree"]:
|
||||
"kernels/bvh/bvh_builder_sah_mb.cpp",
|
||||
"kernels/bvh/bvh_builder_twolevel.cpp",
|
||||
"kernels/bvh/bvh_intersector1_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream_filters.cpp",
|
||||
]
|
||||
|
||||
thirdparty_sources = [thirdparty_dir + file for file in embree_src]
|
||||
|
@ -61,6 +61,11 @@ cpp_files = [
|
||||
"kernels/bvh/bvh_builder_twolevel.cpp",
|
||||
"kernels/bvh/bvh_intersector1.cpp",
|
||||
"kernels/bvh/bvh_intersector1_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream_filters.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream.cpp",
|
||||
]
|
||||
|
||||
os.chdir("../../thirdparty")
|
||||
@ -117,7 +122,7 @@ with open(os.path.join(dest_dir, "kernels/config.h"), "w") as config_file:
|
||||
/* #undef EMBREE_GEOMETRY_INSTANCE */
|
||||
/* #undef EMBREE_GEOMETRY_GRID */
|
||||
/* #undef EMBREE_GEOMETRY_POINT */
|
||||
/* #undef EMBREE_RAY_PACKETS */
|
||||
#define EMBREE_RAY_PACKETS
|
||||
/* #undef EMBREE_COMPACT_POLYS */
|
||||
|
||||
#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0
|
||||
@ -249,3 +254,8 @@ with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as con
|
||||
|
||||
os.chdir("..")
|
||||
shutil.rmtree("embree-tmp")
|
||||
|
||||
subprocess.run(["git", "restore", "embree/patches"])
|
||||
|
||||
for patch in os.listdir("embree/patches"):
|
||||
subprocess.run(["git", "apply", "embree/patches/" + patch])
|
||||
|
@ -6,9 +6,9 @@
|
||||
|
||||
#define RTC_VERSION_MAJOR 3
|
||||
#define RTC_VERSION_MINOR 13
|
||||
#define RTC_VERSION_PATCH 0
|
||||
#define RTC_VERSION 31300
|
||||
#define RTC_VERSION_STRING "3.13.0"
|
||||
#define RTC_VERSION_PATCH 1
|
||||
#define RTC_VERSION 31301
|
||||
#define RTC_VERSION_STRING "3.13.1"
|
||||
|
||||
#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
|
||||
|
||||
|
917
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
vendored
Normal file
917
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
vendored
Normal file
@ -0,0 +1,917 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_hybrid.h"
|
||||
#include "bvh_traverser1.h"
|
||||
#include "node_intersector1.h"
|
||||
#include "node_intersector_packet.h"
|
||||
|
||||
#include "../geometry/intersector_iterators.h"
|
||||
#include "../geometry/triangle_intersector.h"
|
||||
#include "../geometry/trianglev_intersector.h"
|
||||
#include "../geometry/trianglev_mb_intersector.h"
|
||||
#include "../geometry/trianglei_intersector.h"
|
||||
#include "../geometry/quadv_intersector.h"
|
||||
#include "../geometry/quadi_intersector.h"
|
||||
#include "../geometry/curveNv_intersector.h"
|
||||
#include "../geometry/curveNi_intersector.h"
|
||||
#include "../geometry/curveNi_mb_intersector.h"
|
||||
#include "../geometry/linei_intersector.h"
|
||||
#include "../geometry/subdivpatch1_intersector.h"
|
||||
#include "../geometry/object_intersector.h"
|
||||
#include "../geometry/instance_intersector.h"
|
||||
#include "../geometry/subgrid_intersector.h"
|
||||
#include "../geometry/subgrid_mb_intersector.h"
|
||||
#include "../geometry/curve_intersector_virtual.h"
|
||||
|
||||
#define SWITCH_DURING_DOWN_TRAVERSAL 1
|
||||
#define FORCE_SINGLE_MODE 0
|
||||
|
||||
#define ENABLE_FAST_COHERENT_CODEPATHS 1
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
|
||||
const BVH* bvh,
|
||||
NodeRef root,
|
||||
size_t k,
|
||||
Precalculations& pre,
|
||||
RayHitK<K>& ray,
|
||||
const TravRayK<K, robust>& tray,
|
||||
IntersectContext* context)
|
||||
{
|
||||
/* stack state */
|
||||
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
|
||||
stack[0].ptr = root;
|
||||
stack[0].dist = neg_inf;
|
||||
|
||||
/* load the ray into SIMD registers */
|
||||
TravRay<N,robust> tray1;
|
||||
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* if popped node is too far, pop next one */
|
||||
if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
|
||||
continue;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
|
||||
if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(normal.trav_leaves, 1, 1, 1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
|
||||
|
||||
tray1.tfar = ray.tfar[k];
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->dist = neg_inf;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayHitK<K>& __restrict__ ray,
|
||||
IntersectContext* __restrict__ context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
|
||||
assert(context);
|
||||
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
|
||||
{
|
||||
intersectCoherent(valid_i, This, ray, context);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
|
||||
#if defined(__AVX__)
|
||||
STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
|
||||
#endif
|
||||
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
if (single)
|
||||
{
|
||||
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
for (; valid_bits!=0; ) {
|
||||
const size_t i = bscf(valid_bits);
|
||||
intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* determine switch threshold based on flags */
|
||||
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
/* test whether we have ray with opposing direction signs in the packet */
|
||||
bool split = false;
|
||||
{
|
||||
size_t bits = valid_bits;
|
||||
vbool<K> vsplit( false );
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(bits);
|
||||
vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
bits &= ~(size_t)movemask(octant_valid);
|
||||
vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
|
||||
} while (bits);
|
||||
if (any(vsplit)) split = true;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
|
||||
const vint<K> count_diff_octant = \
|
||||
((diff_octant >> 2) & 1) +
|
||||
((diff_octant >> 1) & 1) +
|
||||
((diff_octant >> 0) & 1);
|
||||
|
||||
vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
|
||||
if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
|
||||
|
||||
|
||||
octant = select(octant_valid,vint<K>(0xffffffff),octant);
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
/* allocate stack and push root node */
|
||||
vfloat<K> stack_near[stackSizeChunk];
|
||||
NodeRef stack_node[stackSizeChunk];
|
||||
stack_node[0] = BVH::invalidNode;
|
||||
stack_near[0] = inf;
|
||||
stack_node[1] = bvh->root;
|
||||
stack_near[1] = tray.tnear;
|
||||
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
|
||||
NodeRef* __restrict__ sptr_node = stack_node + 2;
|
||||
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
assert(sptr_node > stack_node);
|
||||
sptr_node--;
|
||||
sptr_near--;
|
||||
NodeRef cur = *sptr_node;
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *sptr_near;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active)))
|
||||
continue;
|
||||
|
||||
/* switch to single ray traversal */
|
||||
#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__)
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (single)
|
||||
#endif
|
||||
{
|
||||
size_t bits = movemask(active);
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (unlikely(popcnt(bits) <= switchThreshold))
|
||||
#endif
|
||||
{
|
||||
for (; bits!=0; ) {
|
||||
const size_t i = bscf(bits);
|
||||
intersect1(This, bvh, cur, i, pre, ray, tray, context);
|
||||
}
|
||||
tray.tfar = min(tray.tfar, ray.tfar);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
const vbool<K> valid_node = tray.tfar > curDist;
|
||||
STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const BaseNode* __restrict__ const node = nodeRef.baseNode();
|
||||
|
||||
/* set cur to invalid */
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
size_t num_child_hits = 0;
|
||||
|
||||
for (unsigned i = 0; i < N; i++)
|
||||
{
|
||||
const NodeRef child = node->children[i];
|
||||
if (unlikely(child == BVH::emptyNode)) break;
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = valid_node;
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
/* if we hit the child we choose to continue with that child if it
|
||||
is closer than the current next child, or we push it onto the stack */
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
assert(sptr_node < stackEnd);
|
||||
assert(child != BVH::emptyNode);
|
||||
const vfloat<K> childDist = select(lhit, lnearP, inf);
|
||||
/* push cur node onto stack and continue with hit child */
|
||||
if (any(childDist < curDist))
|
||||
{
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
num_child_hits++;
|
||||
*sptr_node = cur; sptr_node++;
|
||||
*sptr_near = curDist; sptr_near++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
|
||||
/* push hit child onto stack */
|
||||
else {
|
||||
num_child_hits++;
|
||||
*sptr_node = child; sptr_node++;
|
||||
*sptr_near = childDist; sptr_near++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
|
||||
#endif
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode))
|
||||
goto pop;
|
||||
|
||||
/* improved distance sorting for 3 or more hits */
|
||||
if (unlikely(num_child_hits >= 2))
|
||||
{
|
||||
if (any(sptr_near[-2] < sptr_near[-1]))
|
||||
{
|
||||
std::swap(sptr_near[-2],sptr_near[-1]);
|
||||
std::swap(sptr_node[-2],sptr_node[-1]);
|
||||
}
|
||||
if (unlikely(num_child_hits >= 3))
|
||||
{
|
||||
if (any(sptr_near[-3] < sptr_near[-1]))
|
||||
{
|
||||
std::swap(sptr_near[-3],sptr_near[-1]);
|
||||
std::swap(sptr_node[-3],sptr_node[-1]);
|
||||
}
|
||||
if (any(sptr_near[-3] < sptr_near[-2]))
|
||||
{
|
||||
std::swap(sptr_near[-3],sptr_near[-2]);
|
||||
std::swap(sptr_node[-3],sptr_node[-2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
|
||||
if (single)
|
||||
{
|
||||
// seems to be the best place for testing utilization
|
||||
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
|
||||
{
|
||||
*sptr_node++ = cur;
|
||||
*sptr_near++ = curDist;
|
||||
goto pop;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return if stack is empty */
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*sptr_node = lazy_node; sptr_node++;
|
||||
*sptr_near = neg_inf; sptr_near++;
|
||||
}
|
||||
}
|
||||
} while(valid_bits);
|
||||
}
|
||||
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayHitK<K>& __restrict__ ray,
|
||||
IntersectContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
const vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
Frustum<robust> frustum;
|
||||
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
|
||||
|
||||
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].dist = neg_inf;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *(float*)&stackPtr->dist;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active))) continue;
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
|
||||
|
||||
vfloat<N> fmin;
|
||||
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
|
||||
|
||||
if (unlikely(!m_frustum_node)) goto pop;
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
|
||||
#endif
|
||||
size_t num_child_hits = 0;
|
||||
do {
|
||||
const size_t i = bscf(m_frustum_node);
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
const vfloat<K> childDist = fmin[i];
|
||||
const NodeRef child = node->child(i);
|
||||
BVHN<N>::prefetch(child);
|
||||
if (any(childDist < curDist))
|
||||
{
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
num_child_hits++;
|
||||
stackPtr->ptr = cur;
|
||||
*(float*)&stackPtr->dist = toScalar(curDist);
|
||||
stackPtr++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
/* push hit child onto stack */
|
||||
else {
|
||||
num_child_hits++;
|
||||
stackPtr->ptr = child;
|
||||
*(float*)&stackPtr->dist = toScalar(childDist);
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
} while(m_frustum_node);
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode)) goto pop;
|
||||
|
||||
/* improved distance sorting for 3 or more hits */
|
||||
if (unlikely(num_child_hits >= 2))
|
||||
{
|
||||
if (stackPtr[-2].dist < stackPtr[-1].dist)
|
||||
std::swap(stackPtr[-2],stackPtr[-1]);
|
||||
if (unlikely(num_child_hits >= 3))
|
||||
{
|
||||
if (stackPtr[-3].dist < stackPtr[-1].dist)
|
||||
std::swap(stackPtr[-3],stackPtr[-1]);
|
||||
if (stackPtr[-3].dist < stackPtr[-2].dist)
|
||||
std::swap(stackPtr[-3],stackPtr[-2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::invalidNode);
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
|
||||
/* reduce max distance interval on successful intersection */
|
||||
if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
|
||||
{
|
||||
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
|
||||
frustum.template updateMaxDist<K>(tray.tfar);
|
||||
}
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->dist = neg_inf;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
|
||||
} while(valid_bits);
|
||||
}
|
||||
|
||||
// ===================================================================================================================================================================
|
||||
// ===================================================================================================================================================================
|
||||
// ===================================================================================================================================================================
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
|
||||
const BVH* bvh,
|
||||
NodeRef root,
|
||||
size_t k,
|
||||
Precalculations& pre,
|
||||
RayK<K>& ray,
|
||||
const TravRayK<K, robust>& tray,
|
||||
IntersectContext* context)
|
||||
{
|
||||
/* stack state */
|
||||
NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
|
||||
NodeRef* stackPtr = stack+1; // current stack pointer
|
||||
NodeRef* stackEnd = stack+stackSizeSingle;
|
||||
stack[0] = root;
|
||||
|
||||
/* load the ray into SIMD registers */
|
||||
TravRay<N,robust> tray1;
|
||||
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = (NodeRef)*stackPtr;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(shadow.trav_nodes, 1, 1, 1);
|
||||
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
|
||||
if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(shadow.trav_leaves, 1, 1, 1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
|
||||
ray.tfar[k] = neg_inf;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*stackPtr = lazy_node;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>& __restrict__ ray,
|
||||
IntersectContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
|
||||
assert(context);
|
||||
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
|
||||
{
|
||||
occludedCoherent(valid_i, This, ray, context);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* filter out already occluded and invalid rays */
|
||||
vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
const size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
vbool<K> terminated = !valid;
|
||||
const vfloat<K> inf = vfloat<K>(pos_inf);
|
||||
|
||||
/* determine switch threshold based on flags */
|
||||
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
|
||||
|
||||
/* allocate stack and push root node */
|
||||
vfloat<K> stack_near[stackSizeChunk];
|
||||
NodeRef stack_node[stackSizeChunk];
|
||||
stack_node[0] = BVH::invalidNode;
|
||||
stack_near[0] = inf;
|
||||
stack_node[1] = bvh->root;
|
||||
stack_near[1] = tray.tnear;
|
||||
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
|
||||
NodeRef* __restrict__ sptr_node = stack_node + 2;
|
||||
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
assert(sptr_node > stack_node);
|
||||
sptr_node--;
|
||||
sptr_near--;
|
||||
NodeRef cur = *sptr_node;
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *sptr_near;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active)))
|
||||
continue;
|
||||
|
||||
/* switch to single ray traversal */
|
||||
#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__)
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (single)
|
||||
#endif
|
||||
{
|
||||
size_t bits = movemask(active);
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (unlikely(popcnt(bits) <= switchThreshold))
|
||||
#endif
|
||||
{
|
||||
for (; bits!=0; ) {
|
||||
const size_t i = bscf(bits);
|
||||
if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
|
||||
set(terminated, i);
|
||||
}
|
||||
if (all(terminated)) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
const vbool<K> valid_node = tray.tfar > curDist;
|
||||
STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const BaseNode* __restrict__ const node = nodeRef.baseNode();
|
||||
|
||||
/* set cur to invalid */
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
for (unsigned i = 0; i < N; i++)
|
||||
{
|
||||
const NodeRef child = node->children[i];
|
||||
if (unlikely(child == BVH::emptyNode)) break;
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = valid_node;
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
/* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
assert(sptr_node < stackEnd);
|
||||
assert(child != BVH::emptyNode);
|
||||
const vfloat<K> childDist = select(lhit, lnearP, inf);
|
||||
|
||||
/* push 'cur' node onto stack and continue with hit child */
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
*sptr_node = cur; sptr_node++;
|
||||
*sptr_near = curDist; sptr_near++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
}
|
||||
if (unlikely(cur == BVH::emptyNode))
|
||||
goto pop;
|
||||
|
||||
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
|
||||
if (single)
|
||||
{
|
||||
// seems to be the best place for testing utilization
|
||||
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
|
||||
{
|
||||
*sptr_node++ = cur;
|
||||
*sptr_near++ = curDist;
|
||||
goto pop;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return if stack is empty */
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
if (all(terminated)) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*sptr_node = lazy_node; sptr_node++;
|
||||
*sptr_near = neg_inf; sptr_near++;
|
||||
}
|
||||
}
|
||||
|
||||
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
|
||||
}
|
||||
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>& __restrict__ ray,
|
||||
IntersectContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid,ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
vbool<K> terminated = !valid;
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
|
||||
|
||||
Frustum<robust> frustum;
|
||||
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
|
||||
|
||||
StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].mask = movemask(octant_valid);
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* cull node of active rays have already been terminated */
|
||||
size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
|
||||
|
||||
if (unlikely(m_active == 0)) continue;
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
|
||||
|
||||
vfloat<N> fmin;
|
||||
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
|
||||
|
||||
if (unlikely(!m_frustum_node)) goto pop;
|
||||
cur = BVH::emptyNode;
|
||||
m_active = 0;
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
|
||||
#endif
|
||||
size_t num_child_hits = 0;
|
||||
do {
|
||||
const size_t i = bscf(m_frustum_node);
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
const NodeRef child = node->child(i);
|
||||
assert(child != BVH::emptyNode);
|
||||
BVHN<N>::prefetch(child);
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
num_child_hits++;
|
||||
stackPtr->ptr = cur;
|
||||
stackPtr->mask = m_active;
|
||||
stackPtr++;
|
||||
}
|
||||
cur = child;
|
||||
m_active = movemask(lhit);
|
||||
}
|
||||
} while(m_frustum_node);
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode)) goto pop;
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::invalidNode);
|
||||
assert(cur != BVH::emptyNode);
|
||||
#if defined(__AVX__)
|
||||
STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
|
||||
#endif
|
||||
if (unlikely(!m_active)) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
octant_valid &= !terminated;
|
||||
if (unlikely(none(octant_valid))) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->mask = movemask(octant_valid);
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
} while(valid_bits);
|
||||
|
||||
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
|
||||
}
|
||||
}
|
||||
}
|
59
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp
vendored
Normal file
59
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_hybrid.cpp"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// BVH4Intersector4 Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoellerNoFilter, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoellerNoFilter,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridPluecker,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersectorK<4> >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersectorK<4> >));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersectorK<4> >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersectorK<4> >));
|
||||
|
||||
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
|
||||
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
|
||||
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4> >));
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4MB> >));
|
||||
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorK<4>> >));
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorKMB<4>> >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
|
||||
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersectorKPluecker <4 COMMA 4 COMMA true> >));
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersectorKPluecker <4 COMMA 4 COMMA true> >));
|
||||
|
||||
}
|
||||
}
|
||||
|
528
thirdparty/embree/kernels/bvh/bvh_intersector_stream.cpp
vendored
Normal file
528
thirdparty/embree/kernels/bvh/bvh_intersector_stream.cpp
vendored
Normal file
@ -0,0 +1,528 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_stream.h"
|
||||
|
||||
#include "../geometry/intersector_iterators.h"
|
||||
#include "../geometry/triangle_intersector.h"
|
||||
#include "../geometry/trianglev_intersector.h"
|
||||
#include "../geometry/trianglev_mb_intersector.h"
|
||||
#include "../geometry/trianglei_intersector.h"
|
||||
#include "../geometry/quadv_intersector.h"
|
||||
#include "../geometry/quadi_intersector.h"
|
||||
#include "../geometry/linei_intersector.h"
|
||||
#include "../geometry/subdivpatch1_intersector.h"
|
||||
#include "../geometry/object_intersector.h"
|
||||
#include "../geometry/instance_intersector.h"
|
||||
|
||||
#include "../common/scene.h"
|
||||
#include <bitset>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
__aligned(64) static const int shiftTable[32] = {
|
||||
(int)1 << 0, (int)1 << 1, (int)1 << 2, (int)1 << 3, (int)1 << 4, (int)1 << 5, (int)1 << 6, (int)1 << 7,
|
||||
(int)1 << 8, (int)1 << 9, (int)1 << 10, (int)1 << 11, (int)1 << 12, (int)1 << 13, (int)1 << 14, (int)1 << 15,
|
||||
(int)1 << 16, (int)1 << 17, (int)1 << 18, (int)1 << 19, (int)1 << 20, (int)1 << 21, (int)1 << 22, (int)1 << 23,
|
||||
(int)1 << 24, (int)1 << 25, (int)1 << 26, (int)1 << 27, (int)1 << 28, (int)1 << 29, (int)1 << 30, (int)1 << 31
|
||||
};
|
||||
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector>
|
||||
__forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::intersect(Accel::Intersectors* __restrict__ This,
|
||||
RayHitN** inputPackets,
|
||||
size_t numOctantRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
BVH* __restrict__ bvh = (BVH*) This->ptr;
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
// Only the coherent code path is implemented
|
||||
assert(context->isCoherent());
|
||||
intersectCoherent(This, (RayHitK<VSIZEL>**)inputPackets, numOctantRays, context);
|
||||
}
|
||||
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector>
|
||||
template<int K>
|
||||
__forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::intersectCoherent(Accel::Intersectors* __restrict__ This,
|
||||
RayHitK<K>** inputPackets,
|
||||
size_t numOctantRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
assert(context->isCoherent());
|
||||
|
||||
BVH* __restrict__ bvh = (BVH*) This->ptr;
|
||||
__aligned(64) StackItemMaskCoherent stack[stackSizeSingle]; // stack of nodes
|
||||
assert(numOctantRays <= MAX_INTERNAL_STREAM_SIZE);
|
||||
|
||||
__aligned(64) TravRayKStream<K, robust> packets[MAX_INTERNAL_STREAM_SIZE/K];
|
||||
__aligned(64) Frustum<robust> frustum;
|
||||
|
||||
bool commonOctant = true;
|
||||
const size_t m_active = initPacketsAndFrustum((RayK<K>**)inputPackets, numOctantRays, packets, frustum, commonOctant);
|
||||
if (unlikely(m_active == 0)) return;
|
||||
|
||||
/* case of non-common origin */
|
||||
if (unlikely(!commonOctant))
|
||||
{
|
||||
const size_t numPackets = (numOctantRays+K-1)/K;
|
||||
for (size_t i = 0; i < numPackets; i++)
|
||||
This->intersect(inputPackets[i]->tnear() <= inputPackets[i]->tfar, *inputPackets[i], context);
|
||||
return;
|
||||
}
|
||||
|
||||
stack[0].mask = m_active;
|
||||
stack[0].parent = 0;
|
||||
stack[0].child = bvh->root;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
StackItemMaskCoherent* stackPtr = stack + 1;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
|
||||
STAT3(normal.trav_stack_pop,1,1,1);
|
||||
stackPtr--;
|
||||
/*! pop next node */
|
||||
NodeRef cur = NodeRef(stackPtr->child);
|
||||
size_t m_trav_active = stackPtr->mask;
|
||||
assert(m_trav_active);
|
||||
NodeRef parent = stackPtr->parent;
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (unlikely(cur.isLeaf())) break;
|
||||
const AABBNode* __restrict__ const node = cur.getAABBNode();
|
||||
parent = cur;
|
||||
|
||||
__aligned(64) size_t maskK[N];
|
||||
for (size_t i = 0; i < N; i++)
|
||||
maskK[i] = m_trav_active;
|
||||
vfloat<N> dist;
|
||||
const size_t m_node_hit = traverseCoherentStream(m_trav_active, packets, node, frustum, maskK, dist);
|
||||
if (unlikely(m_node_hit == 0)) goto pop;
|
||||
|
||||
BVHNNodeTraverserStreamHitCoherent<N, types>::traverseClosestHit(cur, m_trav_active, vbool<N>((int)m_node_hit), dist, (size_t*)maskK, stackPtr);
|
||||
assert(m_trav_active);
|
||||
}
|
||||
|
||||
/* non-root and leaf => full culling test for all rays */
|
||||
if (unlikely(parent != 0 && cur.isLeaf()))
|
||||
{
|
||||
const AABBNode* __restrict__ const node = parent.getAABBNode();
|
||||
size_t boxID = 0xff;
|
||||
for (size_t i = 0; i < N; i++)
|
||||
if (node->child(i) == cur) { boxID = i; break; }
|
||||
assert(boxID < N);
|
||||
assert(cur == node->child(boxID));
|
||||
m_trav_active = intersectAABBNodePacket(m_trav_active, packets, node, boxID, frustum.nf);
|
||||
}
|
||||
|
||||
/*! this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(normal.trav_leaves, 1, 1, 1);
|
||||
size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
|
||||
|
||||
size_t bits = m_trav_active;
|
||||
|
||||
/*! intersect stream of rays with all primitives */
|
||||
size_t lazy_node = 0;
|
||||
#if defined(__SSE4_2__)
|
||||
STAT_USER(1,(popcnt(bits)+K-1)/K*4);
|
||||
#endif
|
||||
while(bits)
|
||||
{
|
||||
size_t i = bsf(bits) / K;
|
||||
const size_t m_isec = ((((size_t)1 << K)-1) << (i*K));
|
||||
assert(m_isec & bits);
|
||||
bits &= ~m_isec;
|
||||
|
||||
TravRayKStream<K, robust>& p = packets[i];
|
||||
vbool<K> m_valid = p.tnear <= p.tfar;
|
||||
PrimitiveIntersectorK<K>::intersectK(m_valid, This, *inputPackets[i], context, prim, num, lazy_node);
|
||||
p.tfar = min(p.tfar, inputPackets[i]->tfar);
|
||||
};
|
||||
|
||||
} // traversal + intersection
|
||||
}
|
||||
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector>
|
||||
__forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occluded(Accel::Intersectors* __restrict__ This,
|
||||
RayN** inputPackets,
|
||||
size_t numOctantRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
BVH* __restrict__ bvh = (BVH*) This->ptr;
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
if (unlikely(context->isCoherent()))
|
||||
occludedCoherent(This, (RayK<VSIZEL>**)inputPackets, numOctantRays, context);
|
||||
else
|
||||
occludedIncoherent(This, (RayK<VSIZEX>**)inputPackets, numOctantRays, context);
|
||||
}
|
||||
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector>
|
||||
template<int K>
|
||||
__noinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occludedCoherent(Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>** inputPackets,
|
||||
size_t numOctantRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
assert(context->isCoherent());
|
||||
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
__aligned(64) StackItemMaskCoherent stack[stackSizeSingle]; // stack of nodes
|
||||
assert(numOctantRays <= MAX_INTERNAL_STREAM_SIZE);
|
||||
|
||||
/* inactive rays should have been filtered out before */
|
||||
__aligned(64) TravRayKStream<K, robust> packets[MAX_INTERNAL_STREAM_SIZE/K];
|
||||
__aligned(64) Frustum<robust> frustum;
|
||||
|
||||
bool commonOctant = true;
|
||||
size_t m_active = initPacketsAndFrustum(inputPackets, numOctantRays, packets, frustum, commonOctant);
|
||||
|
||||
/* valid rays */
|
||||
if (unlikely(m_active == 0)) return;
|
||||
|
||||
/* case of non-common origin */
|
||||
if (unlikely(!commonOctant))
|
||||
{
|
||||
const size_t numPackets = (numOctantRays+K-1)/K;
|
||||
for (size_t i = 0; i < numPackets; i++)
|
||||
This->occluded(inputPackets[i]->tnear() <= inputPackets[i]->tfar, *inputPackets[i], context);
|
||||
return;
|
||||
}
|
||||
|
||||
stack[0].mask = m_active;
|
||||
stack[0].parent = 0;
|
||||
stack[0].child = bvh->root;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
StackItemMaskCoherent* stackPtr = stack + 1;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
|
||||
STAT3(normal.trav_stack_pop,1,1,1);
|
||||
stackPtr--;
|
||||
/*! pop next node */
|
||||
NodeRef cur = NodeRef(stackPtr->child);
|
||||
size_t m_trav_active = stackPtr->mask & m_active;
|
||||
if (unlikely(!m_trav_active)) continue;
|
||||
assert(m_trav_active);
|
||||
NodeRef parent = stackPtr->parent;
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (unlikely(cur.isLeaf())) break;
|
||||
const AABBNode* __restrict__ const node = cur.getAABBNode();
|
||||
parent = cur;
|
||||
|
||||
__aligned(64) size_t maskK[N];
|
||||
for (size_t i = 0; i < N; i++)
|
||||
maskK[i] = m_trav_active;
|
||||
|
||||
vfloat<N> dist;
|
||||
const size_t m_node_hit = traverseCoherentStream(m_trav_active, packets, node, frustum, maskK, dist);
|
||||
if (unlikely(m_node_hit == 0)) goto pop;
|
||||
|
||||
BVHNNodeTraverserStreamHitCoherent<N, types>::traverseAnyHit(cur, m_trav_active, vbool<N>((int)m_node_hit), (size_t*)maskK, stackPtr);
|
||||
assert(m_trav_active);
|
||||
}
|
||||
|
||||
/* non-root and leaf => full culling test for all rays */
|
||||
if (unlikely(parent != 0 && cur.isLeaf()))
|
||||
{
|
||||
const AABBNode* __restrict__ const node = parent.getAABBNode();
|
||||
size_t boxID = 0xff;
|
||||
for (size_t i = 0; i < N; i++)
|
||||
if (node->child(i) == cur) { boxID = i; break; }
|
||||
assert(boxID < N);
|
||||
assert(cur == node->child(boxID));
|
||||
m_trav_active = intersectAABBNodePacket(m_trav_active, packets, node, boxID, frustum.nf);
|
||||
}
|
||||
|
||||
/*! this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(normal.trav_leaves, 1, 1, 1);
|
||||
size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
|
||||
|
||||
size_t bits = m_trav_active & m_active;
|
||||
/*! intersect stream of rays with all primitives */
|
||||
size_t lazy_node = 0;
|
||||
#if defined(__SSE4_2__)
|
||||
STAT_USER(1,(popcnt(bits)+K-1)/K*4);
|
||||
#endif
|
||||
while (bits)
|
||||
{
|
||||
size_t i = bsf(bits) / K;
|
||||
const size_t m_isec = ((((size_t)1 << K)-1) << (i*K));
|
||||
assert(m_isec & bits);
|
||||
bits &= ~m_isec;
|
||||
TravRayKStream<K, robust>& p = packets[i];
|
||||
vbool<K> m_valid = p.tnear <= p.tfar;
|
||||
vbool<K> m_hit = PrimitiveIntersectorK<K>::occludedK(m_valid, This, *inputPackets[i], context, prim, num, lazy_node);
|
||||
inputPackets[i]->tfar = select(m_hit & m_valid, vfloat<K>(neg_inf), inputPackets[i]->tfar);
|
||||
m_active &= ~((size_t)movemask(m_hit) << (i*K));
|
||||
}
|
||||
|
||||
} // traversal + intersection
|
||||
}
|
||||
|
||||
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector>
|
||||
template<int K>
|
||||
__forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occludedIncoherent(Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>** inputPackets,
|
||||
size_t numOctantRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
assert(!context->isCoherent());
|
||||
assert(types & BVH_FLAG_ALIGNED_NODE);
|
||||
|
||||
__aligned(64) TravRayKStream<K,robust> packet[MAX_INTERNAL_STREAM_SIZE/K];
|
||||
|
||||
assert(numOctantRays <= 32);
|
||||
const size_t numPackets = (numOctantRays+K-1)/K;
|
||||
size_t m_active = 0;
|
||||
for (size_t i = 0; i < numPackets; i++)
|
||||
{
|
||||
const vfloat<K> tnear = inputPackets[i]->tnear();
|
||||
const vfloat<K> tfar = inputPackets[i]->tfar;
|
||||
vbool<K> m_valid = (tnear <= tfar) & (tnear >= 0.0f);
|
||||
m_active |= (size_t)movemask(m_valid) << (K*i);
|
||||
const Vec3vf<K>& org = inputPackets[i]->org;
|
||||
const Vec3vf<K>& dir = inputPackets[i]->dir;
|
||||
vfloat<K> packet_min_dist = max(tnear, 0.0f);
|
||||
vfloat<K> packet_max_dist = select(m_valid, tfar, neg_inf);
|
||||
new (&packet[i]) TravRayKStream<K,robust>(org, dir, packet_min_dist, packet_max_dist);
|
||||
}
|
||||
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].mask = m_active;
|
||||
|
||||
size_t terminated = ~m_active;
|
||||
|
||||
/* near/far offsets based on first ray */
|
||||
const NearFarPrecalculations nf(Vec3fa(packet[0].rdir.x[0], packet[0].rdir.y[0], packet[0].rdir.z[0]), N);
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
STAT3(shadow.trav_stack_pop,1,1,1);
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
size_t cur_mask = stackPtr->mask & (~terminated);
|
||||
if (unlikely(cur_mask == 0)) continue;
|
||||
|
||||
while (true)
|
||||
{
|
||||
/*! stop if we found a leaf node */
|
||||
if (unlikely(cur.isLeaf())) break;
|
||||
const AABBNode* __restrict__ const node = cur.getAABBNode();
|
||||
|
||||
const vint<N> vmask = traverseIncoherentStream(cur_mask, packet, node, nf, shiftTable);
|
||||
|
||||
size_t mask = movemask(vmask != vint<N>(zero));
|
||||
if (unlikely(mask == 0)) goto pop;
|
||||
|
||||
__aligned(64) unsigned int child_mask[N];
|
||||
vint<N>::storeu(child_mask, vmask); // this explicit store here causes much better code generation
|
||||
|
||||
/*! one child is hit, continue with that child */
|
||||
size_t r = bscf(mask);
|
||||
assert(r < N);
|
||||
cur = node->child(r);
|
||||
BVHN<N>::prefetch(cur,types);
|
||||
cur_mask = child_mask[r];
|
||||
|
||||
/* simple in order sequence */
|
||||
assert(cur != BVH::emptyNode);
|
||||
if (likely(mask == 0)) continue;
|
||||
stackPtr->ptr = cur;
|
||||
stackPtr->mask = cur_mask;
|
||||
stackPtr++;
|
||||
|
||||
for (; ;)
|
||||
{
|
||||
r = bscf(mask);
|
||||
assert(r < N);
|
||||
|
||||
cur = node->child(r);
|
||||
BVHN<N>::prefetch(cur,types);
|
||||
cur_mask = child_mask[r];
|
||||
assert(cur != BVH::emptyNode);
|
||||
if (likely(mask == 0)) break;
|
||||
stackPtr->ptr = cur;
|
||||
stackPtr->mask = cur_mask;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
|
||||
/*! this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(shadow.trav_leaves,1,1,1);
|
||||
size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
|
||||
|
||||
size_t bits = cur_mask;
|
||||
size_t lazy_node = 0;
|
||||
|
||||
for (; bits != 0;)
|
||||
{
|
||||
const size_t rayID = bscf(bits);
|
||||
|
||||
RayK<K> &ray = *inputPackets[rayID / K];
|
||||
const size_t k = rayID % K;
|
||||
if (PrimitiveIntersectorK<K>::occluded(This, ray, k, context, prim, num, lazy_node))
|
||||
{
|
||||
ray.tfar[k] = neg_inf;
|
||||
terminated |= (size_t)1 << rayID;
|
||||
}
|
||||
|
||||
/* lazy node */
|
||||
if (unlikely(lazy_node))
|
||||
{
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->mask = cur_mask;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(terminated == (size_t)-1)) break;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// ArrayIntersectorKStream Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<bool filter>
|
||||
struct Triangle4IntersectorStreamMoeller {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,TriangleMIntersectorKMoeller<4 COMMA K COMMA true>>;
|
||||
};
|
||||
|
||||
template<bool filter>
|
||||
struct Triangle4vIntersectorStreamPluecker {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,TriangleMvIntersectorKPluecker<4 COMMA K COMMA true>>;
|
||||
};
|
||||
|
||||
template<bool filter>
|
||||
struct Triangle4iIntersectorStreamMoeller {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,TriangleMiIntersectorKMoeller<4 COMMA K COMMA true>>;
|
||||
};
|
||||
|
||||
template<bool filter>
|
||||
struct Triangle4iIntersectorStreamPluecker {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,TriangleMiIntersectorKPluecker<4 COMMA K COMMA true>>;
|
||||
};
|
||||
|
||||
template<bool filter>
|
||||
struct Quad4vIntersectorStreamMoeller {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,QuadMvIntersectorKMoeller<4 COMMA K COMMA true>>;
|
||||
};
|
||||
|
||||
template<bool filter>
|
||||
struct Quad4iIntersectorStreamMoeller {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,QuadMiIntersectorKMoeller<4 COMMA K COMMA true>>;
|
||||
};
|
||||
|
||||
template<bool filter>
|
||||
struct Quad4vIntersectorStreamPluecker {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,QuadMvIntersectorKPluecker<4 COMMA K COMMA true>>;
|
||||
};
|
||||
|
||||
template<bool filter>
|
||||
struct Quad4iIntersectorStreamPluecker {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,QuadMiIntersectorKPluecker<4 COMMA K COMMA true>>;
|
||||
};
|
||||
|
||||
struct ObjectIntersectorStream {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,ObjectIntersectorK<K COMMA false>>;
|
||||
};
|
||||
|
||||
struct InstanceIntersectorStream {
|
||||
template<int K> using Type = ArrayIntersectorKStream<K,InstanceIntersectorK<K>>;
|
||||
};
|
||||
|
||||
// =====================================================================================================
|
||||
// =====================================================================================================
|
||||
// =====================================================================================================
|
||||
|
||||
template<int N>
|
||||
void BVHNIntersectorStreamPacketFallback<N>::intersect(Accel::Intersectors* __restrict__ This,
|
||||
RayHitN** inputRays,
|
||||
size_t numTotalRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
if (unlikely(context->isCoherent()))
|
||||
intersectK(This, (RayHitK<VSIZEL>**)inputRays, numTotalRays, context);
|
||||
else
|
||||
intersectK(This, (RayHitK<VSIZEX>**)inputRays, numTotalRays, context);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNIntersectorStreamPacketFallback<N>::occluded(Accel::Intersectors* __restrict__ This,
|
||||
RayN** inputRays,
|
||||
size_t numTotalRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
if (unlikely(context->isCoherent()))
|
||||
occludedK(This, (RayK<VSIZEL>**)inputRays, numTotalRays, context);
|
||||
else
|
||||
occludedK(This, (RayK<VSIZEX>**)inputRays, numTotalRays, context);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
template<int K>
|
||||
__noinline void BVHNIntersectorStreamPacketFallback<N>::intersectK(Accel::Intersectors* __restrict__ This,
|
||||
RayHitK<K>** inputRays,
|
||||
size_t numTotalRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
/* fallback to packets */
|
||||
for (size_t i = 0; i < numTotalRays; i += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
|
||||
vbool<K> valid = vi < vint<K>(int(numTotalRays));
|
||||
RayHitK<K>& ray = *(inputRays[i / K]);
|
||||
valid &= ray.tnear() <= ray.tfar;
|
||||
This->intersect(valid, ray, context);
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
template<int K>
|
||||
__noinline void BVHNIntersectorStreamPacketFallback<N>::occludedK(Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>** inputRays,
|
||||
size_t numTotalRays,
|
||||
IntersectContext* context)
|
||||
{
|
||||
/* fallback to packets */
|
||||
for (size_t i = 0; i < numTotalRays; i += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
|
||||
vbool<K> valid = vi < vint<K>(int(numTotalRays));
|
||||
RayK<K>& ray = *(inputRays[i / K]);
|
||||
valid &= ray.tnear() <= ray.tfar;
|
||||
This->occluded(valid, ray, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
36
thirdparty/embree/kernels/bvh/bvh_intersector_stream_bvh4.cpp
vendored
Normal file
36
thirdparty/embree/kernels/bvh/bvh_intersector_stream_bvh4.cpp
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_stream.cpp"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// General BVHIntersectorStreamPacketFallback Intersector
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
DEFINE_INTERSECTORN(BVH4IntersectorStreamPacketFallback,BVHNIntersectorStreamPacketFallback<4>);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// BVH4IntersectorStream Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4iIntersectorStreamMoeller, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Triangle4iIntersectorStreamMoeller<true>>));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4vIntersectorStreamPluecker, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA true COMMA Triangle4vIntersectorStreamPluecker<true>>));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4iIntersectorStreamPluecker, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA true COMMA Triangle4iIntersectorStreamPluecker<true>>));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4IntersectorStreamMoeller, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Triangle4IntersectorStreamMoeller<true>>));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4IntersectorStreamMoellerNoFilter, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Triangle4IntersectorStreamMoeller<false>>));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4vIntersectorStreamMoeller, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Quad4vIntersectorStreamMoeller<true>>));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4vIntersectorStreamMoellerNoFilter,BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Quad4vIntersectorStreamMoeller<false>>));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4iIntersectorStreamMoeller, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Quad4iIntersectorStreamMoeller<true>>));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4vIntersectorStreamPluecker, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA true COMMA Quad4vIntersectorStreamPluecker<true>>));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4iIntersectorStreamPluecker, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA true COMMA Quad4iIntersectorStreamPluecker<true>>));
|
||||
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTORN(BVH4VirtualIntersectorStream,BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA ObjectIntersectorStream>));
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTORN(BVH4InstanceIntersectorStream,BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA InstanceIntersectorStream>));
|
||||
}
|
||||
}
|
657
thirdparty/embree/kernels/bvh/bvh_intersector_stream_filters.cpp
vendored
Normal file
657
thirdparty/embree/kernels/bvh/bvh_intersector_stream_filters.cpp
vendored
Normal file
@ -0,0 +1,657 @@
|
||||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_stream_filters.h"
|
||||
#include "bvh_intersector_stream.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int K, bool intersect>
|
||||
__noinline void RayStreamFilter::filterAOS(Scene* scene, void* _rayN, size_t N, size_t stride, IntersectContext* context)
|
||||
{
|
||||
RayStreamAOS rayN(_rayN);
|
||||
|
||||
/* use fast path for coherent ray mode */
|
||||
if (unlikely(context->isCoherent()))
|
||||
{
|
||||
__aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
|
||||
for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
|
||||
{
|
||||
const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);
|
||||
|
||||
/* convert from AOS to SOA */
|
||||
for (size_t j = 0; j < size; j += K)
|
||||
{
|
||||
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
|
||||
const vbool<K> valid = vij < vint<K>(int(N));
|
||||
const vint<K> offset = vij * int(stride);
|
||||
const size_t packetIndex = j / K;
|
||||
|
||||
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
|
||||
ray.tnear() = select(valid, ray.tnear(), zero);
|
||||
ray.tfar = select(valid, ray.tfar, neg_inf);
|
||||
|
||||
rays[packetIndex] = ray;
|
||||
rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
|
||||
}
|
||||
|
||||
/* trace stream */
|
||||
scene->intersectors.intersectN(rayPtrs, size, context);
|
||||
|
||||
/* convert from SOA to AOS */
|
||||
for (size_t j = 0; j < size; j += K)
|
||||
{
|
||||
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
|
||||
const vbool<K> valid = vij < vint<K>(int(N));
|
||||
const vint<K> offset = vij * int(stride);
|
||||
const size_t packetIndex = j / K;
|
||||
rayN.setHitByOffset(valid, offset, rays[packetIndex]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (unlikely(!intersect))
|
||||
{
|
||||
/* octant sorting for occlusion rays */
|
||||
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
|
||||
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
|
||||
unsigned int raysInOctant[8];
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
raysInOctant[i] = 0;
|
||||
size_t inputRayID = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int curOctant = -1;
|
||||
|
||||
/* sort rays into octants */
|
||||
for (; inputRayID < N;)
|
||||
{
|
||||
const Ray& ray = rayN.getRayByOffset(inputRayID * stride);
|
||||
|
||||
/* skip invalid rays */
|
||||
if (unlikely(ray.tnear() > ray.tfar || ray.tfar < 0.0f)) { inputRayID++; continue; } // ignore invalid or already occluded rays
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
if (unlikely(!ray.valid())) { inputRayID++; continue; }
|
||||
#endif
|
||||
|
||||
const unsigned int octantID = movemask(vfloat4(Vec3fa(ray.dir)) < 0.0f) & 0x7;
|
||||
|
||||
assert(octantID < 8);
|
||||
octants[octantID][raysInOctant[octantID]++] = (unsigned int)inputRayID;
|
||||
inputRayID++;
|
||||
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
|
||||
{
|
||||
curOctant = octantID;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* need to flush rays in octant? */
|
||||
if (unlikely(curOctant == -1))
|
||||
{
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
if (raysInOctant[i]) { curOctant = i; break; }
|
||||
}
|
||||
|
||||
/* all rays traced? */
|
||||
if (unlikely(curOctant == -1))
|
||||
break;
|
||||
|
||||
unsigned int* const rayIDs = &octants[curOctant][0];
|
||||
const unsigned int numOctantRays = raysInOctant[curOctant];
|
||||
assert(numOctantRays);
|
||||
|
||||
for (unsigned int j = 0; j < numOctantRays; j += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
|
||||
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
|
||||
const vint<K> offset = *(vint<K>*)&rayIDs[j] * int(stride);
|
||||
RayK<K>& ray = rays[j/K];
|
||||
rayPtrs[j/K] = &ray;
|
||||
ray = rayN.getRayByOffset<K>(valid, offset);
|
||||
ray.tnear() = select(valid, ray.tnear(), zero);
|
||||
ray.tfar = select(valid, ray.tfar, neg_inf);
|
||||
}
|
||||
|
||||
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
|
||||
|
||||
for (unsigned int j = 0; j < numOctantRays; j += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
|
||||
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
|
||||
const vint<K> offset = *(vint<K>*)&rayIDs[j] * int(stride);
|
||||
rayN.setHitByOffset<K>(valid, offset, rays[j/K]);
|
||||
}
|
||||
|
||||
raysInOctant[curOctant] = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* fallback to packets */
|
||||
for (size_t i = 0; i < N; i += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
|
||||
vbool<K> valid = vi < vint<K>(int(N));
|
||||
const vint<K> offset = vi * int(stride);
|
||||
|
||||
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
|
||||
valid &= ray.tnear() <= ray.tfar;
|
||||
|
||||
scene->intersectors.intersect(valid, ray, context);
|
||||
|
||||
rayN.setHitByOffset<K>(valid, offset, ray);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int K, bool intersect>
|
||||
__noinline void RayStreamFilter::filterAOP(Scene* scene, void** _rayN, size_t N, IntersectContext* context)
|
||||
{
|
||||
RayStreamAOP rayN(_rayN);
|
||||
|
||||
/* use fast path for coherent ray mode */
|
||||
if (unlikely(context->isCoherent()))
|
||||
{
|
||||
__aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
|
||||
for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
|
||||
{
|
||||
const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);
|
||||
|
||||
/* convert from AOP to SOA */
|
||||
for (size_t j = 0; j < size; j += K)
|
||||
{
|
||||
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
|
||||
const vbool<K> valid = vij < vint<K>(int(N));
|
||||
const size_t packetIndex = j / K;
|
||||
|
||||
RayTypeK<K, intersect> ray = rayN.getRayByIndex<K>(valid, vij);
|
||||
ray.tnear() = select(valid, ray.tnear(), zero);
|
||||
ray.tfar = select(valid, ray.tfar, neg_inf);
|
||||
|
||||
rays[packetIndex] = ray;
|
||||
rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
|
||||
}
|
||||
|
||||
/* trace stream */
|
||||
scene->intersectors.intersectN(rayPtrs, size, context);
|
||||
|
||||
/* convert from SOA to AOP */
|
||||
for (size_t j = 0; j < size; j += K)
|
||||
{
|
||||
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
|
||||
const vbool<K> valid = vij < vint<K>(int(N));
|
||||
const size_t packetIndex = j / K;
|
||||
|
||||
rayN.setHitByIndex<K>(valid, vij, rays[packetIndex]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (unlikely(!intersect))
|
||||
{
|
||||
/* octant sorting for occlusion rays */
|
||||
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
|
||||
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
|
||||
unsigned int raysInOctant[8];
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
raysInOctant[i] = 0;
|
||||
size_t inputRayID = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int curOctant = -1;
|
||||
|
||||
/* sort rays into octants */
|
||||
for (; inputRayID < N;)
|
||||
{
|
||||
const Ray& ray = rayN.getRayByIndex(inputRayID);
|
||||
|
||||
/* skip invalid rays */
|
||||
if (unlikely(ray.tnear() > ray.tfar || ray.tfar < 0.0f)) { inputRayID++; continue; } // ignore invalid or already occluded rays
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
if (unlikely(!ray.valid())) { inputRayID++; continue; }
|
||||
#endif
|
||||
|
||||
const unsigned int octantID = movemask(lt_mask(ray.dir,Vec3fa(0.0f)));
|
||||
|
||||
assert(octantID < 8);
|
||||
octants[octantID][raysInOctant[octantID]++] = (unsigned int)inputRayID;
|
||||
inputRayID++;
|
||||
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
|
||||
{
|
||||
curOctant = octantID;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* need to flush rays in octant? */
|
||||
if (unlikely(curOctant == -1))
|
||||
{
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
if (raysInOctant[i]) { curOctant = i; break; }
|
||||
}
|
||||
|
||||
/* all rays traced? */
|
||||
if (unlikely(curOctant == -1))
|
||||
break;
|
||||
|
||||
unsigned int* const rayIDs = &octants[curOctant][0];
|
||||
const unsigned int numOctantRays = raysInOctant[curOctant];
|
||||
assert(numOctantRays);
|
||||
|
||||
for (unsigned int j = 0; j < numOctantRays; j += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
|
||||
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
|
||||
const vint<K> index = *(vint<K>*)&rayIDs[j];
|
||||
RayK<K>& ray = rays[j/K];
|
||||
rayPtrs[j/K] = &ray;
|
||||
ray = rayN.getRayByIndex<K>(valid, index);
|
||||
ray.tnear() = select(valid, ray.tnear(), zero);
|
||||
ray.tfar = select(valid, ray.tfar, neg_inf);
|
||||
}
|
||||
|
||||
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
|
||||
|
||||
for (unsigned int j = 0; j < numOctantRays; j += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
|
||||
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
|
||||
const vint<K> index = *(vint<K>*)&rayIDs[j];
|
||||
rayN.setHitByIndex<K>(valid, index, rays[j/K]);
|
||||
}
|
||||
|
||||
raysInOctant[curOctant] = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* fallback to packets */
|
||||
for (size_t i = 0; i < N; i += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
|
||||
vbool<K> valid = vi < vint<K>(int(N));
|
||||
|
||||
RayTypeK<K, intersect> ray = rayN.getRayByIndex<K>(valid, vi);
|
||||
valid &= ray.tnear() <= ray.tfar;
|
||||
|
||||
scene->intersectors.intersect(valid, ray, context);
|
||||
|
||||
rayN.setHitByIndex<K>(valid, vi, ray);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int K, bool intersect>
|
||||
__noinline void RayStreamFilter::filterSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context)
|
||||
{
|
||||
const size_t rayDataAlignment = (size_t)rayData % (K*sizeof(float));
|
||||
const size_t offsetAlignment = (size_t)stride % (K*sizeof(float));
|
||||
|
||||
/* fast path for packets with the correct width and data alignment */
|
||||
if (likely(N == K &&
|
||||
!rayDataAlignment &&
|
||||
!offsetAlignment))
|
||||
{
|
||||
if (unlikely(context->isCoherent()))
|
||||
{
|
||||
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
|
||||
size_t packetIndex = 0;
|
||||
for (size_t i = 0; i < numPackets; i++)
|
||||
{
|
||||
const size_t offset = i * stride;
|
||||
RayTypeK<K, intersect>& ray = *(RayTypeK<K, intersect>*)(rayData + offset);
|
||||
rayPtrs[packetIndex++] = &ray;
|
||||
|
||||
/* trace as stream */
|
||||
if (unlikely(packetIndex == MAX_INTERNAL_STREAM_SIZE / K))
|
||||
{
|
||||
const size_t size = packetIndex*K;
|
||||
scene->intersectors.intersectN(rayPtrs, size, context);
|
||||
packetIndex = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* flush remaining packets */
|
||||
if (unlikely(packetIndex > 0))
|
||||
{
|
||||
const size_t size = packetIndex*K;
|
||||
scene->intersectors.intersectN(rayPtrs, size, context);
|
||||
}
|
||||
}
|
||||
else if (unlikely(!intersect))
|
||||
{
|
||||
/* octant sorting for occlusion rays */
|
||||
RayStreamSOA rayN(rayData, K);
|
||||
|
||||
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
|
||||
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
|
||||
unsigned int raysInOctant[8];
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
raysInOctant[i] = 0;
|
||||
size_t inputRayID = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int curOctant = -1;
|
||||
|
||||
/* sort rays into octants */
|
||||
for (; inputRayID < N*numPackets;)
|
||||
{
|
||||
const size_t offset = (inputRayID / K) * stride + (inputRayID % K) * sizeof(float);
|
||||
|
||||
/* skip invalid rays */
|
||||
if (unlikely(!rayN.isValidByOffset(offset))) { inputRayID++; continue; } // ignore invalid or already occluded rays
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
__aligned(64) Ray ray = rayN.getRayByOffset(offset);
|
||||
if (unlikely(!ray.valid())) { inputRayID++; continue; }
|
||||
#endif
|
||||
|
||||
const unsigned int octantID = (unsigned int)rayN.getOctantByOffset(offset);
|
||||
|
||||
assert(octantID < 8);
|
||||
octants[octantID][raysInOctant[octantID]++] = (unsigned int)offset;
|
||||
inputRayID++;
|
||||
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
|
||||
{
|
||||
curOctant = octantID;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* need to flush rays in octant? */
|
||||
if (unlikely(curOctant == -1))
|
||||
{
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
if (raysInOctant[i]) { curOctant = i; break; }
|
||||
}
|
||||
|
||||
/* all rays traced? */
|
||||
if (unlikely(curOctant == -1))
|
||||
break;
|
||||
|
||||
unsigned int* const rayOffsets = &octants[curOctant][0];
|
||||
const unsigned int numOctantRays = raysInOctant[curOctant];
|
||||
assert(numOctantRays);
|
||||
|
||||
for (unsigned int j = 0; j < numOctantRays; j += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
|
||||
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
|
||||
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
|
||||
RayK<K>& ray = rays[j/K];
|
||||
rayPtrs[j/K] = &ray;
|
||||
ray = rayN.getRayByOffset<K>(valid, offset);
|
||||
ray.tnear() = select(valid, ray.tnear(), zero);
|
||||
ray.tfar = select(valid, ray.tfar, neg_inf);
|
||||
}
|
||||
|
||||
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
|
||||
|
||||
for (unsigned int j = 0; j < numOctantRays; j += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
|
||||
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
|
||||
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
|
||||
rayN.setHitByOffset(valid, offset, rays[j/K]);
|
||||
}
|
||||
raysInOctant[curOctant] = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* fallback to packets */
|
||||
for (size_t i = 0; i < numPackets; i++)
|
||||
{
|
||||
const size_t offset = i * stride;
|
||||
RayTypeK<K, intersect>& ray = *(RayTypeK<K, intersect>*)(rayData + offset);
|
||||
const vbool<K> valid = ray.tnear() <= ray.tfar;
|
||||
|
||||
scene->intersectors.intersect(valid, ray, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* fallback to packets for arbitrary packet size and alignment */
|
||||
for (size_t i = 0; i < numPackets; i++)
|
||||
{
|
||||
const size_t offsetN = i * stride;
|
||||
RayStreamSOA rayN(rayData + offsetN, N);
|
||||
|
||||
for (size_t j = 0; j < N; j += K)
|
||||
{
|
||||
const size_t offset = j * sizeof(float);
|
||||
vbool<K> valid = (vint<K>(int(j)) + vint<K>(step)) < vint<K>(int(N));
|
||||
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
|
||||
valid &= ray.tnear() <= ray.tfar;
|
||||
|
||||
scene->intersectors.intersect(valid, ray, context);
|
||||
|
||||
rayN.setHitByOffset(valid, offset, ray);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int K, bool intersect>
|
||||
__noinline void RayStreamFilter::filterSOP(Scene* scene, const void* _rayN, size_t N, IntersectContext* context)
|
||||
{
|
||||
RayStreamSOP& rayN = *(RayStreamSOP*)_rayN;
|
||||
|
||||
/* use fast path for coherent ray mode */
|
||||
if (unlikely(context->isCoherent()))
|
||||
{
|
||||
__aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
|
||||
for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
|
||||
{
|
||||
const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);
|
||||
|
||||
/* convert from SOP to SOA */
|
||||
for (size_t j = 0; j < size; j += K)
|
||||
{
|
||||
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
|
||||
const vbool<K> valid = vij < vint<K>(int(N));
|
||||
const size_t offset = (i+j) * sizeof(float);
|
||||
const size_t packetIndex = j / K;
|
||||
|
||||
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
|
||||
ray.tnear() = select(valid, ray.tnear(), zero);
|
||||
ray.tfar = select(valid, ray.tfar, neg_inf);
|
||||
|
||||
rays[packetIndex] = ray;
|
||||
rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
|
||||
}
|
||||
|
||||
/* trace stream */
|
||||
scene->intersectors.intersectN(rayPtrs, size, context);
|
||||
|
||||
/* convert from SOA to SOP */
|
||||
for (size_t j = 0; j < size; j += K)
|
||||
{
|
||||
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
|
||||
const vbool<K> valid = vij < vint<K>(int(N));
|
||||
const size_t offset = (i+j) * sizeof(float);
|
||||
const size_t packetIndex = j / K;
|
||||
|
||||
rayN.setHitByOffset(valid, offset, rays[packetIndex]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (unlikely(!intersect))
|
||||
{
|
||||
/* octant sorting for occlusion rays */
|
||||
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
|
||||
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
|
||||
|
||||
unsigned int raysInOctant[8];
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
raysInOctant[i] = 0;
|
||||
size_t inputRayID = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int curOctant = -1;
|
||||
|
||||
/* sort rays into octants */
|
||||
for (; inputRayID < N;)
|
||||
{
|
||||
const size_t offset = inputRayID * sizeof(float);
|
||||
/* skip invalid rays */
|
||||
if (unlikely(!rayN.isValidByOffset(offset))) { inputRayID++; continue; } // ignore invalid or already occluded rays
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
__aligned(64) Ray ray = rayN.getRayByOffset(offset);
|
||||
if (unlikely(!ray.valid())) { inputRayID++; continue; }
|
||||
#endif
|
||||
|
||||
const unsigned int octantID = (unsigned int)rayN.getOctantByOffset(offset);
|
||||
|
||||
assert(octantID < 8);
|
||||
octants[octantID][raysInOctant[octantID]++] = (unsigned int)offset;
|
||||
inputRayID++;
|
||||
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
|
||||
{
|
||||
curOctant = octantID;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* need to flush rays in octant? */
|
||||
if (unlikely(curOctant == -1))
|
||||
{
|
||||
for (unsigned int i = 0; i < 8; i++)
|
||||
if (raysInOctant[i]) { curOctant = i; break; }
|
||||
}
|
||||
|
||||
/* all rays traced? */
|
||||
if (unlikely(curOctant == -1))
|
||||
break;
|
||||
|
||||
unsigned int* const rayOffsets = &octants[curOctant][0];
|
||||
const unsigned int numOctantRays = raysInOctant[curOctant];
|
||||
assert(numOctantRays);
|
||||
|
||||
for (unsigned int j = 0; j < numOctantRays; j += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
|
||||
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
|
||||
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
|
||||
RayK<K>& ray = rays[j/K];
|
||||
rayPtrs[j/K] = &ray;
|
||||
ray = rayN.getRayByOffset<K>(valid, offset);
|
||||
ray.tnear() = select(valid, ray.tnear(), zero);
|
||||
ray.tfar = select(valid, ray.tfar, neg_inf);
|
||||
}
|
||||
|
||||
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
|
||||
|
||||
for (unsigned int j = 0; j < numOctantRays; j += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
|
||||
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
|
||||
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
|
||||
rayN.setHitByOffset(valid, offset, rays[j/K]);
|
||||
}
|
||||
|
||||
raysInOctant[curOctant] = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* fallback to packets */
|
||||
for (size_t i = 0; i < N; i += K)
|
||||
{
|
||||
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
|
||||
vbool<K> valid = vi < vint<K>(int(N));
|
||||
const size_t offset = i * sizeof(float);
|
||||
|
||||
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
|
||||
valid &= ray.tnear() <= ray.tfar;
|
||||
|
||||
scene->intersectors.intersect(valid, ray, context);
|
||||
|
||||
rayN.setHitByOffset(valid, offset, ray);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void RayStreamFilter::intersectAOS(Scene* scene, RTCRayHit* _rayN, size_t N, size_t stride, IntersectContext* context) {
|
||||
if (unlikely(context->isCoherent()))
|
||||
filterAOS<VSIZEL, true>(scene, _rayN, N, stride, context);
|
||||
else
|
||||
filterAOS<VSIZEX, true>(scene, _rayN, N, stride, context);
|
||||
}
|
||||
|
||||
void RayStreamFilter::occludedAOS(Scene* scene, RTCRay* _rayN, size_t N, size_t stride, IntersectContext* context) {
|
||||
if (unlikely(context->isCoherent()))
|
||||
filterAOS<VSIZEL, false>(scene, _rayN, N, stride, context);
|
||||
else
|
||||
filterAOS<VSIZEX, false>(scene, _rayN, N, stride, context);
|
||||
}
|
||||
|
||||
void RayStreamFilter::intersectAOP(Scene* scene, RTCRayHit** _rayN, size_t N, IntersectContext* context) {
|
||||
if (unlikely(context->isCoherent()))
|
||||
filterAOP<VSIZEL, true>(scene, (void**)_rayN, N, context);
|
||||
else
|
||||
filterAOP<VSIZEX, true>(scene, (void**)_rayN, N, context);
|
||||
}
|
||||
|
||||
void RayStreamFilter::occludedAOP(Scene* scene, RTCRay** _rayN, size_t N, IntersectContext* context) {
|
||||
if (unlikely(context->isCoherent()))
|
||||
filterAOP<VSIZEL, false>(scene, (void**)_rayN, N, context);
|
||||
else
|
||||
filterAOP<VSIZEX, false>(scene, (void**)_rayN, N, context);
|
||||
}
|
||||
|
||||
void RayStreamFilter::intersectSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context) {
|
||||
if (unlikely(context->isCoherent()))
|
||||
filterSOA<VSIZEL, true>(scene, rayData, N, numPackets, stride, context);
|
||||
else
|
||||
filterSOA<VSIZEX, true>(scene, rayData, N, numPackets, stride, context);
|
||||
}
|
||||
|
||||
void RayStreamFilter::occludedSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context) {
|
||||
if (unlikely(context->isCoherent()))
|
||||
filterSOA<VSIZEL, false>(scene, rayData, N, numPackets, stride, context);
|
||||
else
|
||||
filterSOA<VSIZEX, false>(scene, rayData, N, numPackets, stride, context);
|
||||
}
|
||||
|
||||
void RayStreamFilter::intersectSOP(Scene* scene, const RTCRayHitNp* _rayN, size_t N, IntersectContext* context) {
|
||||
if (unlikely(context->isCoherent()))
|
||||
filterSOP<VSIZEL, true>(scene, _rayN, N, context);
|
||||
else
|
||||
filterSOP<VSIZEX, true>(scene, _rayN, N, context);
|
||||
}
|
||||
|
||||
void RayStreamFilter::occludedSOP(Scene* scene, const RTCRayNp* _rayN, size_t N, IntersectContext* context) {
|
||||
if (unlikely(context->isCoherent()))
|
||||
filterSOP<VSIZEL, false>(scene, _rayN, N, context);
|
||||
else
|
||||
filterSOP<VSIZEX, false>(scene, _rayN, N, context);
|
||||
}
|
||||
|
||||
|
||||
RayStreamFilterFuncs rayStreamFilterFuncs() {
|
||||
return RayStreamFilterFuncs(RayStreamFilter::intersectAOS, RayStreamFilter::intersectAOP, RayStreamFilter::intersectSOA, RayStreamFilter::intersectSOP,
|
||||
RayStreamFilter::occludedAOS, RayStreamFilter::occludedAOP, RayStreamFilter::occludedSOA, RayStreamFilter::occludedSOP);
|
||||
}
|
||||
};
|
||||
};
|
2
thirdparty/embree/kernels/config.h
vendored
2
thirdparty/embree/kernels/config.h
vendored
@ -16,7 +16,7 @@
|
||||
/* #undef EMBREE_GEOMETRY_INSTANCE */
|
||||
/* #undef EMBREE_GEOMETRY_GRID */
|
||||
/* #undef EMBREE_GEOMETRY_POINT */
|
||||
/* #undef EMBREE_RAY_PACKETS */
|
||||
#define EMBREE_RAY_PACKETS
|
||||
/* #undef EMBREE_COMPACT_POLYS */
|
||||
|
||||
#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0
|
||||
|
2
thirdparty/embree/kernels/hash.h
vendored
2
thirdparty/embree/kernels/hash.h
vendored
@ -2,4 +2,4 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#define RTC_HASH "7c53133eb21424f7f0ae1e25bf357e358feaf6ab"
|
||||
#define RTC_HASH "12b99393438a4cc9e478e33459eed78bec6233fd"
|
||||
|
Loading…
Reference in New Issue
Block a user