Merge pull request #88783 from Chubercik/embree-4.3.1
embree: Update to 4.3.1
This commit is contained in:
commit
acfcdbd291
|
@ -15,10 +15,10 @@ if env["builtin_embree"]:
|
|||
embree_src = [
|
||||
"common/sys/sysinfo.cpp",
|
||||
"common/sys/alloc.cpp",
|
||||
"common/sys/estring.cpp",
|
||||
"common/sys/filename.cpp",
|
||||
"common/sys/library.cpp",
|
||||
"common/sys/thread.cpp",
|
||||
"common/sys/string.cpp",
|
||||
"common/sys/regression.cpp",
|
||||
"common/sys/mutex.cpp",
|
||||
"common/sys/condition.cpp",
|
||||
|
@ -36,6 +36,7 @@ if env["builtin_embree"]:
|
|||
"kernels/common/rtcore.cpp",
|
||||
"kernels/common/rtcore_builder.cpp",
|
||||
"kernels/common/scene.cpp",
|
||||
"kernels/common/scene_verify.cpp",
|
||||
"kernels/common/alloc.cpp",
|
||||
"kernels/common/geometry.cpp",
|
||||
"kernels/common/scene_triangle_mesh.cpp",
|
||||
|
@ -56,8 +57,6 @@ if env["builtin_embree"]:
|
|||
"kernels/bvh/bvh_builder_twolevel.cpp",
|
||||
"kernels/bvh/bvh_intersector1_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream_filters.cpp",
|
||||
]
|
||||
|
||||
thirdparty_sources = [thirdparty_dir + file for file in embree_src]
|
||||
|
|
|
@ -1,6 +1,13 @@
|
|||
import glob, os, shutil, subprocess, re
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import stat
|
||||
import subprocess
|
||||
from types import TracebackType
|
||||
from typing import Any, Callable, Tuple, Type
|
||||
|
||||
git_tag = "v3.13.5"
|
||||
git_tag = "v4.3.1"
|
||||
|
||||
include_dirs = [
|
||||
"common/tasking",
|
||||
|
@ -15,7 +22,7 @@ include_dirs = [
|
|||
"common/simd",
|
||||
"common/simd/arm",
|
||||
"common/simd/wasm",
|
||||
"include/embree3",
|
||||
"include/embree4",
|
||||
"kernels/subdiv",
|
||||
"kernels/geometry",
|
||||
]
|
||||
|
@ -23,10 +30,10 @@ include_dirs = [
|
|||
cpp_files = [
|
||||
"common/sys/sysinfo.cpp",
|
||||
"common/sys/alloc.cpp",
|
||||
"common/sys/estring.cpp",
|
||||
"common/sys/filename.cpp",
|
||||
"common/sys/library.cpp",
|
||||
"common/sys/thread.cpp",
|
||||
"common/sys/string.cpp",
|
||||
"common/sys/regression.cpp",
|
||||
"common/sys/mutex.cpp",
|
||||
"common/sys/condition.cpp",
|
||||
|
@ -44,6 +51,7 @@ cpp_files = [
|
|||
"kernels/common/rtcore.cpp",
|
||||
"kernels/common/rtcore_builder.cpp",
|
||||
"kernels/common/scene.cpp",
|
||||
"kernels/common/scene_verify.cpp",
|
||||
"kernels/common/alloc.cpp",
|
||||
"kernels/common/geometry.cpp",
|
||||
"kernels/common/scene_triangle_mesh.cpp",
|
||||
|
@ -65,26 +73,58 @@ cpp_files = [
|
|||
"kernels/bvh/bvh_intersector1.cpp",
|
||||
"kernels/bvh/bvh_intersector1_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream_filters.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid.cpp",
|
||||
"kernels/bvh/bvh_intersector_stream.cpp",
|
||||
]
|
||||
|
||||
os.chdir("../../thirdparty")
|
||||
config_files = [
|
||||
"kernels/config.h.in",
|
||||
"kernels/rtcore_config.h.in",
|
||||
]
|
||||
|
||||
license_file = "LICENSE.txt"
|
||||
|
||||
os.chdir(f"{os.path.dirname(__file__)}/../../thirdparty")
|
||||
|
||||
dir_name = "embree"
|
||||
if os.path.exists(dir_name):
|
||||
shutil.rmtree(dir_name)
|
||||
|
||||
# In case something went wrong and embree-tmp stayed on the system.
|
||||
if os.path.exists("embree-tmp"):
|
||||
shutil.rmtree("embree-tmp")
|
||||
|
||||
subprocess.run(["git", "clone", "https://github.com/embree/embree.git", "embree-tmp"])
|
||||
os.chdir("embree-tmp")
|
||||
subprocess.run(["git", "checkout", git_tag])
|
||||
|
||||
commit_hash = str(subprocess.check_output(["git", "rev-parse", "HEAD"], universal_newlines=True)).strip()
|
||||
|
||||
|
||||
def on_rm_error(
|
||||
function: Callable[..., Any], path: str, excinfo: Tuple[Type[Exception], Exception, TracebackType]
|
||||
) -> None:
|
||||
"""
|
||||
Error handler for `shutil.rmtree()`.
|
||||
|
||||
If the error is due to read-only files,
|
||||
it will change the file permissions and retry.
|
||||
"""
|
||||
os.chmod(path, stat.S_IWRITE)
|
||||
os.unlink(path)
|
||||
|
||||
|
||||
# 3.12 Python and beyond should replace `onerror` with `onexc`.
|
||||
# We remove the .git directory because it contains
|
||||
# a lot of read-only files that are problematic on Windows.
|
||||
shutil.rmtree(".git", onerror=on_rm_error)
|
||||
|
||||
all_files = set(cpp_files)
|
||||
|
||||
for config_file in config_files:
|
||||
all_files.add(config_file)
|
||||
|
||||
all_files.add(license_file)
|
||||
|
||||
dest_dir = os.path.join("..", dir_name)
|
||||
for include_dir in include_dirs:
|
||||
headers = glob.iglob(os.path.join(include_dir, "*.h"))
|
||||
|
@ -105,87 +145,8 @@ with open(os.path.join(dest_dir, "kernels/hash.h"), "w", encoding="utf-8", newli
|
|||
"""
|
||||
)
|
||||
|
||||
with open(os.path.join(dest_dir, "kernels/config.h"), "w", encoding="utf-8", newline="\n") as config_file:
|
||||
config_file.write(
|
||||
"""// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/* #undef EMBREE_RAY_MASK */
|
||||
/* #undef EMBREE_STAT_COUNTERS */
|
||||
/* #undef EMBREE_BACKFACE_CULLING */
|
||||
/* #undef EMBREE_BACKFACE_CULLING_CURVES */
|
||||
#define EMBREE_FILTER_FUNCTION
|
||||
/* #undef EMBREE_IGNORE_INVALID_RAYS */
|
||||
#define EMBREE_GEOMETRY_TRIANGLE
|
||||
/* #undef EMBREE_GEOMETRY_QUAD */
|
||||
/* #undef EMBREE_GEOMETRY_CURVE */
|
||||
/* #undef EMBREE_GEOMETRY_SUBDIVISION */
|
||||
/* #undef EMBREE_GEOMETRY_USER */
|
||||
/* #undef EMBREE_GEOMETRY_INSTANCE */
|
||||
/* #undef EMBREE_GEOMETRY_GRID */
|
||||
/* #undef EMBREE_GEOMETRY_POINT */
|
||||
#define EMBREE_RAY_PACKETS
|
||||
/* #undef EMBREE_COMPACT_POLYS */
|
||||
|
||||
#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0
|
||||
#define EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
#define IF_ENABLED_TRIS(x) x
|
||||
#else
|
||||
#define IF_ENABLED_TRIS(x)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
#define IF_ENABLED_QUADS(x) x
|
||||
#else
|
||||
#define IF_ENABLED_QUADS(x)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
|
||||
#define IF_ENABLED_CURVES_OR_POINTS(x) x
|
||||
#else
|
||||
#define IF_ENABLED_CURVES_OR_POINTS(x)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_CURVE)
|
||||
#define IF_ENABLED_CURVES(x) x
|
||||
#else
|
||||
#define IF_ENABLED_CURVES(x)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_POINT)
|
||||
#define IF_ENABLED_POINTS(x) x
|
||||
#else
|
||||
#define IF_ENABLED_POINTS(x)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
|
||||
#define IF_ENABLED_SUBDIV(x) x
|
||||
#else
|
||||
#define IF_ENABLED_SUBDIV(x)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
#define IF_ENABLED_USER(x) x
|
||||
#else
|
||||
#define IF_ENABLED_USER(x)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
#define IF_ENABLED_INSTANCE(x) x
|
||||
#else
|
||||
#define IF_ENABLED_INSTANCE(x)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_GRID)
|
||||
#define IF_ENABLED_GRIDS(x) x
|
||||
#else
|
||||
#define IF_ENABLED_GRIDS(x)
|
||||
#endif
|
||||
"""
|
||||
)
|
||||
|
||||
for config_file in config_files:
|
||||
os.rename(os.path.join(dest_dir, config_file), os.path.join(dest_dir, config_file[:-3]))
|
||||
|
||||
with open("CMakeLists.txt", "r", encoding="utf-8") as cmake_file:
|
||||
cmake_content = cmake_file.read()
|
||||
|
@ -193,70 +154,25 @@ with open("CMakeLists.txt", "r", encoding="utf-8") as cmake_file:
|
|||
minor_version = int(re.compile(r"EMBREE_VERSION_MINOR\s(\d+)").findall(cmake_content)[0])
|
||||
patch_version = int(re.compile(r"EMBREE_VERSION_PATCH\s(\d+)").findall(cmake_content)[0])
|
||||
|
||||
shutil.move(os.path.join(dest_dir, "kernels/rtcore_config.h"), os.path.join(dest_dir, ("include/embree4/")))
|
||||
|
||||
with open(
|
||||
os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w", encoding="utf-8", newline="\n"
|
||||
) as config_file:
|
||||
config_file.write(
|
||||
f"""// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#define RTC_VERSION_MAJOR {major_version}
|
||||
#define RTC_VERSION_MINOR {minor_version}
|
||||
#define RTC_VERSION_PATCH {patch_version}
|
||||
#define RTC_VERSION {major_version}{minor_version:02d}{patch_version:02d}
|
||||
#define RTC_VERSION_STRING "{major_version}.{minor_version}.{patch_version}"
|
||||
|
||||
#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
|
||||
|
||||
#define EMBREE_MIN_WIDTH 0
|
||||
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
|
||||
|
||||
#if !defined(EMBREE_STATIC_LIB)
|
||||
# define EMBREE_STATIC_LIB
|
||||
#endif
|
||||
/* #undef EMBREE_API_NAMESPACE*/
|
||||
|
||||
#if defined(EMBREE_API_NAMESPACE)
|
||||
# define RTC_NAMESPACE
|
||||
# define RTC_NAMESPACE_BEGIN namespace {{
|
||||
# define RTC_NAMESPACE_END }}
|
||||
# define RTC_NAMESPACE_USE using namespace;
|
||||
# define RTC_API_EXTERN_C
|
||||
# undef EMBREE_API_NAMESPACE
|
||||
#else
|
||||
# define RTC_NAMESPACE_BEGIN
|
||||
# define RTC_NAMESPACE_END
|
||||
# define RTC_NAMESPACE_USE
|
||||
# if defined(__cplusplus)
|
||||
# define RTC_API_EXTERN_C extern "C"
|
||||
# else
|
||||
# define RTC_API_EXTERN_C
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ISPC)
|
||||
# define RTC_API_IMPORT extern "C" unmasked
|
||||
# define RTC_API_EXPORT extern "C" unmasked
|
||||
#elif defined(EMBREE_STATIC_LIB)
|
||||
# define RTC_API_IMPORT RTC_API_EXTERN_C
|
||||
# define RTC_API_EXPORT RTC_API_EXTERN_C
|
||||
#elif defined(_WIN32)
|
||||
# define RTC_API_IMPORT RTC_API_EXTERN_C __declspec(dllimport)
|
||||
# define RTC_API_EXPORT RTC_API_EXTERN_C __declspec(dllexport)
|
||||
#else
|
||||
# define RTC_API_IMPORT RTC_API_EXTERN_C
|
||||
# define RTC_API_EXPORT RTC_API_EXTERN_C __attribute__ ((visibility ("default")))
|
||||
#endif
|
||||
|
||||
#if defined(RTC_EXPORT_API)
|
||||
# define RTC_API RTC_API_EXPORT
|
||||
#else
|
||||
# define RTC_API RTC_API_IMPORT
|
||||
#endif
|
||||
"""
|
||||
)
|
||||
os.path.join(dest_dir, "include/embree4/rtcore_config.h"), "r+", encoding="utf-8", newline="\n"
|
||||
) as rtcore_config:
|
||||
lines = rtcore_config.readlines()
|
||||
rtcore_config.seek(0)
|
||||
for i, line in enumerate(lines):
|
||||
if line.startswith("#define RTC_VERSION_MAJOR"):
|
||||
lines[i : i + 5] = [
|
||||
f"#define RTC_VERSION_MAJOR {major_version}\n",
|
||||
f"#define RTC_VERSION_MINOR {minor_version}\n",
|
||||
f"#define RTC_VERSION_PATCH {patch_version}\n",
|
||||
f"#define RTC_VERSION {major_version}{minor_version:02d}{patch_version:02d}\n",
|
||||
f'#define RTC_VERSION_STRING "{major_version}.{minor_version}.{patch_version}"\n',
|
||||
]
|
||||
break
|
||||
rtcore_config.writelines(lines)
|
||||
rtcore_config.truncate()
|
||||
|
||||
os.chdir("..")
|
||||
shutil.rmtree("embree-tmp")
|
||||
|
@ -264,4 +180,4 @@ shutil.rmtree("embree-tmp")
|
|||
subprocess.run(["git", "restore", "embree/patches"])
|
||||
|
||||
for patch in os.listdir("embree/patches"):
|
||||
subprocess.run(["git", "apply", "embree/patches/" + patch])
|
||||
subprocess.run(["git", "apply", f"embree/patches/{patch}"])
|
||||
|
|
|
@ -69,11 +69,12 @@ void LightmapRaycasterEmbree::filter_function(const struct RTCFilterFunctionNArg
|
|||
}
|
||||
|
||||
bool LightmapRaycasterEmbree::intersect(Ray &r_ray) {
|
||||
RTCIntersectContext context;
|
||||
|
||||
rtcInitIntersectContext(&context);
|
||||
|
||||
rtcIntersect1(embree_scene, &context, (RTCRayHit *)&r_ray);
|
||||
RTCRayQueryContext context;
|
||||
rtcInitRayQueryContext(&context);
|
||||
RTCIntersectArguments args;
|
||||
rtcInitIntersectArguments(&args);
|
||||
args.context = &context;
|
||||
rtcIntersect1(embree_scene, (RTCRayHit *)&r_ray, &args);
|
||||
return r_ray.geomID != RTC_INVALID_GEOMETRY_ID;
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
#include "core/object/object.h"
|
||||
#include "scene/3d/lightmapper.h"
|
||||
|
||||
#include <embree3/rtcore.h>
|
||||
#include <embree4/rtcore.h>
|
||||
|
||||
class LightmapRaycasterEmbree : public LightmapRaycaster {
|
||||
GDCLASS(LightmapRaycasterEmbree, LightmapRaycaster);
|
||||
|
|
|
@ -488,11 +488,13 @@ void RaycastOcclusionCull::Scenario::update() {
|
|||
}
|
||||
|
||||
void RaycastOcclusionCull::Scenario::_raycast(uint32_t p_idx, const RaycastThreadData *p_raycast_data) const {
|
||||
RTCIntersectContext ctx;
|
||||
rtcInitIntersectContext(&ctx);
|
||||
ctx.flags = RTC_INTERSECT_CONTEXT_FLAG_COHERENT;
|
||||
|
||||
rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &ctx, &p_raycast_data->rays[p_idx]);
|
||||
RTCRayQueryContext context;
|
||||
rtcInitRayQueryContext(&context);
|
||||
RTCIntersectArguments args;
|
||||
rtcInitIntersectArguments(&args);
|
||||
args.flags = RTC_RAY_QUERY_FLAG_COHERENT;
|
||||
args.context = &context;
|
||||
rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &p_raycast_data->rays[p_idx], &args);
|
||||
}
|
||||
|
||||
void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const {
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
#include "scene/resources/mesh.h"
|
||||
#include "servers/rendering/renderer_scene_occlusion_cull.h"
|
||||
|
||||
#include <embree3/rtcore.h>
|
||||
#include <embree4/rtcore.h>
|
||||
|
||||
class RaycastOcclusionCull : public RendererSceneOcclusionCull {
|
||||
typedef RTCRayHit16 CameraRayTile;
|
||||
|
|
|
@ -53,9 +53,12 @@ void StaticRaycasterEmbree::free() {
|
|||
}
|
||||
|
||||
bool StaticRaycasterEmbree::intersect(Ray &r_ray) {
|
||||
RTCIntersectContext context;
|
||||
rtcInitIntersectContext(&context);
|
||||
rtcIntersect1(embree_scene, &context, (RTCRayHit *)&r_ray);
|
||||
RTCRayQueryContext context;
|
||||
rtcInitRayQueryContext(&context);
|
||||
RTCIntersectArguments args;
|
||||
rtcInitIntersectArguments(&args);
|
||||
args.context = &context;
|
||||
rtcIntersect1(embree_scene, (RTCRayHit *)&r_ray, &args);
|
||||
return r_ray.geomID != RTC_INVALID_GEOMETRY_ID;
|
||||
}
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
|
||||
#include "core/math/static_raycaster.h"
|
||||
|
||||
#include <embree3/rtcore.h>
|
||||
#include <embree4/rtcore.h>
|
||||
|
||||
class StaticRaycasterEmbree : public StaticRaycaster {
|
||||
GDCLASS(StaticRaycasterEmbree, StaticRaycaster);
|
||||
|
|
|
@ -172,13 +172,15 @@ Files extracted from upstream source:
|
|||
## embree
|
||||
|
||||
- Upstream: https://github.com/embree/embree
|
||||
- Version: 3.13.5 (698442324ccddd11725fb8875275dc1384f7fb40, 2022)
|
||||
- Version: 4.3.1 (daa8de0e714e18ad5e5c9841b67c1950d9c91c51, 2024)
|
||||
- License: Apache 2.0
|
||||
|
||||
Files extracted from upstream:
|
||||
|
||||
- All `.cpp` files listed in `modules/raycast/godot_update_embree.py`
|
||||
- All header files in the directories listed in `modules/raycast/godot_update_embree.py`
|
||||
- All config files listed in `modules/raycast/godot_update_embree.py`
|
||||
- `LICENSE.txt`
|
||||
|
||||
The `modules/raycast/godot_update_embree.py` script can be used to pull the
|
||||
relevant files from the latest Embree release and apply some automatic changes.
|
||||
|
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -12,7 +12,8 @@ namespace embree
|
|||
template<typename Index, class UnaryPredicate>
|
||||
__forceinline bool parallel_any_of (Index first, Index last, UnaryPredicate pred)
|
||||
{
|
||||
bool ret = false;
|
||||
std::atomic_bool ret;
|
||||
ret = false;
|
||||
|
||||
#if defined(TASKING_TBB)
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include "../tasking/taskscheduler.h"
|
||||
#include "../sys/array.h"
|
||||
#include "../math/math.h"
|
||||
#include "../math/emath.h"
|
||||
#include "../math/range.h"
|
||||
|
||||
namespace embree
|
||||
|
@ -14,17 +14,17 @@ namespace embree
|
|||
template<typename Index, typename Func>
|
||||
__forceinline void parallel_for( const Index N, const Func& func)
|
||||
{
|
||||
#if defined(TASKING_INTERNAL)
|
||||
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
|
||||
if (N) {
|
||||
TaskScheduler::TaskGroupContext context;
|
||||
TaskScheduler::spawn(Index(0),N,Index(1),[&] (const range<Index>& r) {
|
||||
assert(r.size() == 1);
|
||||
func(r.begin());
|
||||
});
|
||||
if (!TaskScheduler::wait())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
},&context);
|
||||
TaskScheduler::wait();
|
||||
if (context.cancellingException != nullptr) {
|
||||
std::rethrow_exception(context.cancellingException);
|
||||
}
|
||||
}
|
||||
#elif defined(TASKING_TBB)
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
|
@ -33,19 +33,13 @@ namespace embree
|
|||
func(i);
|
||||
},context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
throw std::runtime_error("task cancelled");
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
});
|
||||
if (tbb::task::self().is_cancelled())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
throw std::runtime_error("task cancelled");
|
||||
#endif
|
||||
|
||||
#elif defined(TASKING_PPL)
|
||||
|
@ -62,13 +56,13 @@ namespace embree
|
|||
__forceinline void parallel_for( const Index first, const Index last, const Index minStepSize, const Func& func)
|
||||
{
|
||||
assert(first <= last);
|
||||
#if defined(TASKING_INTERNAL)
|
||||
TaskScheduler::spawn(first,last,minStepSize,func);
|
||||
if (!TaskScheduler::wait())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
|
||||
TaskScheduler::TaskGroupContext context;
|
||||
TaskScheduler::spawn(first,last,minStepSize,func,&context);
|
||||
TaskScheduler::wait();
|
||||
if (context.cancellingException != nullptr) {
|
||||
std::rethrow_exception(context.cancellingException);
|
||||
}
|
||||
|
||||
#elif defined(TASKING_TBB)
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
|
@ -77,19 +71,13 @@ namespace embree
|
|||
func(range<Index>(r.begin(),r.end()));
|
||||
},context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
throw std::runtime_error("task cancelled");
|
||||
#else
|
||||
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
|
||||
func(range<Index>(r.begin(),r.end()));
|
||||
});
|
||||
if (tbb::task::self().is_cancelled())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
throw std::runtime_error("task cancelled");
|
||||
#endif
|
||||
|
||||
#elif defined(TASKING_PPL)
|
||||
|
@ -121,19 +109,13 @@ namespace embree
|
|||
func(i);
|
||||
},tbb::simple_partitioner(),context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
throw std::runtime_error("task cancelled");
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},tbb::simple_partitioner());
|
||||
if (tbb::task::self().is_cancelled())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
throw std::runtime_error("task cancelled");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -148,19 +130,13 @@ namespace embree
|
|||
func(i);
|
||||
},ap,context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
throw std::runtime_error("task cancelled");
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},ap);
|
||||
if (tbb::task::self().is_cancelled())
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task cancelled");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
throw std::runtime_error("task cancelled");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -175,8 +175,8 @@ namespace embree
|
|||
/* calculate all left and right ranges that are on the wrong global side */
|
||||
size_t numMisplacedRangesLeft = 0;
|
||||
size_t numMisplacedRangesRight = 0;
|
||||
size_t numMisplacedItemsLeft = 0;
|
||||
size_t numMisplacedItemsRight = 0;
|
||||
size_t numMisplacedItemsLeft MAYBE_UNUSED = 0;
|
||||
size_t numMisplacedItemsRight MAYBE_UNUSED = 0;
|
||||
|
||||
for (size_t i=0; i<numTasks; i++)
|
||||
{
|
||||
|
|
|
@ -43,7 +43,7 @@ namespace embree
|
|||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
#if defined(TASKING_INTERNAL)
|
||||
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
|
||||
|
||||
/* fast path for small number of iterations */
|
||||
Index taskCount = (last-first+minStepSize-1)/minStepSize;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#include "../sys/platform.h"
|
||||
#include "../sys/ref.h"
|
||||
#include "../sys/filename.h"
|
||||
#include "../sys/string.h"
|
||||
#include "../sys/estring.h"
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
@ -122,17 +122,16 @@ namespace embree
|
|||
class FileStream : public Stream<int>
|
||||
{
|
||||
public:
|
||||
|
||||
FileStream (FILE* file, const std::string& name = "file")
|
||||
: file(file), lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name))) {}
|
||||
|
||||
FileStream (const FileName& fileName)
|
||||
: lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(fileName.str())))
|
||||
{
|
||||
file = fopen(fileName.c_str(),"r");
|
||||
if (file == nullptr) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
|
||||
if (ifs) ifs.close();
|
||||
ifs.open(fileName.str());
|
||||
if (!ifs.is_open()) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
|
||||
}
|
||||
~FileStream() {
|
||||
if (ifs) ifs.close();
|
||||
}
|
||||
~FileStream() { if (file) fclose(file); }
|
||||
|
||||
public:
|
||||
ParseLocation location() {
|
||||
|
@ -140,14 +139,15 @@ namespace embree
|
|||
}
|
||||
|
||||
int next() {
|
||||
int c = fgetc(file);
|
||||
int c = ifs.get();
|
||||
if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
|
||||
charNumber++;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
FILE* file;
|
||||
std::ifstream ifs;
|
||||
ssize_t lineNumber; /// the line number the token is from
|
||||
ssize_t colNumber; /// the character number in the current line
|
||||
ssize_t charNumber; /// the character in the file
|
||||
|
|
|
@ -41,7 +41,9 @@ namespace embree
|
|||
int c = cin->get();
|
||||
// -- GODOT start --
|
||||
// if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
|
||||
if (!isValidChar(c)) abort();
|
||||
if (!isValidChar(c)) {
|
||||
abort();
|
||||
}
|
||||
// -- GODOT end --
|
||||
str.push_back((char)c);
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "tokenstream.h"
|
||||
#include "../math/math.h"
|
||||
#include "../math/emath.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
|
|
@ -337,7 +337,7 @@ namespace embree
|
|||
if (D) *D = sqrtf(D_x);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
__forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr)
|
||||
{
|
||||
Vec3fa::storeu(&ptr->l.vx, source.l.vx);
|
||||
|
|
|
@ -56,6 +56,11 @@ namespace embree
|
|||
return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
|
||||
}
|
||||
|
||||
/*! intersects two boxes */
|
||||
__forceinline static const BBox intersect (const BBox& a, const BBox& b) {
|
||||
return BBox(max(a.lower, b.lower), min(a.upper, b.upper));
|
||||
}
|
||||
|
||||
/*! enlarge box by some scaling factor */
|
||||
__forceinline BBox enlarge_by(const float a) const {
|
||||
return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
# include "color_sycl.h"
|
||||
#else
|
||||
|
||||
#include "constants.h"
|
||||
#include "col3.h"
|
||||
#include "col4.h"
|
||||
|
@ -64,6 +68,10 @@ namespace embree
|
|||
d.b = (unsigned char)(s[2]);
|
||||
d.a = (unsigned char)(s[3]);
|
||||
}
|
||||
/*! Writes the luminance of this color to f as a weighted sum of the
 *  red, green and blue channels (Rec. 709 / sRGB weights). */
__forceinline void set(float &f) const
{
  // The green weight is 0.7152. The former value 0.7125 had two digits
  // transposed, which made the weights sum to 0.9973 instead of 1, so a
  // white input (1,1,1) produced a luminance below 1.
  f = 0.2126f*r+0.7152f*g+0.0722f*b; // sRGB luminance.
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Constants
|
||||
|
@ -256,3 +264,5 @@ namespace embree
|
|||
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,219 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "constants.h"
|
||||
#include "col3.h"
|
||||
#include "col4.h"
|
||||
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SSE RGBA Color Class
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct Color4
|
||||
{
|
||||
struct { float r,g,b,a; };
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Construction
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline Color4 () {}
|
||||
//__forceinline Color4 ( const __m128 a ) : m128(a) {}
|
||||
|
||||
__forceinline explicit Color4 (const float v) : r(v), g(v), b(v), a(v) {}
|
||||
__forceinline Color4 (const float r, const float g, const float b, const float a) : r(r), g(g), b(b), a(a) {}
|
||||
|
||||
__forceinline explicit Color4 ( const Col3uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(1.0f) {}
|
||||
__forceinline explicit Color4 ( const Col3f& other ) : r(other.r), g(other.g), b(other.b), a(1.0f) {}
|
||||
__forceinline explicit Color4 ( const Col4uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(other.a/255.0f) {}
|
||||
__forceinline explicit Color4 ( const Col4f& other ) : r(other.r), g(other.g), b(other.b), a(other.a) {}
|
||||
|
||||
//__forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
|
||||
//__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
|
||||
|
||||
//__forceinline operator const __m128&() const { return m128; }
|
||||
//__forceinline operator __m128&() { return m128; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Set
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
|
||||
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
|
||||
|
||||
__forceinline void set(Col3uc& d) const
|
||||
{
|
||||
d.r = (unsigned char)(clamp(r)*255.0f);
|
||||
d.g = (unsigned char)(clamp(g)*255.0f);
|
||||
d.b = (unsigned char)(clamp(b)*255.0f);
|
||||
}
|
||||
|
||||
__forceinline void set(Col4uc& d) const
|
||||
{
|
||||
d.r = (unsigned char)(clamp(r)*255.0f);
|
||||
d.g = (unsigned char)(clamp(g)*255.0f);
|
||||
d.b = (unsigned char)(clamp(b)*255.0f);
|
||||
d.a = (unsigned char)(clamp(a)*255.0f);
|
||||
}
|
||||
__forceinline void set(float &f) const
|
||||
{
|
||||
f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Constants
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline Color4( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f), a(0.0f) {}
|
||||
__forceinline Color4( OneTy ) : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
|
||||
//__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
|
||||
//__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SSE RGB Color Class
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct Color
|
||||
{
|
||||
struct { float r,g,b; };
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Construction
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline Color () {}
|
||||
//__forceinline Color ( const __m128 a ) : m128(a) {}
|
||||
|
||||
__forceinline explicit Color (const float v) : r(v), g(v), b(v) {}
|
||||
__forceinline Color (const float r, const float g, const float b) : r(r), g(g), b(b) {}
|
||||
|
||||
//__forceinline Color ( const Color& other ) : m128(other.m128) {}
|
||||
//__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
|
||||
|
||||
//__forceinline Color ( const Color4& other ) : m128(other.m128) {}
|
||||
//__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
|
||||
|
||||
//__forceinline operator const __m128&() const { return m128; }
|
||||
//__forceinline operator __m128&() { return m128; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Set
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
|
||||
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
|
||||
|
||||
#if 0
|
||||
__forceinline void set(Col3uc& d) const
|
||||
{
|
||||
vfloat4 s = clamp(vfloat4(m128))*255.0f;
|
||||
d.r = (unsigned char)(s[0]);
|
||||
d.g = (unsigned char)(s[1]);
|
||||
d.b = (unsigned char)(s[2]);
|
||||
}
|
||||
__forceinline void set(Col4uc& d) const
|
||||
{
|
||||
vfloat4 s = clamp(vfloat4(m128))*255.0f;
|
||||
d.r = (unsigned char)(s[0]);
|
||||
d.g = (unsigned char)(s[1]);
|
||||
d.b = (unsigned char)(s[2]);
|
||||
d.a = 255;
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Constants
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline Color( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f) {}
|
||||
__forceinline Color( OneTy ) : r(1.0f), g(1.0f), b(1.0f) {}
|
||||
//__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
|
||||
//__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! unary plus: returns the color unchanged */
__forceinline const Color operator +( const Color& a )
{
  return a;
}

/*! unary minus: negates every channel */
__forceinline const Color operator -( const Color& a )
{
  const float negR = -a.r;
  const float negG = -a.g;
  const float negB = -a.b;
  return Color(negR, negG, negB);
}

/*! per-channel absolute value */
__forceinline const Color abs  ( const Color& a )
{
  const float absR = abs(a.r);
  const float absG = abs(a.g);
  const float absB = abs(a.b);
  return Color(absR, absG, absB);
}

/*! per-channel reciprocal, computed as 1/x */
__forceinline const Color rcp  ( const Color& a )
{
  const float invR = 1.0f/a.r;
  const float invG = 1.0f/a.g;
  const float invB = 1.0f/a.b;
  return Color(invR, invG, invB);
}

/*! per-channel reciprocal square root, computed as 1/sqrt(x) */
__forceinline const Color rsqrt( const Color& a )
{
  const float invRootR = 1.0f/sqrt(a.r);
  const float invRootG = 1.0f/sqrt(a.g);
  const float invRootB = 1.0f/sqrt(a.b);
  return Color(invRootR, invRootG, invRootB);
}

/*! per-channel square root */
__forceinline const Color sqrt ( const Color& a )
{
  const float rootR = sqrt(a.r);
  const float rootG = sqrt(a.g);
  const float rootB = sqrt(a.b);
  return Color(rootR, rootG, rootB);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Binary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! per-channel sum */
__forceinline const Color operator +( const Color& a, const Color& b )
{
  return Color(a.r+b.r,
               a.g+b.g,
               a.b+b.b);
}

/*! per-channel difference */
__forceinline const Color operator -( const Color& a, const Color& b )
{
  return Color(a.r-b.r,
               a.g-b.g,
               a.b-b.b);
}

/*! per-channel product */
__forceinline const Color operator *( const Color& a, const Color& b )
{
  return Color(a.r*b.r,
               a.g*b.g,
               a.b*b.b);
}

/*! scales a color by a scalar (the scalar is broadcast to a Color first) */
__forceinline const Color operator *( const Color& a, const float    b )
{
  const Color factor(b);
  return a * factor;
}

/*! scales a color by a scalar given on the left-hand side */
__forceinline const Color operator *( const float    a, const Color& b )
{
  const Color factor(a);
  return factor * b;
}

/*! per-channel division, implemented as multiplication by rcp(b) */
__forceinline const Color operator /( const Color& a, const Color& b )
{
  return a * rcp(b);
}

/*! division by a scalar, implemented as multiplication by rcp(b) */
__forceinline const Color operator /( const Color& a, const float    b )
{
  return a * rcp(b);
}

/*! per-channel minimum */
__forceinline const Color min( const Color& a, const Color& b )
{
  return Color(min(a.r,b.r),
               min(a.g,b.g),
               min(a.b,b.b));
}

/*! per-channel maximum */
__forceinline const Color max( const Color& a, const Color& b )
{
  return Color(max(a.r,b.r),
               max(a.g,b.g),
               max(a.b,b.b));
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Assignment Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! compound assignments: update a in place using the matching binary
 *  operator and return a copy of the updated value */
__forceinline const Color operator+=(Color& a, const Color& b)
{
  a = a + b;
  return a;
}

__forceinline const Color operator-=(Color& a, const Color& b)
{
  a = a - b;
  return a;
}

__forceinline const Color operator*=(Color& a, const Color& b)
{
  a = a * b;
  return a;
}

__forceinline const Color operator/=(Color& a, const Color& b)
{
  a = a / b;
  return a;
}

__forceinline const Color operator*=(Color& a, const float   b )
{
  a = a * b;
  return a;
}

__forceinline const Color operator/=(Color& a, const float   b )
{
  a = a / b;
  return a;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Reductions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! sum of the three channels */
__forceinline float reduce_add(const Color& v)
{
  return v.r+v.g+v.b;
}

/*! product of the three channels */
__forceinline float reduce_mul(const Color& v)
{
  return v.r*v.g*v.b;
}

/*! smallest of the three channels */
__forceinline float reduce_min(const Color& v)
{
  return min(v.r,
             v.g,
             v.b);
}

/*! largest of the three channels */
__forceinline float reduce_max(const Color& v)
{
  return max(v.r,
             v.g,
             v.b);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Comparison Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! true when all three channels compare equal */
__forceinline bool operator ==( const Color& a, const Color& b )
{
  const bool sameR = a.r == b.r;
  return sameR && a.g == b.g && a.b == b.b;
}

/*! true when at least one channel differs */
__forceinline bool operator !=( const Color& a, const Color& b )
{
  const bool differsR = a.r != b.r;
  return differsR || a.g != b.g || a.b != b.b;
}

/*! lexicographic ordering over (r, g, b): the first channel that differs
 *  decides; colors with identical channels are not less than each other */
__forceinline bool operator < ( const Color& a, const Color& b )
{
  return (a.r != b.r) ? (a.r < b.r)
       : (a.g != b.g) ? (a.g < b.g)
       :                (a.b < b.b);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Select
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! returns t when s is true, otherwise f */
__forceinline const Color select( bool s, const Color& t, const Color& f )
{
  if (s)
    return t;
  return f;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Special Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! computes luminance of a color */
|
||||
__forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
|
||||
|
||||
/*! output operator */
|
||||
inline std::ostream& operator<<(std::ostream& cout, const Color& a) {
|
||||
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
|
||||
}
|
||||
}
|
|
@ -8,6 +8,10 @@
|
|||
#include "constants.h"
|
||||
#include <cmath>
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
# include "math_sycl.h"
|
||||
#else
|
||||
|
||||
#if defined(__ARM_NEON)
|
||||
#include "../simd/arm/emulation.h"
|
||||
#else
|
||||
|
@ -44,6 +48,9 @@ namespace embree
|
|||
__forceinline int toInt (const float& a) { return int(a); }
|
||||
__forceinline float toFloat(const int& a) { return float(a); }
|
||||
|
||||
/*! reads the storage of a float as an int (no numeric conversion).
 *  NOTE(review): this goes through a pointer cast between unrelated types;
 *  confirm the build tolerates that with respect to strict aliasing. */
__forceinline int   asInt  (const float& a)
{
  int* const asIntPtr = (int*)&a;
  return *asIntPtr;
}

/*! reads the storage of an int as a float (no numeric conversion);
 *  same pointer-cast caveat as asInt */
__forceinline float asFloat(const int&   a)
{
  float* const asFloatPtr = (float*)&a;
  return *asFloatPtr;
}
|
||||
|
||||
#if defined(__WIN32__)
|
||||
__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
|
||||
#endif
|
||||
|
@ -351,7 +358,11 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur
|
|||
__forceinline int select(bool s, int t, int f) { return s ? t : f; }
|
||||
__forceinline float select(bool s, float t, float f) { return s ? t : f; }
|
||||
|
||||
__forceinline bool all(bool s) { return s; }
|
||||
__forceinline bool none(bool s) { return !s; }
|
||||
__forceinline bool all (bool s) { return s; }
|
||||
__forceinline bool any (bool s) { return s; }
|
||||
|
||||
__forceinline unsigned movemask (bool s) { return (unsigned)s; }
|
||||
|
||||
__forceinline float lerp(const float v0, const float v1, const float t) {
|
||||
return madd(1.0f-t,v0,t*v1);
|
||||
|
@ -453,3 +464,5 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur
|
|||
return x | (y << 1) | (z << 2);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -179,6 +179,48 @@ namespace embree
|
|||
bounds1 = b1;
|
||||
}
|
||||
|
||||
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
|
||||
__forceinline LBBox(const BBox1f& time_range_in, const LBBox<T> lbounds, const BBox1f& target_time_range)
|
||||
{
|
||||
const BBox3f bounds0 = lbounds.bounds0;
|
||||
const BBox3f bounds1 = lbounds.bounds1;
|
||||
|
||||
/* normalize global target_time_range to local time_range_in */
|
||||
const BBox1f time_range((target_time_range.lower-time_range_in.lower)/time_range_in.size(),
|
||||
(target_time_range.upper-time_range_in.lower)/time_range_in.size());
|
||||
|
||||
const BBox1f clipped_time_range(max(0.0f,time_range.lower), min(1.0f,time_range.upper));
|
||||
|
||||
/* compute bounds at begin and end of clipped time range */
|
||||
BBox<T> b0 = lerp(bounds0,bounds1,clipped_time_range.lower);
|
||||
BBox<T> b1 = lerp(bounds0,bounds1,clipped_time_range.upper);
|
||||
|
||||
/* make sure that b0 is properly bounded at time_range_in.lower */
|
||||
{
|
||||
const BBox<T> bt = lerp(b0, b1, (0.0f - time_range.lower) / time_range.size());
|
||||
const T dlower = min(bounds0.lower-bt.lower, T(zero));
|
||||
const T dupper = max(bounds0.upper-bt.upper, T(zero));
|
||||
b0.lower += dlower; b1.lower += dlower;
|
||||
b0.upper += dupper; b1.upper += dupper;
|
||||
}
|
||||
|
||||
/* make sure that b1 is properly bounded at time_range_in.upper */
|
||||
{
|
||||
const BBox<T> bt = lerp(b0, b1, (1.0f - time_range.lower) / time_range.size());
|
||||
const T dlower = min(bounds1.lower-bt.lower, T(zero));
|
||||
const T dupper = max(bounds1.upper-bt.upper, T(zero));
|
||||
b0.lower += dlower; b1.lower += dlower;
|
||||
b0.upper += dupper; b1.upper += dupper;
|
||||
}
|
||||
|
||||
this->bounds0 = b0;
|
||||
this->bounds1 = b1;
|
||||
}
|
||||
|
||||
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
|
||||
__forceinline LBBox(const BBox1f& time_range_in, const BBox<T>& bounds0, const BBox<T>& bounds1, const BBox1f& target_time_range)
|
||||
: LBBox(time_range_in,LBBox(bounds0,bounds1),target_time_range) {}
|
||||
|
||||
public:
|
||||
|
||||
__forceinline bool empty() const {
|
||||
|
|
|
@ -18,6 +18,7 @@ namespace embree
|
|||
|
||||
/*! default matrix constructor */
|
||||
__forceinline LinearSpace2 ( ) {}
|
||||
|
||||
/*! copy constructor: assigns both vectors from other */
__forceinline LinearSpace2           ( const LinearSpace2& other )
{
  vx = other.vx;
  vy = other.vy;
}

/*! copy assignment: assigns both vectors from other and returns *this */
__forceinline LinearSpace2& operator=( const LinearSpace2& other )
{
  vx = other.vx;
  vy = other.vy;
  return *this;
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ namespace embree
|
|||
|
||||
/*! default matrix constructor */
|
||||
__forceinline LinearSpace3 ( ) {}
|
||||
|
||||
/*! copy constructor: assigns all three vectors from other */
__forceinline LinearSpace3           ( const LinearSpace3& other )
{
  vx = other.vx;
  vy = other.vy;
  vz = other.vz;
}

/*! copy assignment: assigns all three vectors from other and returns *this */
__forceinline LinearSpace3& operator=( const LinearSpace3& other )
{
  vx = other.vx;
  vy = other.vy;
  vz = other.vz;
  return *this;
}
|
||||
|
||||
|
@ -90,17 +91,20 @@ namespace embree
|
|||
Vector vx,vy,vz;
|
||||
};
|
||||
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
/*! compute transposed matrix */
|
||||
template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const {
|
||||
vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz);
|
||||
return LinearSpace3<Vec3fa>(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
__forceinline const LinearSpace3<T> transposed(const LinearSpace3<T>& xfm) {
|
||||
return xfm.transposed();
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -0,0 +1,279 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../sys/platform.h"
|
||||
#include "../sys/intrinsics.h"
|
||||
#include "constants.h"
|
||||
#include <cmath>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! true when v lies strictly between -FLT_LARGE and +FLT_LARGE;
 *  the two tests are combined with a non-short-circuit & */
__forceinline bool isvalid ( const float& v )
{
  const bool aboveLower = v > -FLT_LARGE;
  const bool belowUpper = v < +FLT_LARGE;
  return aboveLower & belowUpper;
}
|
||||
|
||||
/*! bit pattern of a float, returned as int */
__forceinline int cast_f2i(float f)
{
  const int bits = __builtin_bit_cast(int,f);
  return bits;
}

/*! float whose bit pattern is the given int */
__forceinline float cast_i2f(int i)
{
  const float value = __builtin_bit_cast(float,i);
  return value;
}

/*! numeric conversion float -> int */
__forceinline int   toInt  (const float& a)
{
  return static_cast<int>(a);
}

/*! numeric conversion int -> float */
__forceinline float toFloat(const int&   a)
{
  return static_cast<float>(a);
}

/*! float whose bit pattern is the given int */
__forceinline float asFloat(const int a)
{
  const float value = __builtin_bit_cast(float,a);
  return value;
}

/*! bit pattern of a float, returned as int */
__forceinline int   asInt  (const float a)
{
  const int bits = __builtin_bit_cast(int,a);
  return bits;
}
|
||||
|
||||
//__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
|
||||
/*! -1 for x < 0, otherwise +1 */
__forceinline float sign ( const float x )
{
  if (x<0)
    return -1.0f;
  return 1.0f;
}

/*! square of x */
__forceinline float sqr  ( const float x )
{
  return x*x;
}

/*! reciprocal of x via sycl::native::recip */
__forceinline float rcp  ( const float x )
{
  const float inverse = sycl::native::recip(x);
  return inverse;
}

/*! keeps only the bit selected by the mask 0x80000000 of a's bit pattern
 *  and returns the result reinterpreted as float */
__forceinline float signmsk(const float a)
{
  const auto maskedBits = asInt(a) & 0x80000000;
  return asFloat(maskedBits);
}
|
||||
//__forceinline float signmsk ( const float x ) {
|
||||
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
|
||||
//}
|
||||
//__forceinline float xorf( const float x, const float y ) {
|
||||
// return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
|
||||
//}
|
||||
//__forceinline float andf( const float x, const unsigned y ) {
|
||||
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
|
||||
//}
|
||||
|
||||
/*! reciprocal square root of x via sycl::rsqrt */
__forceinline float rsqrt( const float x )
{
  const float invRoot = sycl::rsqrt(x);
  return invRoot;
}
|
||||
|
||||
//__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
|
||||
//__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
|
||||
//__forceinline int roundf(float f) { return (int)(f + 0.5f); }
|
||||
|
||||
/*! Scalar float math functions. Except for frac, each one simply forwards
 *  to the sycl:: function named in its body. */
__forceinline float abs  ( const float x )
{
  return sycl::fabs(x);
}

__forceinline float acos ( const float x )
{
  return sycl::acos(x);
}

__forceinline float asin ( const float x )
{
  return sycl::asin(x);
}

__forceinline float atan ( const float x )
{
  return sycl::atan(x);
}

__forceinline float atan2( const float y, const float x )
{
  return sycl::atan2(y, x);
}

__forceinline float cos  ( const float x )
{
  return sycl::cos(x);
}

__forceinline float cosh ( const float x )
{
  return sycl::cosh(x);
}

__forceinline float exp  ( const float x )
{
  return sycl::exp(x);
}

__forceinline float fmod ( const float x, const float y )
{
  return sycl::fmod(x, y);
}

__forceinline float log  ( const float x )
{
  return sycl::log(x);
}

__forceinline float log10( const float x )
{
  return sycl::log10(x);
}

__forceinline float pow  ( const float x, const float y )
{
  return sycl::pow(x, y);
}

__forceinline float sin  ( const float x )
{
  return sycl::sin(x);
}

__forceinline float sinh ( const float x )
{
  return sycl::sinh(x);
}

__forceinline float sqrt ( const float x )
{
  return sycl::sqrt(x);
}

__forceinline float tan  ( const float x )
{
  return sycl::tan(x);
}

__forceinline float tanh ( const float x )
{
  return sycl::tanh(x);
}

__forceinline float floor( const float x )
{
  return sycl::floor(x);
}

__forceinline float ceil ( const float x )
{
  return sycl::ceil(x);
}

/*! x minus floor(x) */
__forceinline float frac ( const float x )
{
  const float wholePart = floor(x);
  return x-wholePart;
}
|
||||
|
||||
//__forceinline double abs ( const double x ) { return ::fabs(x); }
|
||||
//__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
|
||||
//__forceinline double acos ( const double x ) { return ::acos (x); }
|
||||
//__forceinline double asin ( const double x ) { return ::asin (x); }
|
||||
//__forceinline double atan ( const double x ) { return ::atan (x); }
|
||||
//__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
|
||||
//__forceinline double cos ( const double x ) { return ::cos (x); }
|
||||
//__forceinline double cosh ( const double x ) { return ::cosh (x); }
|
||||
//__forceinline double exp ( const double x ) { return ::exp (x); }
|
||||
//__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
|
||||
//__forceinline double log ( const double x ) { return ::log (x); }
|
||||
//__forceinline double log10( const double x ) { return ::log10(x); }
|
||||
//__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
|
||||
//__forceinline double rcp ( const double x ) { return 1.0/x; }
|
||||
//__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
|
||||
//__forceinline double sin ( const double x ) { return ::sin (x); }
|
||||
//__forceinline double sinh ( const double x ) { return ::sinh (x); }
|
||||
//__forceinline double sqr ( const double x ) { return x*x; }
|
||||
//__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
|
||||
//__forceinline double tan ( const double x ) { return ::tan (x); }
|
||||
//__forceinline double tanh ( const double x ) { return ::tanh (x); }
|
||||
//__forceinline double floor( const double x ) { return ::floor (x); }
|
||||
//__forceinline double ceil ( const double x ) { return ::ceil (x); }
|
||||
|
||||
/*
|
||||
#if defined(__SSE4_1__)
|
||||
__forceinline float mini(float a, float b) {
|
||||
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
|
||||
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
|
||||
const __m128i ci = _mm_min_epi32(ai,bi);
|
||||
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
__forceinline float maxi(float a, float b) {
|
||||
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
|
||||
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
|
||||
const __m128i ci = _mm_max_epi32(ai,bi);
|
||||
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
|
||||
}
|
||||
#endif
|
||||
*/
|
||||
|
||||
template<typename T>
|
||||
__forceinline T twice(const T& a) { return a+a; }
|
||||
|
||||
/*! two-argument minimum: integer overloads forward to sycl::min,
 *  floating-point overloads to sycl::fmin */
__forceinline      int min(int      a, int      b)
{
  return sycl::min(a,b);
}

__forceinline unsigned min(unsigned a, unsigned b)
{
  return sycl::min(a,b);
}

__forceinline  int64_t min(int64_t  a, int64_t  b)
{
  return sycl::min(a,b);
}

__forceinline    float min(float    a, float    b)
{
  return sycl::fmin(a,b);
}

__forceinline   double min(double   a, double   b)
{
  return sycl::fmin(a,b);
}

#if defined(__X86_64__)
__forceinline   size_t min(size_t   a, size_t   b)
{
  return sycl::min(a,b);
}
#endif

/*! minimum of three values, built from the two-argument min */
template<typename T> __forceinline T min(const T& a, const T& b, const T& c)
{
  return min(min(a,b),
             c);
}

/*! minimum of four values: min of the pairwise minima */
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d)
{
  return min(min(a,b),
             min(c,d));
}

/*! minimum of five values: the four-value scheme combined with e */
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e)
{
  return min(min(min(a,b),
                 min(c,d)),
             e);
}
|
||||
|
||||
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
|
||||
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
|
||||
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
|
||||
|
||||
/*! two-argument maximum: integer overloads forward to sycl::max,
 *  floating-point overloads to sycl::fmax */
__forceinline      int max(int      a, int      b)
{
  return sycl::max(a,b);
}

__forceinline unsigned max(unsigned a, unsigned b)
{
  return sycl::max(a,b);
}

__forceinline  int64_t max(int64_t  a, int64_t  b)
{
  return sycl::max(a,b);
}

__forceinline    float max(float    a, float    b)
{
  return sycl::fmax(a,b);
}

__forceinline   double max(double   a, double   b)
{
  return sycl::fmax(a,b);
}

#if defined(__X86_64__)
__forceinline   size_t max(size_t   a, size_t   b)
{
  return sycl::max(a,b);
}
#endif

/*! maximum of three values, built from the two-argument max */
template<typename T> __forceinline T max(const T& a, const T& b, const T& c)
{
  return max(max(a,b),
             c);
}

/*! maximum of four values: max of the pairwise maxima */
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d)
{
  return max(max(a,b),
             max(c,d));
}

/*! maximum of five values: the four-value scheme combined with e */
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e)
{
  return max(max(max(a,b),
                 max(c,d)),
             e);
}
|
||||
|
||||
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
|
||||
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
|
||||
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
|
||||
|
||||
/*! limits x to [lower, upper] (defaults: T(zero) and T(one));
 *  the upper bound is applied first, then the lower bound */
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one))
{
  return max(min(x,upper),
             lower);
}

/*! limits x to [T(zero), upper] */
template<typename T> __forceinline T clampz(const T& x, const T& upper)
{
  return max(T(zero),
             min(x,upper));
}
|
||||
|
||||
/*! degrees to radians: multiplies by the constant below */
template<typename T> __forceinline  T deg2rad ( const T& x )
{
  return x * T(1.74532925199432957692e-2f);
}

/*! radians to degrees: multiplies by the constant below */
template<typename T> __forceinline  T rad2deg ( const T& x )
{
  return x * T(5.72957795130823208768e1f);
}

/*! sqrt(max(0, 1 - x*x)) */
template<typename T> __forceinline  T sin2cos ( const T& x )
{
  return sqrt(max(T(zero),T(one)-x*x));
}

/*! same formula as sin2cos; forwards to it */
template<typename T> __forceinline  T cos2sin ( const T& x )
{
  return sin2cos(x);
}
|
||||
|
||||
/*! Scalar multiply-add variants, all expressed through sycl::fma. */

/*! a*b + c */
__forceinline float madd  ( const float a, const float b, const float c)
{
  return sycl::fma(a, b, c);
}

/*! a*b - c */
__forceinline float msub  ( const float a, const float b, const float c)
{
  return sycl::fma(a, b, -c);
}

/*! -a*b + c */
__forceinline float nmadd ( const float a, const float b, const float c)
{
  return sycl::fma(-a, b, c);
}

/*! -(a*b + c) */
__forceinline float nmsub ( const float a, const float b, const float c)
{
  return -sycl::fma(a, b, c);
}
|
||||
|
||||
/*! random functions */
|
||||
/*
|
||||
template<typename T> T random() { return T(0); }
|
||||
template<> __forceinline int random() { return int(rand()); }
|
||||
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
|
||||
template<> __forceinline float random() { return rand()/float(RAND_MAX); }
|
||||
template<> __forceinline double random() { return rand()/double(RAND_MAX); }
|
||||
*/
|
||||
|
||||
/*! selects */
|
||||
/*! Scalar select: returns t when s is true, otherwise f. */
__forceinline bool  select(bool s, bool  t , bool f)
{
  if (s) return t;
  return f;
}

__forceinline int   select(bool s, int   t,   int f)
{
  if (s) return t;
  return f;
}

__forceinline float select(bool s, float t, float f)
{
  if (s) return t;
  return f;
}
|
||||
|
||||
/*! Mask reductions for a single bool: none() is the negation of the
 *  flag, all() and any() return the flag unchanged. */
__forceinline bool none(bool s)
{
  return !s;
}

__forceinline bool all (bool s)
{
  return s;
}

__forceinline bool any (bool s)
{
  return s;
}

/*! Converts the flag to an unsigned value (false -> 0, true -> 1). */
__forceinline unsigned movemask (bool s)
{
  return static_cast<unsigned>(s);
}
|
||||
|
||||
/*! Linear interpolation: (1-t)*v0 + t*v1, evaluated through madd. */
__forceinline float lerp(const float v0, const float v1, const float t)
{
  const float weight0 = 1.0f-t;
  return madd(weight0, v0, t*v1);
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
|
||||
return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
|
||||
}
|
||||
|
||||
/*! exchange */
|
||||
/*! Exchanges the values of a and b through a temporary copy. */
template<typename T> __forceinline void xchg ( T& a, T& b )
{
  const T held = a;
  a = b;
  b = held;
}
|
||||
|
||||
/* load/store */
|
||||
template<typename Ty> struct mem;
|
||||
|
||||
template<> struct mem<float> {
|
||||
static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
|
||||
static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
|
||||
|
||||
static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
|
||||
static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
|
||||
};
|
||||
|
||||
/*! bit reverse operation */
|
||||
/*! bit reverse operation
 *
 *  Reverses the order of the 32 bits of vin by swapping adjacent bits,
 *  then bit pairs, nibbles, bytes and finally the two 16-bit halves.
 *  The masks are 32-bit constants, so the routine operates on 32-bit
 *  values. */
template<class T>
__forceinline T bitReverse(const T& vin)
{
  T v = vin;
  v = ((v >>  1) & 0x55555555) | ((v & 0x55555555) <<  1);
  v = ((v >>  2) & 0x33333333) | ((v & 0x33333333) <<  2);
  v = ((v >>  4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) <<  4);
  v = ((v >>  8) & 0x00FF00FF) | ((v & 0x00FF00FF) <<  8);
  /* mask the shifted-down half like the steps above: for a signed T the
   * right shift sign-extends and would otherwise fill the upper half of
   * the result with ones whenever bit 31 is set at this point */
  v = ((v >> 16) & 0x0000FFFF) | ( v               << 16);
  return v;
}
|
||||
|
||||
/*! bit interleave operation */
|
||||
template<class T>
|
||||
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
|
||||
{
|
||||
T x = xin, y = yin, z = zin;
|
||||
x = (x | (x << 16)) & 0x030000FF;
|
||||
x = (x | (x << 8)) & 0x0300F00F;
|
||||
x = (x | (x << 4)) & 0x030C30C3;
|
||||
x = (x | (x << 2)) & 0x09249249;
|
||||
|
||||
y = (y | (y << 16)) & 0x030000FF;
|
||||
y = (y | (y << 8)) & 0x0300F00F;
|
||||
y = (y | (y << 4)) & 0x030C30C3;
|
||||
y = (y | (y << 2)) & 0x09249249;
|
||||
|
||||
z = (z | (z << 16)) & 0x030000FF;
|
||||
z = (z | (z << 8)) & 0x0300F00F;
|
||||
z = (z | (z << 4)) & 0x030C30C3;
|
||||
z = (z | (z << 2)) & 0x09249249;
|
||||
|
||||
return x | (y << 1) | (z << 2);
|
||||
}
|
||||
|
||||
/*! bit interleave operation for 64bit data types*/
|
||||
template<class T>
|
||||
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
|
||||
T x = xin & 0x1fffff;
|
||||
T y = yin & 0x1fffff;
|
||||
T z = zin & 0x1fffff;
|
||||
|
||||
x = (x | x << 32) & 0x1f00000000ffff;
|
||||
x = (x | x << 16) & 0x1f0000ff0000ff;
|
||||
x = (x | x << 8) & 0x100f00f00f00f00f;
|
||||
x = (x | x << 4) & 0x10c30c30c30c30c3;
|
||||
x = (x | x << 2) & 0x1249249249249249;
|
||||
|
||||
y = (y | y << 32) & 0x1f00000000ffff;
|
||||
y = (y | y << 16) & 0x1f0000ff0000ff;
|
||||
y = (y | y << 8) & 0x100f00f00f00f00f;
|
||||
y = (y | y << 4) & 0x10c30c30c30c30c3;
|
||||
y = (y | y << 2) & 0x1249249249249249;
|
||||
|
||||
z = (z | z << 32) & 0x1f00000000ffff;
|
||||
z = (z | z << 16) & 0x1f0000ff0000ff;
|
||||
z = (z | z << 8) & 0x100f00f00f00f00f;
|
||||
z = (z | z << 4) & 0x10c30c30c30c30c3;
|
||||
z = (z | z << 2) & 0x1249249249249249;
|
||||
|
||||
return x | (y << 1) | (z << 2);
|
||||
}
|
||||
}
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "../sys/platform.h"
|
||||
#include "../math/math.h"
|
||||
#include "../math/emath.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
@ -34,7 +34,7 @@ namespace embree
|
|||
__forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {}
|
||||
|
||||
__forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; }
|
||||
__forceinline Vec2( const Vec2fa& other );
|
||||
Vec2( const Vec2fa& other );
|
||||
|
||||
template<typename T1> __forceinline Vec2( const Vec2<T1>& a ) : x(T(a.x)), y(T(a.y)) {}
|
||||
template<typename T1> __forceinline Vec2& operator =( const Vec2<T1>& other ) { x = other.x; y = other.y; return *this; }
|
||||
|
@ -232,4 +232,5 @@ namespace embree
|
|||
#if defined(__AVX512F__)
|
||||
/*! Explicit specialization of the Vec2fa converting constructor for Vec2<vfloat16>: x and y are initialized from a.x and a.y. */
template<> __forceinline Vec2<vfloat16>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
|
|
@ -4,7 +4,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "../sys/alloc.h"
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
# include "vec2fa_sycl.h"
|
||||
#else
|
||||
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
|
@ -316,3 +321,5 @@ namespace embree
|
|||
|
||||
typedef Vec2fa Vec2fa_t;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,270 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../sys/alloc.h"
|
||||
#include "emath.h"
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct Vec3fa;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SYCL Vec2fa Type
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Two-component float vector used on the SYCL device path. The type is
 *  declared with 16-byte alignment and stores x and y as plain floats. */
struct __aligned(16) Vec2fa
{
  //ALIGNED_STRUCT_(16);

  typedef float Scalar;
  enum { N = 2 };
  struct { float x,y; };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! Default constructor; x and y are left uninitialized. */
  __forceinline Vec2fa( ) {}
  //__forceinline Vec2fa( const __m128 a ) : m128(a) {}

  /*! Conversion from Vec3fa; only declared here. */
  explicit Vec2fa(const Vec3fa& a);

  /*! Takes elements 0 and 1 of a 4-wide float vector. */
  __forceinline explicit Vec2fa( const vfloat<4>& a )
    : x(a[0]), y(a[1]) {}

  /*! Conversion and assignment from Vec2<float>. */
  __forceinline Vec2fa ( const Vec2<float>& other )
    : x(other.x), y(other.y) {}

  __forceinline Vec2fa& operator =( const Vec2<float>& other )
  {
    x = other.x;
    y = other.y;
    return *this;
  }

  /*! Copy construction and copy assignment. */
  __forceinline Vec2fa ( const Vec2fa& other )
    : x(other.x), y(other.y) {}

  __forceinline Vec2fa& operator =( const Vec2fa& other )
  {
    x = other.x;
    y = other.y;
    return *this;
  }

  /*! Broadcasts a to both components. */
  __forceinline explicit Vec2fa( const float a ) : x(a), y(a) {}

  /*! Component-wise construction. */
  __forceinline Vec2fa( const float x, const float y) : x(x), y(y) {}

  //__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}

  //__forceinline operator const __m128&() const { return m128; }
  //__forceinline operator       __m128&()       { return m128; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /*! Reads two consecutive floats starting at a. */
  static __forceinline Vec2fa load( const void* const a )
  {
    const float* const src = static_cast<const float*>(a);
    return Vec2fa(src[0], src[1]);
  }

  /*! Same implementation as load(). */
  static __forceinline Vec2fa loadu( const void* const a )
  {
    const float* const src = static_cast<const float*>(a);
    return Vec2fa(src[0], src[1]);
  }

  /*! Writes v.x and v.y to two consecutive floats starting at a. */
  static __forceinline void storeu ( void* a, const Vec2fa& v )
  {
    float* const dst = static_cast<float*>(a);
    dst[0] = v.x;
    dst[1] = v.y;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec2fa( ZeroTy   ) : x(0.0f), y(0.0f) {}
  __forceinline Vec2fa( OneTy    ) : x(1.0f), y(1.0f) {}
  __forceinline Vec2fa( PosInfTy ) : x(+INFINITY), y(+INFINITY) {}
  __forceinline Vec2fa( NegInfTy ) : x(-INFINITY), y(-INFINITY) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  //__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
  //__forceinline       float& operator []( const size_t index )       { assert(index < 2); return (&x)[index]; }
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Unary plus: returns a copy of a. */
__forceinline Vec2fa operator +( const Vec2fa& a )
{
  return a;
}

/*! Unary minus: negates both components. */
__forceinline Vec2fa operator -( const Vec2fa& a )
{
  const float nx = -a.x;
  const float ny = -a.y;
  return Vec2fa(nx, ny);
}

/*! Component-wise sycl::fabs. */
__forceinline Vec2fa abs  ( const Vec2fa& a )
{
  const float ax = sycl::fabs(a.x);
  const float ay = sycl::fabs(a.y);
  return Vec2fa(ax, ay);
}

/*! Component-wise sycl::sign. */
__forceinline Vec2fa sign ( const Vec2fa& a )
{
  const float sx = sycl::sign(a.x);
  const float sy = sycl::sign(a.y);
  return Vec2fa(sx, sy);
}
|
||||
|
||||
//__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); }
|
||||
/*! Component-wise native-precision reciprocal. Uses the public
 *  sycl::native::recip builtin rather than the implementation-internal
 *  __sycl_std::__invoke_native_recip entry point, which is not part of
 *  the SYCL API. */
__forceinline Vec2fa rcp  ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); }
|
||||
/*! Component-wise sycl::sqrt. */
__forceinline Vec2fa sqrt ( const Vec2fa& a )
{
  const float rx = sycl::sqrt(a.x);
  const float ry = sycl::sqrt(a.y);
  return Vec2fa(rx, ry);
}

/*! Component-wise square. */
__forceinline Vec2fa sqr  ( const Vec2fa& a )
{
  const float xx = a.x*a.x;
  const float yy = a.y*a.y;
  return Vec2fa(xx, yy);
}

/*! Component-wise sycl::rsqrt. */
__forceinline Vec2fa rsqrt( const Vec2fa& a )
{
  const float rx = sycl::rsqrt(a.x);
  const float ry = sycl::rsqrt(a.y);
  return Vec2fa(rx, ry);
}
|
||||
|
||||
/*! Replaces every component whose magnitude is below min_rcp_input by
 *  min_rcp_input; other components are passed through unchanged. */
__forceinline Vec2fa zero_fix(const Vec2fa& a)
{
  float x = a.x;
  if (sycl::fabs(a.x) < min_rcp_input) x = min_rcp_input;

  float y = a.y;
  if (sycl::fabs(a.y) < min_rcp_input) y = min_rcp_input;

  return Vec2fa(x,y);
}

/*! Reciprocal of the zero_fix()-adjusted input. */
__forceinline Vec2fa rcp_safe(const Vec2fa& a)
{
  const Vec2fa adjusted = zero_fix(a);
  return rcp(adjusted);
}
|
||||
/*! Component-wise sycl::log. */
__forceinline Vec2fa log ( const Vec2fa& a )
{
  const float lx = sycl::log(a.x);
  const float ly = sycl::log(a.y);
  return Vec2fa(lx, ly);
}

/*! Component-wise sycl::exp. */
__forceinline Vec2fa exp ( const Vec2fa& a )
{
  const float ex = sycl::exp(a.x);
  const float ey = sycl::exp(a.y);
  return Vec2fa(ex, ey);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Binary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Component-wise arithmetic on Vec2fa. The overloads taking a float
 *  apply that scalar to both components. */
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(a.x+b.x, a.y+b.y);
}

__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(a.x-b.x, a.y-b.y);
}

__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(a.x*b.x, a.y*b.y);
}

__forceinline Vec2fa operator *( const Vec2fa& a, const float b )
{
  const Vec2fa splat(b);
  return a * splat;
}

__forceinline Vec2fa operator *( const float a, const Vec2fa& b )
{
  const Vec2fa splat(a);
  return splat * b;
}

__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(a.x/b.x, a.y/b.y);
}

__forceinline Vec2fa operator /( const Vec2fa& a, const float b )
{
  return Vec2fa(a.x/b, a.y/b);
}

__forceinline Vec2fa operator /( const float a, const Vec2fa& b )
{
  return Vec2fa(a/b.x, a/b.y);
}
|
||||
|
||||
/*! Component-wise minimum via sycl::fmin. */
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b )
{
  const float lo_x = sycl::fmin(a.x,b.x);
  const float lo_y = sycl::fmin(a.y,b.y);
  return Vec2fa(lo_x, lo_y);
}

/*! Component-wise maximum via sycl::fmax. */
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b )
{
  const float hi_x = sycl::fmax(a.x,b.x);
  const float hi_y = sycl::fmax(a.y,b.y);
  return Vec2fa(hi_x, hi_y);
}
|
||||
|
||||
/*
|
||||
#if defined(__SSE4_1__)
|
||||
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
|
||||
const vint4 ai = _mm_castps_si128(a);
|
||||
const vint4 bi = _mm_castps_si128(b);
|
||||
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||
return _mm_castsi128_ps(ci);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
|
||||
const vint4 ai = _mm_castps_si128(a);
|
||||
const vint4 bi = _mm_castps_si128(b);
|
||||
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||
return _mm_castsi128_ps(ci);
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
|
||||
return Vec2fa(powf(a.x,b),powf(a.y,b));
|
||||
}
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Ternary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Component-wise multiply-add variants; each component is computed by
 *  the scalar function of the same name. */
__forceinline Vec2fa madd  ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c)
{
  const float rx = madd(a.x,b.x,c.x);
  const float ry = madd(a.y,b.y,c.y);
  return Vec2fa(rx, ry);
}

__forceinline Vec2fa msub  ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c)
{
  const float rx = msub(a.x,b.x,c.x);
  const float ry = msub(a.y,b.y,c.y);
  return Vec2fa(rx, ry);
}

__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c)
{
  const float rx = nmadd(a.x,b.x,c.x);
  const float ry = nmadd(a.y,b.y,c.y);
  return Vec2fa(rx, ry);
}

__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c)
{
  const float rx = nmsub(a.x,b.x,c.x);
  const float ry = nmsub(a.y,b.y,c.y);
  return Vec2fa(rx, ry);
}

/*! Overloads with a scalar first factor: the scalar is broadcast to a
 *  Vec2fa and the vector overload is called. */
__forceinline Vec2fa madd  ( const float a, const Vec2fa& b, const Vec2fa& c)
{
  const Vec2fa splat(a);
  return madd(splat,b,c);
}

__forceinline Vec2fa msub  ( const float a, const Vec2fa& b, const Vec2fa& c)
{
  const Vec2fa splat(a);
  return msub(splat,b,c);
}

__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c)
{
  const Vec2fa splat(a);
  return nmadd(splat,b,c);
}

__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c)
{
  const Vec2fa splat(a);
  return nmsub(splat,b,c);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Assignment Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Compound assignment: each operator stores the result of the matching
 *  binary operator in a and returns a reference to a. */
__forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b )
{
  a = a + b;
  return a;
}

__forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b )
{
  a = a - b;
  return a;
}

__forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b )
{
  a = a * b;
  return a;
}

__forceinline Vec2fa& operator *=( Vec2fa& a, const float b )
{
  a = a * b;
  return a;
}

__forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b )
{
  a = a / b;
  return a;
}

__forceinline Vec2fa& operator /=( Vec2fa& a, const float b )
{
  a = a / b;
  return a;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Reductions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Horizontal reductions over the two components. */

/*! x + y */
__forceinline float reduce_add(const Vec2fa& v)
{
  return v.x+v.y;
}

/*! x * y */
__forceinline float reduce_mul(const Vec2fa& v)
{
  return v.x*v.y;
}

/*! sycl::fmin of x and y */
__forceinline float reduce_min(const Vec2fa& v)
{
  return sycl::fmin(v.x,v.y);
}

/*! sycl::fmax of x and y */
__forceinline float reduce_max(const Vec2fa& v)
{
  return sycl::fmax(v.x,v.y);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Comparison Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! True when both components compare equal. */
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b )
{
  if (!(a.x == b.x)) return false;
  return a.y == b.y;
}

/*! True when at least one component compares unequal. */
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b )
{
  if (a.x != b.x) return true;
  return a.y != b.y;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Euclidean Space Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Dot product: sum of the component-wise product. */
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b )
{
  const Vec2fa products = a*b;
  return reduce_add(products);
}

/*! Returns (-a.y, a.x). */
__forceinline Vec2fa cross ( const Vec2fa& a )
{
  const float cx = -a.y;
  const float cy = a.x;
  return Vec2fa(cx, cy);
}
|
||||
|
||||
/*! Squared length: dot(a,a). */
__forceinline float sqr_length ( const Vec2fa& a )
{
  return dot(a,a);
}

/*! Reciprocal length: rsqrt of the squared length. */
__forceinline float rcp_length ( const Vec2fa& a )
{
  const float lenSq = dot(a,a);
  return rsqrt(lenSq);
}

/*! Reciprocal of the squared length. */
__forceinline float rcp_length2( const Vec2fa& a )
{
  const float lenSq = dot(a,a);
  return rcp(lenSq);
}

/*! Length: sqrt of the squared length. */
__forceinline float length ( const Vec2fa& a )
{
  const float lenSq = dot(a,a);
  return sqrt(lenSq);
}

/*! Scales a by the reciprocal of its length. */
__forceinline Vec2fa normalize( const Vec2fa& a )
{
  const float lenSq = dot(a,a);
  return a*rsqrt(lenSq);
}

/*! Length of the difference vector a-b. */
__forceinline float distance ( const Vec2fa& a, const Vec2fa& b )
{
  const Vec2fa delta = a-b;
  return length(delta);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Select
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Returns the components of t when s is true, otherwise those of f. */
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f )
{
  if (s) return Vec2fa(t.x, t.y);
  return Vec2fa(f.x, f.y);
}

/*! Linear interpolation: (1-t)*v0 + t*v1, evaluated through madd. */
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t)
{
  const float weight0 = 1.0f-t;
  return madd(weight0, v0, t*v1);
}
|
||||
|
||||
/*! Index of the component with the larger magnitude: 0 when |x| > |y|,
 *  otherwise 1 (ties select 1). */
__forceinline int maxDim ( const Vec2fa& a )
{
  const Vec2fa magnitude = abs(a);
  return (magnitude.x > magnitude.y) ? 0 : 1;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Rounding Functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Component-wise rounding, forwarding to the sycl builtin of the same name. */
__forceinline Vec2fa trunc( const Vec2fa& a )
{
  const float rx = sycl::trunc(a.x);
  const float ry = sycl::trunc(a.y);
  return Vec2fa(rx, ry);
}

__forceinline Vec2fa floor( const Vec2fa& a )
{
  const float rx = sycl::floor(a.x);
  const float ry = sycl::floor(a.y);
  return Vec2fa(rx, ry);
}

__forceinline Vec2fa ceil ( const Vec2fa& a )
{
  const float rx = sycl::ceil (a.x);
  const float ry = sycl::ceil (a.y);
  return Vec2fa(rx, ry);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Output Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Streams the vector as "(x, y)". */
inline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a)
{
  return cout << "("
              << a.x << ", "
              << a.y
              << ")";
}
|
||||
|
||||
/*template<>
|
||||
__forceinline vfloat_impl<4>::vfloat_impl(const Vec2fa& a)
|
||||
{
|
||||
v = 0;
|
||||
const unsigned int lid = get_sub_group_local_id();
|
||||
if (lid == 0) v = a.x;
|
||||
if (lid == 1) v = a.y;
|
||||
}*/
|
||||
|
||||
typedef Vec2fa Vec2fa_t;
|
||||
}
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
@ -286,6 +286,8 @@ namespace embree
|
|||
|
||||
/*! Explicit specialization of the Vec3fa converting constructor for Vec3<float>: copies a.x, a.y and a.z. */
template<> __forceinline Vec3<float>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; }
|
||||
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
#if defined(__AVX__)
|
||||
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
|
||||
x = a.x; y = a.y; z = a.z;
|
||||
|
@ -333,4 +335,23 @@ namespace embree
|
|||
#if defined(__AVX512F__)
|
||||
/*! Explicit specialization of the Vec3fa converting constructor for Vec3<vfloat16>: x, y and z are initialized from a.x, a.y and a.z. */
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) : x(a.x), y(a.y), z(a.z) {}
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__SSE__)
|
||||
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
|
||||
x = a.x; y = a.y; z = a.z;
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX__)
|
||||
template<> __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
|
||||
x = a.x; y = a.y; z = a.z;
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX512F__)
|
||||
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) {
|
||||
x = a.x; y = a.y; z = a.z;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -4,7 +4,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "../sys/alloc.h"
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
# include "vec3ba_sycl.h"
|
||||
#else
|
||||
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
|
@ -118,3 +123,5 @@ namespace embree
|
|||
return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")";
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../sys/alloc.h"
|
||||
#include "emath.h"
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SYCL Vec3ba Type
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Three-component boolean vector used on the SYCL device path. The
 *  type is declared with 16-byte alignment and stores x, y and z as
 *  plain bools. */
struct __aligned(16) Vec3ba
{
  //ALIGNED_STRUCT_(16);

  struct { bool x,y,z; };

  typedef bool Scalar;
  enum { N = 3 };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! Default constructor; the components are left uninitialized. */
  __forceinline Vec3ba( ) {}
  //__forceinline Vec3ba( const __m128  input ) : m128(input) {}

  /*! Copy construction and copy assignment. */
  __forceinline Vec3ba( const Vec3ba& other )
    : x(other.x), y(other.y), z(other.z) {}

  __forceinline Vec3ba& operator =(const Vec3ba& other)
  {
    x = other.x;
    y = other.y;
    z = other.z;
    return *this;
  }

  /*! Broadcasts a to all three components. */
  __forceinline explicit Vec3ba( bool a )
    : x(a), y(a), z(a) {}

  /*! Component-wise construction. */
  __forceinline Vec3ba( bool a, bool b, bool c)
    : x(a), y(b), z(c) {}

  //__forceinline operator const __m128&() const { return m128; }
  //__forceinline operator       __m128&()       { return m128; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3ba( FalseTy ) : x(false), y(false), z(false) {}
  __forceinline Vec3ba( TrueTy  ) : x(true),  y(true),  z(true)  {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  //__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
  //__forceinline       int& operator []( const size_t index )       { assert(index < 3); return (&x)[index]; }
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Component-wise logical negation. */
__forceinline Vec3ba operator !( const Vec3ba& a )
{
  const bool nx = !a.x;
  const bool ny = !a.y;
  const bool nz = !a.z;
  return Vec3ba(nx, ny, nz);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Binary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Component-wise AND. */
__forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b )
{
  return Vec3ba(a.x & b.x,
                a.y & b.y,
                a.z & b.z);
}

/*! Component-wise OR. */
__forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b )
{
  return Vec3ba(a.x | b.x,
                a.y | b.y,
                a.z | b.z);
}

/*! Component-wise XOR, expressed as inequality of the bool components. */
__forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b )
{
  return Vec3ba(a.x != b.x,
                a.y != b.y,
                a.z != b.z);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Assignment Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Compound assignment: each operator stores the result of the matching
 *  binary operator in a and returns a reference to a. */
__forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b )
{
  a = a & b;
  return a;
}

__forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b )
{
  a = a | b;
  return a;
}

__forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b )
{
  a = a ^ b;
  return a;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Comparison Operators + Select
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! True when all three components are equal. */
__forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b )
{
  if (a.x != b.x) return false;
  if (a.y != b.y) return false;
  return a.z == b.z;
}

/*! True when at least one component differs. */
__forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b )
{
  if (a.x != b.x) return true;
  if (a.y != b.y) return true;
  return a.z != b.z;
}
|
||||
/*
|
||||
__forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
|
||||
if (a.x != b.x) return a.x < b.x;
|
||||
if (a.y != b.y) return a.y < b.y;
|
||||
if (a.z != b.z) return a.z < b.z;
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Reduction Operations
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! AND of the three components. */
__forceinline bool reduce_and( const Vec3ba& a )
{
  return a.x & a.y & a.z;
}

/*! OR of the three components. */
__forceinline bool reduce_or ( const Vec3ba& a )
{
  return a.x | a.y | a.z;
}

/*! True when every component is set. */
__forceinline bool all       ( const Vec3ba& b )
{
  return reduce_and(b);
}

/*! True when at least one component is set. */
__forceinline bool any       ( const Vec3ba& b )
{
  return reduce_or(b);
}

/*! True when no component is set. */
__forceinline bool none      ( const Vec3ba& b )
{
  const bool anySet = reduce_or(b);
  return !anySet;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Output Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Streams the vector as "(x, y, z)" with each component printed as
 *  "1" (true) or "0" (false). The previous body returned the stream
 *  without writing anything, so the argument was silently dropped. */
inline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
  return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")";
}
|
||||
}
|
|
@ -4,7 +4,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "../sys/alloc.h"
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
# include "vec3fa_sycl.h"
|
||||
#else
|
||||
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
|
@ -441,7 +446,6 @@ namespace embree
|
|||
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
|
||||
|
||||
__forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; }
|
||||
|
||||
__forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; }
|
||||
|
||||
__forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {}
|
||||
|
@ -783,3 +787,5 @@ namespace embree
|
|||
|
||||
typedef Vec3fx Vec3ff;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,617 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../sys/alloc.h"
|
||||
#include "emath.h"
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SYCL Vec3fa Type
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Three-component float vector used on the SYCL device path. The type
 *  is declared with 16-byte alignment and stores x, y, z plus a fourth
 *  float named do_not_use that none of the constructors below
 *  initialize. */
struct __aligned(16) Vec3fa
{
  //ALIGNED_STRUCT_(16);

  typedef float Scalar;
  enum { N = 3 };
  struct { float x,y,z, do_not_use; };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! Default constructor; the components are left uninitialized. */
  __forceinline Vec3fa( ) {}
  //__forceinline Vec3fa( const __m128 a ) : m128(a) {}
  //__forceinline explicit Vec3fa(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]) {}

  /*! Conversion from Vec3<float>. */
  __forceinline Vec3fa ( const Vec3<float>& other )
    : x(other.x), y(other.y), z(other.z) {}
  //__forceinline Vec3fa& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }

  /*! Copy construction and copy assignment; only x, y and z are copied. */
  __forceinline Vec3fa ( const Vec3fa& other )
    : x(other.x), y(other.y), z(other.z) {}

  __forceinline Vec3fa& operator =( const Vec3fa& other )
  {
    x = other.x;
    y = other.y;
    z = other.z;
    return *this;
  }

  /*! Broadcasts a to x, y and z. */
  __forceinline explicit Vec3fa( const float a )
    : x(a), y(a), z(a) {}

  /*! Component-wise construction. */
  __forceinline Vec3fa( const float x, const float y, const float z)
    : x(x), y(y), z(z) {}

  /*! Converts each component of a Vec3ia to float. */
  __forceinline explicit Vec3fa( const Vec3ia& a )
    : x((float)a.x), y((float)a.y), z((float)a.z) {}

  //__forceinline operator const __m128&() const { return m128; }
  //__forceinline operator       __m128&()       { return m128; }

  /*! Conversion to vfloat4 as (x, y, z, 0.0f). */
  __forceinline operator vfloat4() const
  {
    return vfloat4(x,y,z,0.0f); // FIXME: we should not need this!!
  }

  //friend __forceinline Vec3fa copy_a( const Vec3fa& a, const Vec3fa& b ) { Vec3fa c = a; c.a = b.a; return c; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /*! Reads three consecutive floats starting at a. */
  static __forceinline Vec3fa load( const void* const a )
  {
    const float* const src = static_cast<const float*>(a);
    return Vec3fa(src[0], src[1], src[2]);
  }

  /*! Same implementation as load(). */
  static __forceinline Vec3fa loadu( const void* const a )
  {
    const float* const src = static_cast<const float*>(a);
    return Vec3fa(src[0], src[1], src[2]);
  }

  /*! Writes v.x, v.y and v.z to three consecutive floats starting at a. */
  static __forceinline void storeu ( void* a, const Vec3fa& v )
  {
    float* const dst = static_cast<float*>(a);
    dst[0] = v.x;
    dst[1] = v.y;
    dst[2] = v.z;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fa( ZeroTy   ) : x(0.0f), y(0.0f), z(0.0f) {}
  __forceinline Vec3fa( OneTy    ) : x(1.0f), y(1.0f), z(1.0f) {}
  __forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {}
  __forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /*! Indexed component access relative to &x; index must be below 3
   *  (checked by assert only). */
  __forceinline const float& operator []( const size_t index ) const
  {
    assert(index < 3);
    return (&x)[index];
  }

  __forceinline       float& operator []( const size_t index )
  {
    assert(index < 3);
    return (&x)[index];
  }
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Unary plus: returns a copy of a. */
__forceinline Vec3fa operator +( const Vec3fa& a )
{
  return a;
}

/*! Unary minus: negates x, y and z. */
__forceinline Vec3fa operator -( const Vec3fa& a )
{
  const float nx = -a.x;
  const float ny = -a.y;
  const float nz = -a.z;
  return Vec3fa(nx, ny, nz);
}

/*! Component-wise sycl::fabs. */
__forceinline Vec3fa abs  ( const Vec3fa& a )
{
  const float ax = sycl::fabs(a.x);
  const float ay = sycl::fabs(a.y);
  const float az = sycl::fabs(a.z);
  return Vec3fa(ax, ay, az);
}

/*! Component-wise sycl::sign. */
__forceinline Vec3fa sign ( const Vec3fa& a )
{
  const float sx = sycl::sign(a.x);
  const float sy = sycl::sign(a.y);
  const float sz = sycl::sign(a.z);
  return Vec3fa(sx, sy, sz);
}
|
||||
|
||||
//__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
|
||||
__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z)); }
|
||||
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
|
||||
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
|
||||
|
||||
__forceinline Vec3fa rsqrt( const Vec3fa& a ) { return Vec3fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z)); }
|
||||
|
||||
/*! Replaces every component whose magnitude is below min_rcp_input with
 *  min_rcp_input; other components pass through unchanged. */
__forceinline Vec3fa zero_fix(const Vec3fa& a)
{
  float fx = a.x;
  float fy = a.y;
  float fz = a.z;
  if (sycl::fabs(a.x) < min_rcp_input) fx = min_rcp_input;
  if (sycl::fabs(a.y) < min_rcp_input) fy = min_rcp_input;
  if (sycl::fabs(a.z) < min_rcp_input) fz = min_rcp_input;
  return Vec3fa(fx,fy,fz);
}

/*! Reciprocal of a after passing it through zero_fix. */
__forceinline Vec3fa rcp_safe(const Vec3fa& a)
{
  const Vec3fa guarded = zero_fix(a);
  return rcp(guarded);
}
|
||||
/*! Component-wise sycl::log. */
__forceinline Vec3fa log ( const Vec3fa& a )
{
  const float lx = sycl::log(a.x);
  const float ly = sycl::log(a.y);
  const float lz = sycl::log(a.z);
  return Vec3fa(lx,ly,lz);
}

/*! Component-wise sycl::exp. */
__forceinline Vec3fa exp ( const Vec3fa& a )
{
  const float ex = sycl::exp(a.x);
  const float ey = sycl::exp(a.y);
  const float ez = sycl::exp(a.z);
  return Vec3fa(ex,ey,ez);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Binary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Component-wise sum. */
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b )
{
  const float rx = a.x+b.x;
  const float ry = a.y+b.y;
  const float rz = a.z+b.z;
  return Vec3fa(rx,ry,rz);
}

/*! Component-wise difference. */
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b )
{
  const float rx = a.x-b.x;
  const float ry = a.y-b.y;
  const float rz = a.z-b.z;
  return Vec3fa(rx,ry,rz);
}

/*! Component-wise product. */
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b )
{
  const float rx = a.x*b.x;
  const float ry = a.y*b.y;
  const float rz = a.z*b.z;
  return Vec3fa(rx,ry,rz);
}

/*! Vector times scalar: the scalar is broadcast into a Vec3fa first. */
__forceinline Vec3fa operator *( const Vec3fa& a, const float b )
{
  const Vec3fa splat(b);
  return a * splat;
}

/*! Scalar times vector: the scalar is broadcast into a Vec3fa first. */
__forceinline Vec3fa operator *( const float a, const Vec3fa& b )
{
  const Vec3fa splat(a);
  return splat * b;
}

/*! Component-wise quotient. */
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b )
{
  const float rx = a.x/b.x;
  const float ry = a.y/b.y;
  const float rz = a.z/b.z;
  return Vec3fa(rx,ry,rz);
}

/*! Divides every component of a by the scalar b. */
__forceinline Vec3fa operator /( const Vec3fa& a, const float b )
{
  const float rx = a.x/b;
  const float ry = a.y/b;
  const float rz = a.z/b;
  return Vec3fa(rx,ry,rz);
}

/*! Divides the scalar a by every component of b. */
__forceinline Vec3fa operator /( const float a, const Vec3fa& b )
{
  const float rx = a/b.x;
  const float ry = a/b.y;
  const float rz = a/b.z;
  return Vec3fa(rx,ry,rz);
}
|
||||
|
||||
/*! Component-wise minimum via sycl::fmin. */
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b )
{
  const float lo_x = sycl::fmin(a.x,b.x);
  const float lo_y = sycl::fmin(a.y,b.y);
  const float lo_z = sycl::fmin(a.z,b.z);
  return Vec3fa(lo_x, lo_y, lo_z);
}

/*! Component-wise maximum via sycl::fmax. */
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b )
{
  const float hi_x = sycl::fmax(a.x,b.x);
  const float hi_y = sycl::fmax(a.y,b.y);
  const float hi_z = sycl::fmax(a.z,b.z);
  return Vec3fa(hi_x, hi_y, hi_z);
}
|
||||
|
||||
/*
|
||||
#if defined(__SSE4_1__)
|
||||
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
|
||||
const vint4 ai = _mm_castps_si128(a);
|
||||
const vint4 bi = _mm_castps_si128(b);
|
||||
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||
return _mm_castsi128_ps(ci);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
|
||||
const vint4 ai = _mm_castps_si128(a);
|
||||
const vint4 bi = _mm_castps_si128(b);
|
||||
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||
return _mm_castsi128_ps(ci);
|
||||
}
|
||||
#endif
|
||||
*/
|
||||
/*! Raises each component of a to the power b using powf. */
__forceinline Vec3fa pow ( const Vec3fa& a, const float& b )
{
  const float px = powf(a.x,b);
  const float py = powf(a.y,b);
  const float pz = powf(a.z,b);
  return Vec3fa(px,py,pz);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Ternary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Applies the scalar madd to each component triple (a, b, c). */
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c)
{
  const float rx = madd(a.x,b.x,c.x);
  const float ry = madd(a.y,b.y,c.y);
  const float rz = madd(a.z,b.z,c.z);
  return Vec3fa(rx, ry, rz);
}

/*! Applies the scalar msub to each component triple (a, b, c). */
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c)
{
  const float rx = msub(a.x,b.x,c.x);
  const float ry = msub(a.y,b.y,c.y);
  const float rz = msub(a.z,b.z,c.z);
  return Vec3fa(rx, ry, rz);
}

/*! Applies the scalar nmadd to each component triple (a, b, c). */
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c)
{
  const float rx = nmadd(a.x,b.x,c.x);
  const float ry = nmadd(a.y,b.y,c.y);
  const float rz = nmadd(a.z,b.z,c.z);
  return Vec3fa(rx, ry, rz);
}

/*! Applies the scalar nmsub to each component triple (a, b, c). */
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c)
{
  const float rx = nmsub(a.x,b.x,c.x);
  const float ry = nmsub(a.y,b.y,c.y);
  const float rz = nmsub(a.z,b.z,c.z);
  return Vec3fa(rx, ry, rz);
}

/*! Scalar-first variants: a is broadcast to a Vec3fa, then the vector overload is used. */
__forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c)
{
  const Vec3fa splat(a);
  return madd(splat,b,c);
}

__forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c)
{
  const Vec3fa splat(a);
  return msub(splat,b,c);
}

__forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c)
{
  const Vec3fa splat(a);
  return nmadd(splat,b,c);
}

__forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c)
{
  const Vec3fa splat(a);
  return nmsub(splat,b,c);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Assignment Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Compound assignments: each stores the result of the matching binary
 *  operator back into a and returns a. */
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b )
{
  a = a + b;
  return a;
}

__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b )
{
  a = a - b;
  return a;
}

__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b )
{
  a = a * b;
  return a;
}

__forceinline Vec3fa& operator *=( Vec3fa& a, const float b )
{
  a = a * b;
  return a;
}

__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b )
{
  a = a / b;
  return a;
}

__forceinline Vec3fa& operator /=( Vec3fa& a, const float b )
{
  a = a / b;
  return a;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Reductions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Sum of x, y and z (added left to right). */
__forceinline float reduce_add(const Vec3fa& v)
{
  const float xy = v.x+v.y;
  return xy+v.z;
}

/*! Product of x, y and z (multiplied left to right). */
__forceinline float reduce_mul(const Vec3fa& v)
{
  const float xy = v.x*v.y;
  return xy*v.z;
}

/*! Smallest of x, y and z via sycl::fmin. */
__forceinline float reduce_min(const Vec3fa& v)
{
  const float xy = sycl::fmin(v.x,v.y);
  return sycl::fmin(xy,v.z);
}

/*! Largest of x, y and z via sycl::fmax. */
__forceinline float reduce_max(const Vec3fa& v)
{
  const float xy = sycl::fmax(v.x,v.y);
  return sycl::fmax(xy,v.z);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Comparison Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! True only when x, y and z all compare equal. */
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b )
{
  if (!(a.x == b.x)) return false;
  if (!(a.y == b.y)) return false;
  return a.z == b.z;
}

/*! True when at least one of x, y, z compares unequal. */
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b )
{
  if (a.x != b.x) return true;
  if (a.y != b.y) return true;
  return a.z != b.z;
}

/*! Per-component comparison masks; each returns a Vec3ba of the three results. */
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b )
{
  const bool mx = a.x == b.x;
  const bool my = a.y == b.y;
  const bool mz = a.z == b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b )
{
  const bool mx = a.x != b.x;
  const bool my = a.y != b.y;
  const bool mz = a.z != b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b )
{
  const bool mx = a.x < b.x;
  const bool my = a.y < b.y;
  const bool mz = a.z < b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b )
{
  const bool mx = a.x <= b.x;
  const bool my = a.y <= b.y;
  const bool mz = a.z <= b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b )
{
  const bool mx = a.x > b.x;
  const bool my = a.y > b.y;
  const bool mz = a.z > b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b )
{
  const bool mx = a.x >= b.x;
  const bool my = a.y >= b.y;
  const bool mz = a.z >= b.z;
  return Vec3ba(mx, my, mz);
}
|
||||
|
||||
/*! True when every component lies strictly between -FLT_LARGE and +FLT_LARGE. */
__forceinline bool isvalid ( const Vec3fa& v )
{
  const Vec3ba above_lower = gt_mask(v,Vec3fa(-FLT_LARGE));
  const Vec3ba below_upper = lt_mask(v,Vec3fa(+FLT_LARGE));
  return all(above_lower & below_upper);
}

/*! True when every component lies within [-FLT_MAX, +FLT_MAX]. */
__forceinline bool is_finite ( const Vec3fa& a )
{
  const Vec3ba above_lower = ge_mask(a,Vec3fa(-FLT_MAX));
  const Vec3ba below_upper = le_mask(a,Vec3fa(+FLT_MAX));
  return all(above_lower & below_upper);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Euclidean Space Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Dot product: reduce_add over the component-wise product. */
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b )
{
  const Vec3fa products = a*b;
  return reduce_add(products);
}

/*! Cross product; each component is formed with the scalar msub. */
__forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b )
{
  const float cx = msub(a.y,b.z,a.z*b.y);
  const float cy = msub(a.z,b.x,a.x*b.z);
  const float cz = msub(a.x,b.y,a.y*b.x);
  return Vec3fa(cx, cy, cz);
}
|
||||
|
||||
/*! dot(a,a). */
__forceinline float sqr_length ( const Vec3fa& a )
{
  return dot(a,a);
}

/*! rsqrt of dot(a,a). */
__forceinline float rcp_length ( const Vec3fa& a )
{
  const float len2 = dot(a,a);
  return rsqrt(len2);
}

/*! rcp of dot(a,a). */
__forceinline float rcp_length2( const Vec3fa& a )
{
  const float len2 = dot(a,a);
  return rcp(len2);
}

/*! sqrt of dot(a,a). */
__forceinline float length ( const Vec3fa& a )
{
  const float len2 = dot(a,a);
  return sqrt(len2);
}

/*! a scaled by rsqrt(dot(a,a)); no special handling for a zero vector. */
__forceinline Vec3fa normalize( const Vec3fa& a )
{
  const float inv_len = rsqrt(dot(a,a));
  return a*inv_len;
}

/*! Length of the difference a-b. */
__forceinline float distance ( const Vec3fa& a, const Vec3fa& b )
{
  const Vec3fa delta = a-b;
  return length(delta);
}

/*! madd(d.x, d.y+d.z, d.y*d.z). */
__forceinline float halfArea ( const Vec3fa& d )
{
  const float yz_sum  = d.y+d.z;
  const float yz_prod = d.y*d.z;
  return madd(d.x,yz_sum,yz_prod);
}

/*! Twice halfArea(d). */
__forceinline float area ( const Vec3fa& d )
{
  const float half = halfArea(d);
  return 2.0f*half;
}
|
||||
|
||||
/*! Like normalize, but returns a unchanged when dot(a,a) is exactly zero. */
__forceinline Vec3fa normalize_safe( const Vec3fa& a )
{
  const float len2 = dot(a,a);
  if (unlikely(len2 == 0.0f))
    return a;
  return a*rsqrt(len2);
}
|
||||
|
||||
/*! differentiated normalization */
|
||||
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
|
||||
{
|
||||
const float pp = dot(p,p);
|
||||
const float pdp = dot(p,dp);
|
||||
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Select
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Returns the x, y, z of t when s is true, otherwise those of f. */
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f )
{
  if (s)
    return Vec3fa(t.x, t.y, t.z);
  return Vec3fa(f.x, f.y, f.z);
}

/*! Per-component select: each component comes from t where the matching
 *  mask entry is set, otherwise from f. */
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f )
{
  const float rx = s.x ? t.x : f.x;
  const float ry = s.y ? t.y : f.y;
  const float rz = s.z ? t.z : f.z;
  return Vec3fa(rx, ry, rz);
}
|
||||
|
||||
/*! Linear interpolation computed as madd(1-t, v0, t*v1). */
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t)
{
  const float one_minus_t = 1.0f-t;
  const Vec3fa scaled_v1 = t*v1;
  return madd(one_minus_t,v0,scaled_v1);
}
|
||||
|
||||
/*! Index (0, 1 or 2) of the component with the largest absolute value.
 *  All comparisons are strict, so ties fall through to the later index. */
__forceinline int maxDim ( const Vec3fa& a )
{
  const Vec3fa mag = abs(a);
  if (mag.x > mag.y)
    return (mag.x > mag.z) ? 0 : 2;
  return (mag.y > mag.z) ? 1 : 2;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Rounding Functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Component-wise sycl::trunc. */
__forceinline Vec3fa trunc( const Vec3fa& a )
{
  const float rx = sycl::trunc(a.x);
  const float ry = sycl::trunc(a.y);
  const float rz = sycl::trunc(a.z);
  return Vec3fa(rx,ry,rz);
}

/*! Component-wise sycl::floor. */
__forceinline Vec3fa floor( const Vec3fa& a )
{
  const float rx = sycl::floor(a.x);
  const float ry = sycl::floor(a.y);
  const float rz = sycl::floor(a.z);
  return Vec3fa(rx,ry,rz);
}

/*! Component-wise sycl::ceil. */
__forceinline Vec3fa ceil ( const Vec3fa& a )
{
  const float rx = sycl::ceil (a.x);
  const float ry = sycl::ceil (a.y);
  const float rz = sycl::ceil (a.z);
  return Vec3fa(rx,ry,rz);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Output Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Streams a in the form "(x, y, z)". */
inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a)
{
  return cout << "("
              << a.x << ", "
              << a.y << ", "
              << a.z << ")";
}
|
||||
|
||||
/*! Vec2fa from Vec3fa: copies x and y. */
__forceinline Vec2fa::Vec2fa(const Vec3fa& a)
  : x(a.x),
    y(a.y)
{}

/*! Vec3ia from Vec3fa: converts each component with a C-style cast to int. */
__forceinline Vec3ia::Vec3ia( const Vec3fa& a )
  : x((int)a.x),
    y((int)a.y),
    z((int)a.z)
{}
|
||||
|
||||
typedef Vec3fa Vec3fa_t;
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SSE Vec3fx Type
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Three-component float vector with a fourth 32-bit slot that can be
 *  accessed as int (a), unsigned (u) or float (w). Declared with 16-byte
 *  alignment. */
struct __aligned(16) Vec3fx
{
  //ALIGNED_STRUCT_(16);

  typedef float Scalar;
  enum { N = 3 };
  struct { float x,y,z; union { int a; unsigned u; float w; }; };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! Default constructor: leaves all members uninitialized. */
  __forceinline Vec3fx( ) {}
  //__forceinline Vec3fx( const __m128 a ) : m128(a) {}

  /*! Copies the four elements a[0..3] into x, y, z, w. */
  __forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}

  /*! From Vec3fa: copies x, y, z and sets w to zero. */
  __forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {}

  /*! To Vec3fa: keeps x, y, z and drops the fourth slot. */
  __forceinline operator Vec3fa() const { return Vec3fa(x,y,z); }

  /*! From Vec3<float>: copies x, y, z. The fourth slot is zeroed so that the
   *  four-lane operators and the vfloat4 conversion never read an
   *  uninitialized w (same value the Vec3fa and Vec3ia constructors use). */
  __forceinline explicit Vec3fx ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; w = 0.0f; }
  //__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }

  //__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; }
  //__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; }

  /*! Broadcasts a into all four slots. */
  __forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {}

  /*! Three-float constructor; note that w is set to the z value. */
  __forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {}

  /*! Vec3fa plus an explicit fourth slot, stored as int, unsigned or float
   *  depending on the argument type. */
  __forceinline Vec3fx( const Vec3fa& other, const int a1) : x(other.x), y(other.y), z(other.z), a(a1) {}
  __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {}
  __forceinline Vec3fx( const Vec3fa& other, const float w1) : x(other.x), y(other.y), z(other.z), w(w1) {}

  //__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
  //__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!

  /*! Four-float constructor. */
  __forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {}

  /*! From Vec3ia: converts x, y, z to float and sets w to zero. */
  __forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {}

  //__forceinline operator const __m128&() const { return m128; }
  //__forceinline operator __m128&() { return m128; }

  /*! To vfloat4: packs x, y, z, w. */
  __forceinline operator vfloat4() const { return vfloat4(x,y,z,w); }

  //friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /*! Reads four consecutive floats at a into x, y, z, w. */
  static __forceinline Vec3fx load( const void* const a ) {
    const float* ptr = (const float*)a;
    return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
  }

  /*! Same implementation as load: reads four consecutive floats at a. */
  static __forceinline Vec3fx loadu( const void* const a ) {
    const float* ptr = (const float*)a;
    return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
  }

  /*! Writes x, y, z, w to four consecutive floats at a. */
  static __forceinline void storeu ( void* a, const Vec3fx& v ) {
    float* ptr = (float*)a;
    ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; ptr[3] = v.w;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  /*! Tag constructors: fill all four slots with 0, 1, +INFINITY or -INFINITY. */
  __forceinline Vec3fx( ZeroTy   ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {}
  __forceinline Vec3fx( OneTy    ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {}
  __forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {}
  __forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /*! Component access by index, offset from the address of x; the index is
   *  asserted to be below 3, so w is not reachable through this operator
   *  when assertions are enabled. */
  __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
  __forceinline       float& operator []( const size_t index )       { assert(index < 3); return (&x)[index]; }
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Unary plus: returns a by value, unchanged. */
__forceinline Vec3fx operator +( const Vec3fx& a )
{
  return a;
}

/*! Unary minus: negates x, y, z and w. */
__forceinline Vec3fx operator -( const Vec3fx& a )
{
  const float nx = -a.x;
  const float ny = -a.y;
  const float nz = -a.z;
  const float nw = -a.w;
  return Vec3fx(nx,ny,nz,nw);
}

/*! Absolute value of all four lanes via sycl::fabs. */
__forceinline Vec3fx abs ( const Vec3fx& a )
{
  const float ax = sycl::fabs(a.x);
  const float ay = sycl::fabs(a.y);
  const float az = sycl::fabs(a.z);
  const float aw = sycl::fabs(a.w);
  return Vec3fx(ax,ay,az,aw);
}
|
||||
/*! sycl::sign applied to all four lanes. The w lane is computed from a.w,
 *  as in the other four-lane unary functions (it was previously taken from
 *  a.z by mistake). */
__forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.w)); }
|
||||
|
||||
//__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
|
||||
/*! Reciprocal of all four lanes through __sycl_std::__invoke_native_recip. */
__forceinline Vec3fx rcp ( const Vec3fx& a )
{
  const float rx = __sycl_std::__invoke_native_recip<float>(a.x);
  const float ry = __sycl_std::__invoke_native_recip<float>(a.y);
  const float rz = __sycl_std::__invoke_native_recip<float>(a.z);
  const float rw = __sycl_std::__invoke_native_recip<float>(a.w);
  return Vec3fx(rx,ry,rz,rw);
}

/*! Square root of all four lanes via sycl::sqrt. */
__forceinline Vec3fx sqrt ( const Vec3fx& a )
{
  const float rx = sycl::sqrt(a.x);
  const float ry = sycl::sqrt(a.y);
  const float rz = sycl::sqrt(a.z);
  const float rw = sycl::sqrt(a.w);
  return Vec3fx(rx,ry,rz,rw);
}

/*! Square of all four lanes. */
__forceinline Vec3fx sqr ( const Vec3fx& a )
{
  const float xx = a.x*a.x;
  const float yy = a.y*a.y;
  const float zz = a.z*a.z;
  const float ww = a.w*a.w;
  return Vec3fx(xx,yy,zz,ww);
}

/*! Reciprocal square root of all four lanes via sycl::rsqrt. */
__forceinline Vec3fx rsqrt( const Vec3fx& a )
{
  const float rx = sycl::rsqrt(a.x);
  const float ry = sycl::rsqrt(a.y);
  const float rz = sycl::rsqrt(a.z);
  const float rw = sycl::rsqrt(a.w);
  return Vec3fx(rx,ry,rz,rw);
}
|
||||
|
||||
/*! Replaces every x/y/z component whose magnitude is below min_rcp_input
 *  with min_rcp_input. The result is built with the three-float
 *  constructor, so the input's w is not carried over. */
__forceinline Vec3fx zero_fix(const Vec3fx& a)
{
  float fx = a.x;
  float fy = a.y;
  float fz = a.z;
  if (sycl::fabs(a.x) < min_rcp_input) fx = min_rcp_input;
  if (sycl::fabs(a.y) < min_rcp_input) fy = min_rcp_input;
  if (sycl::fabs(a.z) < min_rcp_input) fz = min_rcp_input;
  return Vec3fx(fx,fy,fz);
}

/*! Reciprocal of a after passing it through zero_fix. */
__forceinline Vec3fx rcp_safe(const Vec3fx& a)
{
  const Vec3fx guarded = zero_fix(a);
  return rcp(guarded);
}
|
||||
/*! sycl::log of x, y, z; built with the three-float constructor. */
__forceinline Vec3fx log ( const Vec3fx& a )
{
  const float lx = sycl::log(a.x);
  const float ly = sycl::log(a.y);
  const float lz = sycl::log(a.z);
  return Vec3fx(lx,ly,lz);
}

/*! sycl::exp of x, y, z; built with the three-float constructor. */
__forceinline Vec3fx exp ( const Vec3fx& a )
{
  const float ex = sycl::exp(a.x);
  const float ey = sycl::exp(a.y);
  const float ez = sycl::exp(a.z);
  return Vec3fx(ex,ey,ez);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Binary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Four-lane sum. */
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b )
{
  const float rx = a.x+b.x;
  const float ry = a.y+b.y;
  const float rz = a.z+b.z;
  const float rw = a.w+b.w;
  return Vec3fx(rx, ry, rz, rw);
}

/*! Four-lane difference. */
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b )
{
  const float rx = a.x-b.x;
  const float ry = a.y-b.y;
  const float rz = a.z-b.z;
  const float rw = a.w-b.w;
  return Vec3fx(rx, ry, rz, rw);
}

/*! Four-lane product. */
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b )
{
  const float rx = a.x*b.x;
  const float ry = a.y*b.y;
  const float rz = a.z*b.z;
  const float rw = a.w*b.w;
  return Vec3fx(rx, ry, rz, rw);
}

/*! Vector times scalar: the scalar is broadcast into a Vec3fx first. */
__forceinline Vec3fx operator *( const Vec3fx& a, const float b )
{
  const Vec3fx splat(b);
  return a * splat;
}

/*! Scalar times vector: the scalar is broadcast into a Vec3fx first. */
__forceinline Vec3fx operator *( const float a, const Vec3fx& b )
{
  const Vec3fx splat(a);
  return splat * b;
}

/*! Four-lane quotient. */
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b )
{
  const float rx = a.x/b.x;
  const float ry = a.y/b.y;
  const float rz = a.z/b.z;
  const float rw = a.w/b.w;
  return Vec3fx(rx, ry, rz, rw);
}

/*! Divides all four lanes of a by the scalar b. */
__forceinline Vec3fx operator /( const Vec3fx& a, const float b )
{
  const float rx = a.x/b;
  const float ry = a.y/b;
  const float rz = a.z/b;
  const float rw = a.w/b;
  return Vec3fx(rx, ry, rz, rw);
}

/*! Divides the scalar a by each of the four lanes of b. */
__forceinline Vec3fx operator /( const float a, const Vec3fx& b )
{
  const float rx = a/b.x;
  const float ry = a/b.y;
  const float rz = a/b.z;
  const float rw = a/b.w;
  return Vec3fx(rx, ry, rz, rw);
}
|
||||
|
||||
/*! Four-lane minimum via sycl::fmin. */
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b )
{
  const float lo_x = sycl::fmin(a.x,b.x);
  const float lo_y = sycl::fmin(a.y,b.y);
  const float lo_z = sycl::fmin(a.z,b.z);
  const float lo_w = sycl::fmin(a.w,b.w);
  return Vec3fx(lo_x, lo_y, lo_z, lo_w);
}

/*! Four-lane maximum via sycl::fmax. */
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b )
{
  const float hi_x = sycl::fmax(a.x,b.x);
  const float hi_y = sycl::fmax(a.y,b.y);
  const float hi_z = sycl::fmax(a.z,b.z);
  const float hi_w = sycl::fmax(a.w,b.w);
  return Vec3fx(hi_x, hi_y, hi_z, hi_w);
}
|
||||
|
||||
/*
|
||||
#if defined(__SSE4_1__)
|
||||
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
|
||||
const vint4 ai = _mm_castps_si128(a);
|
||||
const vint4 bi = _mm_castps_si128(b);
|
||||
const vint4 ci = _mm_min_epi32(ai,bi);
|
||||
return _mm_castsi128_ps(ci);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
|
||||
const vint4 ai = _mm_castps_si128(a);
|
||||
const vint4 bi = _mm_castps_si128(b);
|
||||
const vint4 ci = _mm_max_epi32(ai,bi);
|
||||
return _mm_castsi128_ps(ci);
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
|
||||
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
|
||||
}
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Ternary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Applies the scalar madd to each of the four lanes of (a, b, c). */
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c)
{
  const float rx = madd(a.x,b.x,c.x);
  const float ry = madd(a.y,b.y,c.y);
  const float rz = madd(a.z,b.z,c.z);
  const float rw = madd(a.w,b.w,c.w);
  return Vec3fx(rx, ry, rz, rw);
}

/*! Applies the scalar msub to each of the four lanes of (a, b, c). */
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c)
{
  const float rx = msub(a.x,b.x,c.x);
  const float ry = msub(a.y,b.y,c.y);
  const float rz = msub(a.z,b.z,c.z);
  const float rw = msub(a.w,b.w,c.w);
  return Vec3fx(rx, ry, rz, rw);
}

/*! Applies the scalar nmadd to each of the four lanes of (a, b, c). */
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c)
{
  const float rx = nmadd(a.x,b.x,c.x);
  const float ry = nmadd(a.y,b.y,c.y);
  const float rz = nmadd(a.z,b.z,c.z);
  const float rw = nmadd(a.w,b.w,c.w);
  return Vec3fx(rx, ry, rz, rw);
}

/*! Applies the scalar nmsub to each of the four lanes of (a, b, c). */
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c)
{
  const float rx = nmsub(a.x,b.x,c.x);
  const float ry = nmsub(a.y,b.y,c.y);
  const float rz = nmsub(a.z,b.z,c.z);
  const float rw = nmsub(a.w,b.w,c.w);
  return Vec3fx(rx, ry, rz, rw);
}

/*! Scalar-first variants: a is broadcast to a Vec3fx, then the vector overload is used. */
__forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c)
{
  const Vec3fx splat(a);
  return madd(splat,b,c);
}

__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c)
{
  const Vec3fx splat(a);
  return msub(splat,b,c);
}

__forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c)
{
  const Vec3fx splat(a);
  return nmadd(splat,b,c);
}

__forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c)
{
  const Vec3fx splat(a);
  return nmsub(splat,b,c);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Assignment Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Compound assignments: each stores the result of the matching binary
 *  operator back into a and returns a. */
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b )
{
  a = a + b;
  return a;
}

__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b )
{
  a = a - b;
  return a;
}

__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b )
{
  a = a * b;
  return a;
}

__forceinline Vec3fx& operator *=( Vec3fx& a, const float b )
{
  a = a * b;
  return a;
}

__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b )
{
  a = a / b;
  return a;
}

__forceinline Vec3fx& operator /=( Vec3fx& a, const float b )
{
  a = a / b;
  return a;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Reductions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Sum of x, y and z (w is not included). */
__forceinline float reduce_add(const Vec3fx& v)
{
  const float xy = v.x+v.y;
  return xy+v.z;
}

/*! Product of x, y and z (w is not included). */
__forceinline float reduce_mul(const Vec3fx& v)
{
  const float xy = v.x*v.y;
  return xy*v.z;
}

/*! Smallest of x, y and z via sycl::fmin (w is not included). */
__forceinline float reduce_min(const Vec3fx& v)
{
  const float xy = sycl::fmin(v.x,v.y);
  return sycl::fmin(xy,v.z);
}

/*! Largest of x, y and z via sycl::fmax (w is not included). */
__forceinline float reduce_max(const Vec3fx& v)
{
  const float xy = sycl::fmax(v.x,v.y);
  return sycl::fmax(xy,v.z);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Comparison Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! True only when x, y and z all compare equal; w is not compared. */
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b )
{
  if (!(a.x == b.x)) return false;
  if (!(a.y == b.y)) return false;
  return a.z == b.z;
}

/*! True when at least one of x, y, z compares unequal; w is not compared. */
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b )
{
  if (a.x != b.x) return true;
  if (a.y != b.y) return true;
  return a.z != b.z;
}

/*! Per-component comparison masks over x, y, z; each returns a Vec3ba. */
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b )
{
  const bool mx = a.x == b.x;
  const bool my = a.y == b.y;
  const bool mz = a.z == b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b )
{
  const bool mx = a.x != b.x;
  const bool my = a.y != b.y;
  const bool mz = a.z != b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b )
{
  const bool mx = a.x < b.x;
  const bool my = a.y < b.y;
  const bool mz = a.z < b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b )
{
  const bool mx = a.x <= b.x;
  const bool my = a.y <= b.y;
  const bool mz = a.z <= b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b )
{
  const bool mx = a.x > b.x;
  const bool my = a.y > b.y;
  const bool mz = a.z > b.z;
  return Vec3ba(mx, my, mz);
}

__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b )
{
  const bool mx = a.x >= b.x;
  const bool my = a.y >= b.y;
  const bool mz = a.z >= b.z;
  return Vec3ba(mx, my, mz);
}
|
||||
|
||||
/*! True when x, y and z lie strictly between -FLT_LARGE and +FLT_LARGE. */
__forceinline bool isvalid ( const Vec3fx& v )
{
  const Vec3ba above_lower = gt_mask(v,Vec3fx(-FLT_LARGE));
  const Vec3ba below_upper = lt_mask(v,Vec3fx(+FLT_LARGE));
  return all(above_lower & below_upper);
}

/*! True when x, y and z lie within [-FLT_MAX, +FLT_MAX]. */
__forceinline bool is_finite ( const Vec3fx& a )
{
  const Vec3ba above_lower = ge_mask(a,Vec3fx(-FLT_MAX));
  const Vec3ba below_upper = le_mask(a,Vec3fx(+FLT_MAX));
  return all(above_lower & below_upper);
}
|
||||
|
||||
/*! True when all four lanes lie within [-FLT_LARGE, +FLT_LARGE].
 *  The range tests are combined with non-short-circuit '&'. */
__forceinline bool isvalid4 ( const Vec3fx& v )
{
  const bool ok_x = (v.x >= -FLT_LARGE) & (v.x <= +FLT_LARGE);
  const bool ok_y = (v.y >= -FLT_LARGE) & (v.y <= +FLT_LARGE);
  const bool ok_z = (v.z >= -FLT_LARGE) & (v.z <= +FLT_LARGE);
  const bool ok_w = (v.w >= -FLT_LARGE) & (v.w <= +FLT_LARGE);
  return ok_x & ok_y & ok_z & ok_w;
}

/*! True when all four lanes lie within [-FLT_MAX, +FLT_MAX].
 *  The range tests are combined with non-short-circuit '&'. */
__forceinline bool is_finite4 ( const Vec3fx& v )
{
  const bool ok_x = (v.x >= -FLT_MAX) & (v.x <= +FLT_MAX);
  const bool ok_y = (v.y >= -FLT_MAX) & (v.y <= +FLT_MAX);
  const bool ok_z = (v.z >= -FLT_MAX) & (v.z <= +FLT_MAX);
  const bool ok_w = (v.w >= -FLT_MAX) & (v.w <= +FLT_MAX);
  return ok_x & ok_y & ok_z & ok_w;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Euclidean Space Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Dot product: reduce_add over the lane-wise product (reduce_add sums x, y, z only). */
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b )
{
  const Vec3fx products = a*b;
  return reduce_add(products);
}

/*! Cross product of the x, y, z parts; each component is formed with the
 *  scalar msub and the result uses the three-float constructor. */
__forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b )
{
  const float cx = msub(a.y,b.z,a.z*b.y);
  const float cy = msub(a.z,b.x,a.x*b.z);
  const float cz = msub(a.x,b.y,a.y*b.x);
  return Vec3fx(cx, cy, cz);
}
|
||||
|
||||
/*! dot(a,a). */
__forceinline float sqr_length ( const Vec3fx& a )
{
  return dot(a,a);
}

/*! rsqrt of dot(a,a). */
__forceinline float rcp_length ( const Vec3fx& a )
{
  const float len2 = dot(a,a);
  return rsqrt(len2);
}

/*! rcp of dot(a,a). */
__forceinline float rcp_length2( const Vec3fx& a )
{
  const float len2 = dot(a,a);
  return rcp(len2);
}

/*! sqrt of dot(a,a). */
__forceinline float length ( const Vec3fx& a )
{
  const float len2 = dot(a,a);
  return sqrt(len2);
}

/*! a scaled by rsqrt(dot(a,a)); no special handling for a zero vector. */
__forceinline Vec3fx normalize( const Vec3fx& a )
{
  const float inv_len = rsqrt(dot(a,a));
  return a*inv_len;
}

/*! Length of the difference a-b. */
__forceinline float distance ( const Vec3fx& a, const Vec3fx& b )
{
  const Vec3fx delta = a-b;
  return length(delta);
}

/*! madd(d.x, d.y+d.z, d.y*d.z). */
__forceinline float halfArea ( const Vec3fx& d )
{
  const float yz_sum  = d.y+d.z;
  const float yz_prod = d.y*d.z;
  return madd(d.x,yz_sum,yz_prod);
}

/*! Twice halfArea(d). */
__forceinline float area ( const Vec3fx& d )
{
  const float half = halfArea(d);
  return 2.0f*half;
}
|
||||
|
||||
/*! Like normalize, but returns a unchanged when dot(a,a) is exactly zero. */
__forceinline Vec3fx normalize_safe( const Vec3fx& a )
{
  const float len2 = dot(a,a);
  if (unlikely(len2 == 0.0f))
    return a;
  return a*rsqrt(len2);
}
|
||||
|
||||
/*! differentiated normalization */
|
||||
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
|
||||
{
|
||||
const float pp = dot(p,p);
|
||||
const float pdp = dot(p,dp);
|
||||
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Select
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Returns the four lanes of t when s is true, otherwise those of f. */
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f )
{
  if (s)
    return Vec3fx(t.x, t.y, t.z, t.w);
  return Vec3fx(f.x, f.y, f.z, f.w);
}

/*! Per-component select over x, y, z; the result is built with the
 *  three-float constructor, so w is not selected from t or f. */
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f )
{
  const float rx = s.x ? t.x : f.x;
  const float ry = s.y ? t.y : f.y;
  const float rz = s.z ? t.z : f.z;
  return Vec3fx(rx, ry, rz);
}
|
||||
|
||||
/*! Linear interpolation computed as madd(1-t, v0, t*v1). */
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t)
{
  const float one_minus_t = 1.0f-t;
  const Vec3fx scaled_v1 = t*v1;
  return madd(one_minus_t,v0,scaled_v1);
}
|
||||
|
||||
/*! Index (0, 1 or 2) of the x/y/z component with the largest absolute value.
 *  All comparisons are strict, so ties fall through to the later index. */
__forceinline int maxDim ( const Vec3fx& a )
{
  const Vec3fx mag = abs(a);
  if (mag.x > mag.y)
    return (mag.x > mag.z) ? 0 : 2;
  return (mag.y > mag.z) ? 1 : 2;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Rounding Functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! sycl::trunc applied to all four lanes. */
__forceinline Vec3fx trunc( const Vec3fx& a )
{
  const float rx = sycl::trunc(a.x);
  const float ry = sycl::trunc(a.y);
  const float rz = sycl::trunc(a.z);
  const float rw = sycl::trunc(a.w);
  return Vec3fx(rx,ry,rz,rw);
}

/*! sycl::floor applied to all four lanes. */
__forceinline Vec3fx floor( const Vec3fx& a )
{
  const float rx = sycl::floor(a.x);
  const float ry = sycl::floor(a.y);
  const float rz = sycl::floor(a.z);
  const float rw = sycl::floor(a.w);
  return Vec3fx(rx,ry,rz,rw);
}

/*! sycl::ceil applied to all four lanes. */
__forceinline Vec3fx ceil ( const Vec3fx& a )
{
  const float rx = sycl::ceil (a.x);
  const float ry = sycl::ceil (a.y);
  const float rz = sycl::ceil (a.z);
  const float rw = sycl::ceil (a.w);
  return Vec3fx(rx,ry,rz,rw);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Output Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Streams all four lanes of a, parenthesised and comma-separated. */
inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a)
{
  return cout << "("
              << a.x << ", "
              << a.y << ", "
              << a.z << ","
              << a.w << ")";
}
|
||||
|
||||
typedef Vec3fx Vec3ff;
|
||||
|
||||
//__forceinline Vec2fa::Vec2fa(const Vec3fx& a)
|
||||
// : x(a.x), y(a.y) {}
|
||||
|
||||
//__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
|
||||
// : x((int)a.x), y((int)a.y), z((int)a.z) {}
|
||||
}
|
|
@ -4,7 +4,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "../sys/alloc.h"
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
# include "vec3ia_sycl.h"
|
||||
#else
|
||||
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
|
@ -194,3 +199,5 @@ namespace embree
|
|||
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,178 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../sys/alloc.h"
|
||||
#include "emath.h"
|
||||
#include "../simd/sse.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SSE Vec3ia Type
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Three-component int vector, declared with 16-byte alignment. */
struct __aligned(16) Vec3ia
{
  ALIGNED_STRUCT_(16);

  struct { int x,y,z; };

  typedef int Scalar;
  enum { N = 3 };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! Default constructor: leaves x, y, z uninitialized. */
  __forceinline Vec3ia( ) {}
  //__forceinline Vec3ia( const __m128i a ) : m128(a) {}

  /*! Copy constructor: copies x, y, z. */
  __forceinline Vec3ia( const Vec3ia& other )
    : x(other.x), y(other.y), z(other.z)
  {}

  /*! Copy assignment: copies x, y, z and returns *this. */
  __forceinline Vec3ia& operator =(const Vec3ia& other)
  {
    x = other.x;
    y = other.y;
    z = other.z;
    return *this;
  }

  /*! Broadcasts a into x, y and z. */
  __forceinline explicit Vec3ia( const int a )
    : x(a), y(a), z(a)
  {}

  /*! Component-wise constructor. */
  __forceinline Vec3ia( const int x, const int y, const int z)
    : x(x), y(y), z(z)
  {}
  //__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}

  /*! Takes the first three elements a[0..2] of a vint4. */
  __forceinline explicit Vec3ia(const vint4& a)
    : x(a[0]), y(a[1]), z(a[2])
  {}

  /*! Conversion from Vec3fa; declared here, defined outside this struct. */
  __forceinline explicit Vec3ia( const Vec3fa& a );

  //__forceinline operator const __m128i&() const { return m128; }
  //__forceinline operator __m128i&() { return m128; }

  /*! To vint4: packs (x, y, z, z), i.e. z is repeated in the fourth element. */
  __forceinline operator vint4() const
  {
    return vint4(x,y,z,z);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  /*! Tag constructors: fill x, y, z with 0, 1, 0x7FFFFFFF or 0x80000000. */
  __forceinline Vec3ia( ZeroTy )
    : x(0), y(0), z(0)
  {}

  __forceinline Vec3ia( OneTy )
    : x(1), y(1), z(1)
  {}

  __forceinline Vec3ia( PosInfTy )
    : x(0x7FFFFFFF), y(0x7FFFFFFF), z(0x7FFFFFFF)
  {}

  __forceinline Vec3ia( NegInfTy )
    : x(0x80000000), y(0x80000000), z(0x80000000)
  {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /*! Read-only component access; index is asserted to be below 3 and is
   *  applied as an offset from the address of x. */
  __forceinline const int& operator []( const size_t index ) const
  {
    assert(index < 3);
    const int* const base = &x;
    return base[index];
  }

  /*! Mutable component access; same indexing as the const overload. */
  __forceinline int& operator []( const size_t index )
  {
    assert(index < 3);
    int* const base = &x;
    return base[index];
  }
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! unary plus, applied to each component */
__forceinline Vec3ia operator +( const Vec3ia& a )
{
  const int px = +a.x;
  const int py = +a.y;
  const int pz = +a.z;
  return Vec3ia(px, py, pz);
}

/*! negation of each component */
__forceinline Vec3ia operator -( const Vec3ia& a )
{
  const int nx = -a.x;
  const int ny = -a.y;
  const int nz = -a.z;
  return Vec3ia(nx, ny, nz);
}

/*! sycl::abs applied to each component */
__forceinline Vec3ia abs ( const Vec3ia& a )
{
  return Vec3ia(sycl::abs(a.x),
                sycl::abs(a.y),
                sycl::abs(a.z));
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Binary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! component-wise addition; the int overloads broadcast the scalar first */
__forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b )
{
  return Vec3ia(a.x + b.x, a.y + b.y, a.z + b.z);
}
__forceinline Vec3ia operator +( const Vec3ia& a, const int b )
{
  const Vec3ia rhs(b);
  return a + rhs;
}
__forceinline Vec3ia operator +( const int a, const Vec3ia& b )
{
  const Vec3ia lhs(a);
  return lhs + b;
}

/*! component-wise subtraction; the int overloads broadcast the scalar first */
__forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b )
{
  return Vec3ia(a.x - b.x, a.y - b.y, a.z - b.z);
}
__forceinline Vec3ia operator -( const Vec3ia& a, const int b )
{
  const Vec3ia rhs(b);
  return a - rhs;
}
__forceinline Vec3ia operator -( const int a, const Vec3ia& b )
{
  const Vec3ia lhs(a);
  return lhs - b;
}

/*! component-wise multiplication; the int overloads broadcast the scalar first */
__forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b )
{
  return Vec3ia(a.x * b.x, a.y * b.y, a.z * b.z);
}
__forceinline Vec3ia operator *( const Vec3ia& a, const int b )
{
  const Vec3ia rhs(b);
  return a * rhs;
}
__forceinline Vec3ia operator *( const int a, const Vec3ia& b )
{
  const Vec3ia lhs(a);
  return lhs * b;
}

/*! component-wise bitwise AND; the int overloads broadcast the scalar first */
__forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b )
{
  return Vec3ia(a.x & b.x, a.y & b.y, a.z & b.z);
}
__forceinline Vec3ia operator &( const Vec3ia& a, const int b )
{
  const Vec3ia rhs(b);
  return a & rhs;
}
__forceinline Vec3ia operator &( const int a, const Vec3ia& b )
{
  const Vec3ia lhs(a);
  return lhs & b;
}

/*! component-wise bitwise OR; the int overloads broadcast the scalar first */
__forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b )
{
  return Vec3ia(a.x | b.x, a.y | b.y, a.z | b.z);
}
__forceinline Vec3ia operator |( const Vec3ia& a, const int b )
{
  const Vec3ia rhs(b);
  return a | rhs;
}
__forceinline Vec3ia operator |( const int a, const Vec3ia& b )
{
  const Vec3ia lhs(a);
  return lhs | b;
}

/*! component-wise bitwise XOR; the int overloads broadcast the scalar first */
__forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b )
{
  return Vec3ia(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z);
}
__forceinline Vec3ia operator ^( const Vec3ia& a, const int b )
{
  const Vec3ia rhs(b);
  return a ^ rhs;
}
__forceinline Vec3ia operator ^( const int a, const Vec3ia& b )
{
  const Vec3ia lhs(a);
  return lhs ^ b;
}

/*! shifts every component by the same count using the built-in int shifts */
__forceinline Vec3ia operator <<( const Vec3ia& a, const int n )
{
  return Vec3ia(a.x << n, a.y << n, a.z << n);
}
__forceinline Vec3ia operator >>( const Vec3ia& a, const int n )
{
  return Vec3ia(a.x >> n, a.y >> n, a.z >> n);
}

/*! named shift helpers: sll and sra match << and >> on int; srl shifts the
 *  components after converting them to unsigned */
__forceinline Vec3ia sll ( const Vec3ia& a, const int b )
{
  return a << b;
}
__forceinline Vec3ia sra ( const Vec3ia& a, const int b )
{
  return a >> b;
}
__forceinline Vec3ia srl ( const Vec3ia& a, const int b )
{
  return Vec3ia(static_cast<unsigned>(a.x) >> b,
                static_cast<unsigned>(a.y) >> b,
                static_cast<unsigned>(a.z) >> b);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Assignment Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! compound assignments: each one evaluates the matching binary operator,
 *  stores the result in a and returns a */
__forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { a = a + b; return a; }
__forceinline Vec3ia& operator +=( Vec3ia& a, const int&    b ) { a = a + b; return a; }

__forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { a = a - b; return a; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const int&    b ) { a = a - b; return a; }

__forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { a = a * b; return a; }
__forceinline Vec3ia& operator *=( Vec3ia& a, const int&    b ) { a = a * b; return a; }

__forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { a = a & b; return a; }
__forceinline Vec3ia& operator &=( Vec3ia& a, const int&    b ) { a = a & b; return a; }

__forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { a = a | b; return a; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const int&    b ) { a = a | b; return a; }

__forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { a = a << b; return a; }
__forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { a = a >> b; return a; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Reductions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! sum of the three components */
__forceinline int reduce_add(const Vec3ia& v)
{
  return (v.x + v.y) + v.z;
}

/*! product of the three components */
__forceinline int reduce_mul(const Vec3ia& v)
{
  return (v.x * v.y) * v.z;
}

/*! smallest component, via sycl::min */
__forceinline int reduce_min(const Vec3ia& v)
{
  const int min_xy = sycl::min(v.x, v.y);
  return sycl::min(min_xy, v.z);
}

/*! largest component, via sycl::max */
__forceinline int reduce_max(const Vec3ia& v)
{
  const int max_xy = sycl::max(v.x, v.y);
  return sycl::max(max_xy, v.z);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Comparison Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! true when all three components are equal; the per-component results are
 *  combined without short-circuiting */
__forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b )
{
  const bool same_x = a.x == b.x;
  const bool same_y = a.y == b.y;
  const bool same_z = a.z == b.z;
  return same_x & same_y & same_z;
}
|
||||
/*! true when at least one component differs, i.e. the exact negation of
 *  operator==. The per-component tests are OR-ed: combining them with AND
 *  would report (1,2,3) and (1,2,4) as "not different" even though they are
 *  also not equal. */
__forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return (a.x != b.x) | (a.y != b.y) | (a.z != b.z); }
|
||||
|
||||
/*
|
||||
__forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
|
||||
if (a.x != b.x) return a.x < b.x;
|
||||
if (a.y != b.y) return a.y < b.y;
|
||||
if (a.z != b.z) return a.z < b.z;
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
/*! per-component a == b, packed into a Vec3ba */
__forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b )
{
  const bool mx = a.x == b.x;
  const bool my = a.y == b.y;
  const bool mz = a.z == b.z;
  return Vec3ba(mx, my, mz);
}

/*! per-component a < b, packed into a Vec3ba */
__forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b )
{
  const bool mx = a.x < b.x;
  const bool my = a.y < b.y;
  const bool mz = a.z < b.z;
  return Vec3ba(mx, my, mz);
}

/*! per-component a > b, packed into a Vec3ba */
__forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b )
{
  const bool mx = a.x > b.x;
  const bool my = a.y > b.y;
  const bool mz = a.z > b.z;
  return Vec3ba(mx, my, mz);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Select
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! per-component choice: takes the component of t where the matching
 *  component of m is set, otherwise the component of f */
__forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f )
{
  return Vec3ia(m.x ? t.x : f.x,
                m.y ? t.y : f.y,
                m.z ? t.z : f.z);
}
|
||||
|
||||
/*! component-wise minimum, via sycl::min */
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b )
{
  return Vec3ia(sycl::min(a.x, b.x),
                sycl::min(a.y, b.y),
                sycl::min(a.z, b.z));
}

/*! component-wise maximum, via sycl::max */
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b )
{
  return Vec3ia(sycl::max(a.x, b.x),
                sycl::max(a.y, b.y),
                sycl::max(a.z, b.z));
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Output Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Output operator for Vec3ia. This variant writes nothing: it returns the
 *  stream it was given and never reads `a`. */
inline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
|
||||
return cout;
|
||||
}
|
||||
}
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "math.h"
|
||||
#include "emath.h"
|
||||
#include "vec3.h"
|
||||
|
||||
namespace embree
|
||||
|
@ -221,6 +221,8 @@ namespace embree
|
|||
{
|
||||
template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; }
|
||||
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
#if defined(__AVX__)
|
||||
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
|
||||
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||
|
@ -240,4 +242,25 @@ namespace embree
|
|||
#if defined(__AVX512F__)
|
||||
template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fx& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {}
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__SSE__)
|
||||
template<> __forceinline Vec4<vfloat4>::Vec4(const Vec3fx& a) {
|
||||
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX__)
|
||||
template<> __forceinline Vec4<vfloat8>::Vec4(const Vec3fx& a) {
|
||||
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX512F__)
|
||||
template<> __forceinline Vec4<vfloat16>::Vec4(const Vec3fx& a) {
|
||||
x = a.x; y = a.y; z = a.z; w = a.w;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ __forceinline __m128 _mm_broadcast_ss (float const * mem_addr)
|
|||
#define _MM_SET_EXCEPTION_MASK(x)
|
||||
// #define _MM_SET_FLUSH_ZERO_MODE(x)
|
||||
|
||||
/*
|
||||
__forceinline int _mm_getcsr()
|
||||
{
|
||||
return 0;
|
||||
|
@ -43,6 +44,7 @@ __forceinline void _mm_mfence()
|
|||
{
|
||||
__sync_synchronize();
|
||||
}
|
||||
*/
|
||||
|
||||
__forceinline __m128i _mm_load4epu8_epi32(__m128i *ptr)
|
||||
{
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -31,4 +31,3 @@
|
|||
#if defined(__AVX512F__)
|
||||
#include "avx512.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "../math/math.h"
|
||||
#include "../math/emath.h"
|
||||
|
||||
/* include SSE wrapper classes */
|
||||
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||
|
|
|
@ -15,7 +15,7 @@ namespace embree
|
|||
__forceinline const float& operator [](size_t index) const { assert(index < N); return f[index]; }
|
||||
__forceinline float& operator [](size_t index) { assert(index < N); return f[index]; }
|
||||
};
|
||||
|
||||
|
||||
template<int N>
|
||||
struct vdouble_impl
|
||||
{
|
||||
|
@ -31,7 +31,7 @@ namespace embree
|
|||
__forceinline const int& operator [](size_t index) const { assert(index < N); return i[index]; }
|
||||
__forceinline int& operator [](size_t index) { assert(index < N); return i[index]; }
|
||||
};
|
||||
|
||||
|
||||
template<int N>
|
||||
struct vuint_impl
|
||||
{
|
||||
|
|
|
@ -119,7 +119,7 @@ namespace embree
|
|||
#if defined(__aarch64__)
|
||||
template<int i0, int i1, int i2, int i3>
|
||||
__forceinline vboolf4 shuffle(const vboolf4& v) {
|
||||
return vreinterpretq_f32_u8(vqtbl1q_u8( vreinterpretq_u8_s32(v), _MN_SHUFFLE(i0, i1, i2, i3)));
|
||||
return vreinterpretq_f32_u8(vqtbl1q_u8( vreinterpretq_u8_s32((int32x4_t)v.v), _MN_SHUFFLE(i0, i1, i2, i3)));
|
||||
}
|
||||
|
||||
template<int i0, int i1, int i2, int i3>
|
||||
|
|
|
@ -316,6 +316,17 @@ namespace embree
|
|||
return madd(t,b-a,a);
|
||||
}
|
||||
|
||||
/*! checks every element of v against the open interval
 *  (-FLT_LARGE, +FLT_LARGE) and returns what all() reports for the
 *  combined comparison mask */
__forceinline bool isvalid (const vfloat16& v)
{
  const vfloat16 lower_bound(-FLT_LARGE);
  const vfloat16 upper_bound(+FLT_LARGE);
  return all((v > lower_bound) & (v < upper_bound));
}
|
||||
|
||||
/*! masked exchange: where m selects, a receives b's value and b receives
 *  a's previous value; elsewhere both keep their own value */
__forceinline void xchg(vboolf16 m, vfloat16& a, vfloat16& b)
{
  const vfloat16 previous_a = a;
  a = select(m, b, a);
  b = select(m, previous_a, b);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Rounding Functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -32,6 +32,8 @@ namespace embree
|
|||
|
||||
__forceinline vfloat() {}
|
||||
__forceinline vfloat(const vfloat4& other) { v = other.v; }
|
||||
//__forceinline vfloat(const vfloat4& other) = default;
|
||||
|
||||
__forceinline vfloat4& operator =(const vfloat4& other) { v = other.v; return *this; }
|
||||
|
||||
__forceinline vfloat(__m128 a) : v(a) {}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "../math/math.h"
|
||||
#include "../math/emath.h"
|
||||
|
||||
#define vboolf vboolf_impl
|
||||
#define vboold vboold_impl
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "../math/math.h"
|
||||
#include "../math/emath.h"
|
||||
|
||||
#define vboolf vboolf_impl
|
||||
#define vboold vboold_impl
|
||||
|
|
|
@ -12,33 +12,177 @@
|
|||
|
||||
namespace embree
|
||||
{
|
||||
void* alignedMalloc(size_t size, size_t align)
|
||||
size_t total_allocations = 0;
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
__thread sycl::context* tls_context_tutorial = nullptr;
|
||||
__thread sycl::device* tls_device_tutorial = nullptr;
|
||||
|
||||
__thread sycl::context* tls_context_embree = nullptr;
|
||||
__thread sycl::device* tls_device_embree = nullptr;
|
||||
|
||||
/* Records the SYCL context and device in the thread-local Embree slots.
 * Aborts if either slot is already occupied. */
void enableUSMAllocEmbree(sycl::context* context, sycl::device* device)
{
  // -- GODOT start --
  // if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
  // if (tls_device_embree  != nullptr) throw std::runtime_error("USM allocation already enabled");
  const bool already_enabled = (tls_context_embree != nullptr) || (tls_device_embree != nullptr);
  if (already_enabled) {
    abort();
  }
  // -- GODOT end --

  tls_context_embree = context;
  tls_device_embree = device;
}
|
||||
|
||||
void disableUSMAllocEmbree()
|
||||
{
|
||||
// -- GODOT start --
|
||||
// if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
|
||||
// if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
|
||||
if (tls_context_embree == nullptr) {
|
||||
abort();
|
||||
}
|
||||
if (tls_device_embree == nullptr) {
|
||||
abort();
|
||||
}
|
||||
// -- GODOT end --
|
||||
tls_context_embree = nullptr;
|
||||
tls_device_embree = nullptr;
|
||||
}
|
||||
|
||||
/* Records the SYCL context and device in the thread-local tutorial slots.
 * Unlike the Embree variant, no "already enabled" check is performed; the
 * previous values are simply overwritten. */
void enableUSMAllocTutorial(sycl::context* context, sycl::device* device)
{
  //if (tls_context_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
  //if (tls_device_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");

  tls_device_tutorial = device;
  tls_context_tutorial = context;
}
|
||||
|
||||
void disableUSMAllocTutorial()
|
||||
{
|
||||
// -- GODOT start --
|
||||
// if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
|
||||
// if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
|
||||
if (tls_context_tutorial == nullptr) {
|
||||
abort();
|
||||
}
|
||||
if (tls_device_tutorial == nullptr) {
|
||||
abort();
|
||||
}
|
||||
// -- GODOT end --
|
||||
|
||||
tls_context_tutorial = nullptr;
|
||||
tls_device_tutorial = nullptr;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void* alignedMalloc(size_t size, size_t align)
|
||||
{
|
||||
if (size == 0)
|
||||
return nullptr;
|
||||
|
||||
|
||||
assert((align & (align-1)) == 0);
|
||||
void* ptr = _mm_malloc(size,align);
|
||||
|
||||
if (size != 0 && ptr == nullptr)
|
||||
// -- GODOT start --
|
||||
// throw std::bad_alloc();
|
||||
// -- GODOT start --
|
||||
// if (size != 0 && ptr == nullptr)
|
||||
// throw std::bad_alloc();
|
||||
if (size != 0 && ptr == nullptr) {
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
|
||||
}
|
||||
// -- GODOT end --
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
void alignedFree(void* ptr)
|
||||
{
|
||||
if (ptr)
|
||||
_mm_free(ptr);
|
||||
}
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
/* Allocates `size` bytes of shared USM memory with the given alignment on
 * the supplied device/context.
 *
 * Returns nullptr for a zero-sized request. With
 * EMBREE_USM_SHARED_DEVICE_READ_ONLY the allocation is requested with the
 * device_read_only property; every other mode uses a plain shared
 * allocation. Each non-empty request bumps total_allocations. Aborts when
 * the allocation yields a null pointer. */
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode)
{
  assert(context);
  assert(device);

  if (size == 0) {
    return nullptr;
  }

  // alignment has to be a power of two
  assert((align & (align-1)) == 0);
  total_allocations++;

  void* const ptr = (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY)
    ? sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only())
    : sycl::aligned_alloc_shared(align,size,*device,*context);

  // -- GODOT start --
  // if (size != 0 && ptr == nullptr)
  //   throw std::bad_alloc();
  if (size != 0 && ptr == nullptr) {
    abort();
  }
  // -- GODOT end --

  return ptr;
}
|
||||
|
||||
static MutexSys g_alloc_mutex;
|
||||
|
||||
void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode)
|
||||
{
|
||||
if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode);
|
||||
if (tls_context_embree ) return alignedSYCLMalloc(tls_context_embree, tls_device_embree, size, align, mode);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* Frees a USM pointer via sycl::free on the given context; a null pointer
 * is ignored. */
void alignedSYCLFree(sycl::context* context, void* ptr)
{
  assert(context);
  if (ptr == nullptr) {
    return;
  }
  sycl::free(ptr,*context);
}
|
||||
|
||||
void alignedSYCLFree(void* ptr)
|
||||
{
|
||||
if (tls_context_tutorial) return alignedSYCLFree(tls_context_tutorial, ptr);
|
||||
if (tls_context_embree ) return alignedSYCLFree(tls_context_embree, ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void* alignedUSMMalloc(size_t size, size_t align, EmbreeUSMMode mode)
|
||||
{
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
if (tls_context_embree || tls_context_tutorial)
|
||||
return alignedSYCLMalloc(size,align,mode);
|
||||
else
|
||||
#endif
|
||||
return alignedMalloc(size,align);
|
||||
}
|
||||
|
||||
void alignedUSMFree(void* ptr)
|
||||
{
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
if (tls_context_embree || tls_context_tutorial)
|
||||
return alignedSYCLFree(ptr);
|
||||
else
|
||||
#endif
|
||||
return alignedFree(ptr);
|
||||
}
|
||||
|
||||
static bool huge_pages_enabled = false;
|
||||
static MutexSys os_init_mutex;
|
||||
|
||||
__forceinline bool isHugePageCandidate(const size_t bytes)
|
||||
__forceinline bool isHugePageCandidate(const size_t bytes)
|
||||
{
|
||||
if (!huge_pages_enabled)
|
||||
return false;
|
||||
|
@ -133,7 +277,9 @@ namespace embree
|
|||
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
|
||||
// -- GODOT start --
|
||||
// if (ptr == nullptr) throw std::bad_alloc();
|
||||
if (ptr == nullptr) abort();
|
||||
if (ptr == nullptr) {
|
||||
abort();
|
||||
}
|
||||
// -- GODOT end --
|
||||
hugepages = false;
|
||||
return ptr;
|
||||
|
@ -150,11 +296,13 @@ namespace embree
|
|||
if (bytesNew >= bytesOld)
|
||||
return bytesOld;
|
||||
|
||||
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
|
||||
// -- GODOT start --
|
||||
// throw std::bad_alloc();
|
||||
// -- GODOT start --
|
||||
// if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
|
||||
// throw std::bad_alloc();
|
||||
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) {
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
}
|
||||
// -- GODOT end --
|
||||
|
||||
return bytesNew;
|
||||
}
|
||||
|
@ -164,11 +312,13 @@ namespace embree
|
|||
if (bytes == 0)
|
||||
return;
|
||||
|
||||
if (!VirtualFree(ptr,0,MEM_RELEASE))
|
||||
// -- GODOT start --
|
||||
// throw std::bad_alloc();
|
||||
// -- GODOT start --
|
||||
// if (!VirtualFree(ptr,0,MEM_RELEASE))
|
||||
// throw std::bad_alloc();
|
||||
if (!VirtualFree(ptr,0,MEM_RELEASE)) {
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
}
|
||||
// -- GODOT end --
|
||||
}
|
||||
|
||||
void os_advise(void *ptr, size_t bytes)
|
||||
|
@ -274,7 +424,9 @@ namespace embree
|
|||
void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
// -- GODOT start --
|
||||
// if (ptr == MAP_FAILED) throw std::bad_alloc();
|
||||
if (ptr == MAP_FAILED) abort();
|
||||
if (ptr == MAP_FAILED) {
|
||||
abort();
|
||||
}
|
||||
// -- GODOT end --
|
||||
hugepages = false;
|
||||
|
||||
|
@ -291,11 +443,13 @@ namespace embree
|
|||
if (bytesNew >= bytesOld)
|
||||
return bytesOld;
|
||||
|
||||
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
|
||||
// -- GODOT start --
|
||||
// throw std::bad_alloc();
|
||||
// -- GODOT start --
|
||||
// if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
|
||||
// throw std::bad_alloc();
|
||||
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) {
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
}
|
||||
// -- GODOT end --
|
||||
|
||||
return bytesNew;
|
||||
}
|
||||
|
@ -308,11 +462,13 @@ namespace embree
|
|||
/* for hugepages we need to also align the size */
|
||||
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
|
||||
bytes = (bytes+pageSize-1) & ~(pageSize-1);
|
||||
if (munmap(ptr,bytes) == -1)
|
||||
// -- GODOT start --
|
||||
// throw std::bad_alloc();
|
||||
// -- GODOT start --
|
||||
// if (munmap(ptr,bytes) == -1)
|
||||
// throw std::bad_alloc();
|
||||
if (munmap(ptr,bytes) == -1) {
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
}
|
||||
// -- GODOT end --
|
||||
}
|
||||
|
||||
/* hint for transparent huge pages (THP) */
|
||||
|
|
|
@ -9,20 +9,72 @@
|
|||
|
||||
namespace embree
|
||||
{
|
||||
#define ALIGNED_STRUCT_(align) \
|
||||
void* operator new(size_t size) { return alignedMalloc(size,align); } \
|
||||
void operator delete(void* ptr) { alignedFree(ptr); } \
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
/* enables SYCL USM allocation */
|
||||
void enableUSMAllocEmbree(sycl::context* context, sycl::device* device);
|
||||
void enableUSMAllocTutorial(sycl::context* context, sycl::device* device);
|
||||
|
||||
/* disables SYCL USM allocation */
|
||||
void disableUSMAllocEmbree();
|
||||
void disableUSMAllocTutorial();
|
||||
|
||||
#endif
|
||||
|
||||
#define ALIGNED_STRUCT_(align) \
|
||||
void* operator new(size_t size) { return alignedMalloc(size,align); } \
|
||||
void operator delete(void* ptr) { alignedFree(ptr); } \
|
||||
void* operator new[](size_t size) { return alignedMalloc(size,align); } \
|
||||
void operator delete[](void* ptr) { alignedFree(ptr); }
|
||||
|
||||
#define ALIGNED_CLASS_(align) \
|
||||
|
||||
#define ALIGNED_STRUCT_USM_(align) \
|
||||
void* operator new(size_t size) { return alignedUSMMalloc(size,align); } \
|
||||
void operator delete(void* ptr) { alignedUSMFree(ptr); } \
|
||||
void* operator new[](size_t size) { return alignedUSMMalloc(size,align); } \
|
||||
void operator delete[](void* ptr) { alignedUSMFree(ptr); }
|
||||
|
||||
#define ALIGNED_CLASS_(align) \
|
||||
public: \
|
||||
ALIGNED_STRUCT_(align) \
|
||||
ALIGNED_STRUCT_(align) \
|
||||
private:
|
||||
|
||||
#define ALIGNED_CLASS_USM_(align) \
|
||||
public: \
|
||||
ALIGNED_STRUCT_USM_(align) \
|
||||
private:
|
||||
|
||||
/*! Allocation mode accepted by alignedUSMMalloc and alignedSYCLMalloc.
 *  EMBREE_USM_SHARED and EMBREE_USM_SHARED_DEVICE_READ_WRITE are two names
 *  for the same value (0). */
enum EmbreeUSMMode {
|
||||
EMBREE_USM_SHARED = 0,
|
||||
EMBREE_USM_SHARED_DEVICE_READ_WRITE = 0,
|
||||
EMBREE_USM_SHARED_DEVICE_READ_ONLY = 1
|
||||
};
|
||||
|
||||
/*! aligned allocation */
|
||||
void* alignedMalloc(size_t size, size_t align);
|
||||
void alignedFree(void* ptr);
|
||||
|
||||
/*! aligned allocation using SYCL USM */
|
||||
void* alignedUSMMalloc(size_t size, size_t align = 16, EmbreeUSMMode mode = EMBREE_USM_SHARED_DEVICE_READ_ONLY);
|
||||
void alignedUSMFree(void* ptr);
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
/*! aligned allocation using SYCL USM */
|
||||
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode);
|
||||
void alignedSYCLFree(sycl::context* context, void* ptr);
|
||||
|
||||
// deleter functor to use as deleter in std unique or shared pointers that
|
||||
// capture raw pointers created by sycl::malloc and its variants
|
||||
template<typename T>
|
||||
struct sycl_deleter
|
||||
{
|
||||
void operator()(T const* ptr)
|
||||
{
|
||||
alignedUSMFree((void*)ptr);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/*! allocator that performs aligned allocations */
|
||||
template<typename T, size_t alignment>
|
||||
|
@ -95,6 +147,37 @@ namespace embree
|
|||
bool hugepages;
|
||||
};
|
||||
|
||||
/*! allocator that never performs allocations */
|
||||
template<typename T>
|
||||
struct no_allocator
|
||||
{
|
||||
typedef T value_type;
|
||||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
typedef T& reference;
|
||||
typedef const T& const_reference;
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
|
||||
__forceinline pointer allocate( size_type n ) {
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("no allocation supported");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
}
|
||||
|
||||
__forceinline void deallocate( pointer p, size_type n ) {
|
||||
}
|
||||
|
||||
__forceinline void construct( pointer p, const_reference val ) {
|
||||
new (p) T(val);
|
||||
}
|
||||
|
||||
__forceinline void destroy( pointer p ) {
|
||||
p->~T();
|
||||
}
|
||||
};
|
||||
|
||||
/*! allocator for IDs */
|
||||
template<typename T, size_t max_id>
|
||||
struct IDPool
|
||||
|
|
|
@ -36,7 +36,7 @@ namespace embree
|
|||
};
|
||||
|
||||
template<typename T>
|
||||
__forceinline void atomic_min(std::atomic<T>& aref, const T& bref)
|
||||
__forceinline void _atomic_min(std::atomic<T>& aref, const T& bref)
|
||||
{
|
||||
const T b = bref.load();
|
||||
while (true) {
|
||||
|
@ -47,7 +47,7 @@ namespace embree
|
|||
}
|
||||
|
||||
template<typename T>
|
||||
__forceinline void atomic_max(std::atomic<T>& aref, const T& bref)
|
||||
__forceinline void _atomic_max(std::atomic<T>& aref, const T& bref)
|
||||
{
|
||||
const T b = bref.load();
|
||||
while (true) {
|
||||
|
|
|
@ -34,7 +34,7 @@ namespace embree
|
|||
void* opaque;
|
||||
};
|
||||
|
||||
/*! fast active barrier using atomitc counter */
|
||||
/*! fast active barrier using atomic counter */
|
||||
struct BarrierActive
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "string.h"
|
||||
#include "estring.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <ctype.h>
|
|
@ -28,6 +28,42 @@ namespace embree
|
|||
std::streamsize precision;
|
||||
};
|
||||
|
||||
/*! Stream buffer that installs itself on an std::ostream and prefixes each
 *  non-empty line written through that stream with `indent` spaces.
 *
 *  The constructor saves the stream's current buffer and replaces it with
 *  this object; the destructor puts the saved buffer back. Empty lines (a
 *  newline directly at the start of a line) are not indented. A negative
 *  `indent` is treated as zero. */
struct IndentOStream : public std::streambuf
{
  explicit IndentOStream(std::ostream &ostream, int indent = 2)
    : streambuf(ostream.rdbuf())
    , start_of_line(true)
    // clamp: a negative count would convert to a huge size_t and throw
    , ident_str(static_cast<std::size_t>(indent > 0 ? indent : 0), ' ')
    , stream(&ostream)
  {
    // set streambuf of ostream to this and save original streambuf
    stream->rdbuf(this);
  }

  ~IndentOStream() override
  {
    if (stream != nullptr) {
      // restore old streambuf
      stream->rdbuf(streambuf);
    }
  }

protected:
  /*! Called for every character because this buffer has no put area.
   *  Emits the indentation before the first character of a line, then
   *  forwards the character to the wrapped buffer. */
  int overflow(int ch) override {
    // EOF means "nothing to append": report success without writing it
    if (traits_type::eq_int_type(ch, traits_type::eof())) {
      return traits_type::not_eof(ch);
    }
    if (start_of_line && ch != '\n') {
      streambuf->sputn(ident_str.data(), ident_str.size());
    }
    start_of_line = ch == '\n';
    return streambuf->sputc(traits_type::to_char_type(ch));
  }

private:
  std::streambuf *streambuf;  // buffer the stream used before we took over
  bool start_of_line;         // true until a non-newline character is written
  std::string ident_str;      // the spaces inserted at each line start
  std::ostream *stream;       // stream whose buffer is restored on destruction
};
|
||||
|
||||
std::string toLowerCase(const std::string& s);
|
||||
std::string toUpperCase(const std::string& s);
|
||||
|
|
@ -64,7 +64,7 @@ namespace embree
|
|||
/// Windows Platform
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__WIN32__)
|
||||
#if defined(__WIN32__) && !defined(__INTEL_LLVM_COMPILER)
|
||||
|
||||
__forceinline size_t read_tsc()
|
||||
{
|
||||
|
@ -89,7 +89,7 @@ namespace embree
|
|||
#endif
|
||||
}
|
||||
|
||||
#if defined(__X86_64__)
|
||||
#if defined(__X86_64__) || defined (__aarch64__)
|
||||
__forceinline size_t bsf(size_t v) {
|
||||
#if defined(__AVX2__)
|
||||
return _tzcnt_u64(v);
|
||||
|
@ -113,7 +113,7 @@ namespace embree
|
|||
return i;
|
||||
}
|
||||
|
||||
#if defined(__X86_64__)
|
||||
#if defined(__X86_64__) || defined (__aarch64__)
|
||||
__forceinline size_t bscf(size_t& v)
|
||||
{
|
||||
size_t i = bsf(v);
|
||||
|
@ -138,7 +138,7 @@ namespace embree
|
|||
#endif
|
||||
}
|
||||
|
||||
#if defined(__X86_64__)
|
||||
#if defined(__X86_64__) || defined (__aarch64__)
|
||||
__forceinline size_t bsr(size_t v) {
|
||||
#if defined(__AVX2__)
|
||||
return 63 -_lzcnt_u64(v);
|
||||
|
@ -196,49 +196,6 @@ namespace embree
|
|||
|
||||
#else
|
||||
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
|
||||
__forceinline void __cpuid(int out[4], int op)
|
||||
{
|
||||
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "0"(op));
|
||||
}
|
||||
|
||||
__forceinline void __cpuid_count(int out[4], int op1, int op2)
|
||||
{
|
||||
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3])
|
||||
: "0" (op1), "2" (op2));
|
||||
}
|
||||
|
||||
#elif defined(__X86_ASM__)
|
||||
|
||||
__forceinline void __cpuid(int out[4], int op) {
|
||||
#if defined(__ARM_NEON)
|
||||
if (op == 0) { // Get CPU name
|
||||
out[0] = 0x41524d20;
|
||||
out[1] = 0x41524d20;
|
||||
out[2] = 0x41524d20;
|
||||
out[3] = 0x41524d20;
|
||||
}
|
||||
#else
|
||||
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !defined(__ARM_NEON)
|
||||
__forceinline void __cpuid_count(int out[4], int op1, int op2) {
|
||||
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
__forceinline uint64_t read_tsc() {
|
||||
#if defined(__X86_ASM__)
|
||||
uint32_t high,low;
|
||||
|
@ -263,6 +220,13 @@ namespace embree
|
|||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
/*! SYCL device variant: forwards to sycl::ctz */
__forceinline unsigned int bsf(unsigned v)
{
  const unsigned int result = sycl::ctz(v);
  return result;
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__64BIT__)
|
||||
__forceinline unsigned bsf(unsigned v)
|
||||
|
@ -280,6 +244,13 @@ namespace embree
|
|||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
/*! SYCL device variant for size_t: forwards to sycl::ctz */
__forceinline size_t bsf(size_t v)
{
  const size_t result = sycl::ctz(v);
  return result;
}
|
||||
#else
|
||||
|
||||
__forceinline size_t bsf(size_t v) {
|
||||
#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
|
@ -294,6 +265,7 @@ namespace embree
|
|||
return __builtin_ctzl(v);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline int bscf(int& v)
|
||||
{
|
||||
|
@ -434,6 +406,41 @@ namespace embree
|
|||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__)
|
||||
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
|
||||
/*! Runs the CPUID instruction with `op` in eax. eax, ecx and edx are
 *  written to out[0], out[2] and out[3]. The ebx result reaches out[1]
 *  through a compiler-chosen register that is swapped with ebx before and
 *  after the instruction, so ebx holds its original value afterwards.
 *  NOTE(review): presumably needed because ebx is reserved for
 *  position-independent code on i386 (the enclosing #if tests __i386__ and
 *  __PIC__) - confirm. */
__forceinline void __cpuid(int out[4], int op)
|
||||
{
|
||||
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "0"(op));
|
||||
}
|
||||
|
||||
/*! Runs the CPUID instruction with `op1` in eax and `op2` in ecx. eax, ecx
 *  and edx are written to out[0], out[2] and out[3]. The ebx result reaches
 *  out[1] through a compiler-chosen register that is swapped with ebx
 *  before and after the instruction, so ebx holds its original value
 *  afterwards. */
__forceinline void __cpuid_count(int out[4], int op1, int op2)
|
||||
{
|
||||
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3])
|
||||
: "0" (op1), "2" (op2));
|
||||
}
|
||||
|
||||
#elif defined(__X86_ASM__)
|
||||
|
||||
/*! Executes the x86 `cpuid` instruction with op loaded into EAX and stores
 *  the resulting EAX, EBX, ECX and EDX values in out[0..3] (in that order).
 *  No input is bound to ECX. */
__forceinline void __cpuid(int out[4], int op) {
|
||||
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
|
||||
}
|
||||
|
||||
/*! Executes the x86 `cpuid` instruction with op1 loaded into EAX and op2
 *  loaded into ECX, and stores the resulting EAX, EBX, ECX and EDX values
 *  in out[0..3] (in that order). */
__forceinline void __cpuid_count(int out[4], int op1, int op2) {
|
||||
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// All Platforms
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -459,8 +466,16 @@ namespace embree
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
/*! SYCL device variant: returns sycl::popcount(in), the number of set bits
 *  of in. */
__forceinline unsigned int popcnt(unsigned int in)
{
  const unsigned int set_bits = sycl::popcount(in);
  return set_bits;
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__SSE4_2__) || defined(__ARM_NEON)
|
||||
|
||||
/*! Returns the result of the _mm_popcnt_u32 intrinsic (population count)
 *  applied to in. */
__forceinline int popcnt(int in)
{
  const int set_bits = _mm_popcnt_u32(in);
  return set_bits;
}
|
||||
|
@ -475,6 +490,8 @@ namespace embree
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__X86_ASM__)
|
||||
|
|
|
@ -86,8 +86,8 @@ namespace embree
|
|||
|
||||
class PaddedSpinLock : public SpinLock
|
||||
{
|
||||
private:
|
||||
char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
|
||||
private:
|
||||
MAYBE_UNUSED char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
|
||||
};
|
||||
/*! safe mutex lock and unlock helper */
|
||||
template<typename Mutex> class Lock {
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#if !defined(_CRT_SECURE_NO_WARNINGS)
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include <cstddef>
|
||||
#include <cassert>
|
||||
|
@ -18,6 +20,30 @@
|
|||
#include <cstring>
|
||||
#include <stdint.h>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
#define __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#pragma clang diagnostic ignored "-W#pragma-messages"
|
||||
|
||||
#include <sycl/sycl.hpp>
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
#include "sycl.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
#define CONSTANT __attribute__((opencl_constant))
|
||||
#else
|
||||
#define CONSTANT
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// detect platform
|
||||
|
@ -115,7 +141,7 @@
|
|||
#else
|
||||
#define __restrict__ //__restrict // causes issues with MSVC
|
||||
#endif
|
||||
#if !defined(__thread)
|
||||
#if !defined(__thread) && !defined(__INTEL_LLVM_COMPILER)
|
||||
#define __thread __declspec(thread)
|
||||
#endif
|
||||
#if !defined(__aligned)
|
||||
|
@ -148,6 +174,10 @@
|
|||
#define MAYBE_UNUSED
|
||||
#endif
|
||||
|
||||
#if !defined(_unused)
|
||||
#define _unused(x) ((void)(x))
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900) // before VS2015 deleted functions are not supported properly
|
||||
#define DELETED
|
||||
#else
|
||||
|
@ -155,7 +185,7 @@
|
|||
#endif
|
||||
|
||||
#if !defined(likely)
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) || defined(__SYCL_DEVICE_ONLY__)
|
||||
#define likely(expr) (expr)
|
||||
#define unlikely(expr) (expr)
|
||||
#else
|
||||
|
@ -171,22 +201,27 @@
|
|||
/* debug printing macros */
|
||||
#define STRING(x) #x
|
||||
#define TOSTRING(x) STRING(x)
|
||||
#define PING embree_cout << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl
|
||||
#define PING embree_cout_uniform << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl
|
||||
#define PRINT(x) embree_cout << STRING(x) << " = " << (x) << embree_endl
|
||||
#define PRINT2(x,y) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
|
||||
#define PRINT3(x,y,z) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
|
||||
#define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
|
||||
|
||||
#define UPRINT(x) embree_cout_uniform << STRING(x) << " = " << (x) << embree_endl
|
||||
#define UPRINT2(x,y) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
|
||||
#define UPRINT3(x,y,z) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
|
||||
#define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
|
||||
|
||||
#if defined(DEBUG) // only report file and line in debug mode
|
||||
// -- GODOT start --
|
||||
// #define THROW_RUNTIME_ERROR(str)
|
||||
// #define THROW_RUNTIME_ERROR(str) \
|
||||
// throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
|
||||
#define THROW_RUNTIME_ERROR(str) \
|
||||
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
|
||||
// -- GODOT end --
|
||||
#else
|
||||
// -- GODOT start --
|
||||
// #define THROW_RUNTIME_ERROR(str)
|
||||
// #define THROW_RUNTIME_ERROR(str) \
|
||||
// throw std::runtime_error(str);
|
||||
#define THROW_RUNTIME_ERROR(str) \
|
||||
abort();
|
||||
|
@ -323,13 +358,209 @@ __forceinline std::string toString(long long value) {
|
|||
#define DISABLE_DEPRECATED_WARNING __pragma(warning (disable: 4996)) // warning: function was declared deprecated
|
||||
#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable : 4996)) // warning: function was declared deprecated
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SYCL specific
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
#define sycl_printf0(format, ...) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
|
||||
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
|
||||
}
|
||||
|
||||
#define sycl_printf0_(format) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
|
||||
sycl::ext::oneapi::experimental::printf(fmt); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define sycl_printf0(format, ...) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
|
||||
}
|
||||
|
||||
#define sycl_printf0_(format) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
sycl::ext::oneapi::experimental::printf(fmt); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define sycl_printf(format, ...) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
|
||||
}
|
||||
|
||||
#define sycl_printf_(format) { \
|
||||
static const CONSTANT char fmt[] = format; \
|
||||
sycl::ext::oneapi::experimental::printf(fmt); \
|
||||
}
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct sycl_ostream_ {
|
||||
sycl_ostream_ (bool uniform) : uniform(uniform) {}
|
||||
bool uniform = false;
|
||||
};
|
||||
struct sycl_endl_ {};
|
||||
|
||||
#define embree_ostream embree::sycl_ostream_
|
||||
#define embree_cout embree::sycl_ostream_(false)
|
||||
#define embree_cout_uniform embree::sycl_ostream_(true)
|
||||
#define embree_endl embree::sycl_endl_()
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, int i)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%i",i);
|
||||
}
|
||||
else
|
||||
sycl_printf("%i ",i);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, unsigned int i)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%u",i);
|
||||
} else
|
||||
sycl_printf("%u ",i);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, float f)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%f",f);
|
||||
} else
|
||||
sycl_printf("%f ",f);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, double d)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%f",d);
|
||||
} else
|
||||
sycl_printf("%f ",d);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, uint64_t l)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%lu",l);
|
||||
} else
|
||||
sycl_printf("%lu ",l);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, long l)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%l",l);
|
||||
} else
|
||||
sycl_printf("%l ",l);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, void* p)
|
||||
{
|
||||
if (cout.uniform) {
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%p",p);
|
||||
} else
|
||||
sycl_printf("%p ",p);
|
||||
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, const char* c)
|
||||
{
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf("%s",c);
|
||||
return cout;
|
||||
}
|
||||
|
||||
inline sycl_ostream_ operator <<(sycl_ostream_ cout, sycl_endl_)
|
||||
{
|
||||
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
|
||||
sycl_printf_("\n");
|
||||
return cout;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* embree output stream */
|
||||
#define embree_ostream std::ostream&
|
||||
#define embree_cout std::cout
|
||||
#define embree_cout_uniform std::cout
|
||||
#define embree_endl std::endl
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
/* printing out SYCL vector types */
|
||||
/* streams the four components of a sycl::float4, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float4& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  out << "," << v.z();
  out << "," << v.w();
  return out << ")";
}
|
||||
/* streams the three components of a sycl::float3, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float3& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  out << "," << v.z();
  return out << ")";
}
|
||||
/* streams the two components of a sycl::float2, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float2& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  return out << ")";
}
|
||||
/* streams the four components of a sycl::int4, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int4& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  out << "," << v.z();
  out << "," << v.w();
  return out << ")";
}
|
||||
/* streams the three components of a sycl::int3, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int3& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  out << "," << v.z();
  return out << ")";
}
|
||||
/* streams the two components of a sycl::int2, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int2& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  return out << ")";
}
|
||||
/* streams the four components of a sycl::uint4, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint4& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  out << "," << v.z();
  out << "," << v.w();
  return out << ")";
}
|
||||
/* streams the three components of a sycl::uint3, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint3& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  out << "," << v.z();
  return out << ")";
}
|
||||
/* streams the two components of a sycl::uint2, comma separated and wrapped in parentheses */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint2& v)
{
  out << "(" << v.x();
  out << "," << v.y();
  return out << ")";
}
|
||||
|
||||
#endif
|
||||
|
||||
/* writes the indentation string " " to the stream n times; nothing is written for n <= 0 */
inline void tab(std::ostream& cout, int n)
{
  int remaining = n;
  while (remaining > 0) {
    cout << " ";
    --remaining;
  }
}
|
||||
|
||||
/* Returns an indentation string of two spaces per level.
 * A non-positive depth yields an empty string: previously a negative depth
 * was converted to a huge unsigned length, which makes the std::string
 * constructor throw std::length_error. The length is computed in size_t so
 * that 2*depth cannot overflow int. */
inline std::string tab(int depth) {
  if (depth <= 0)
    return std::string();
  return std::string(2*static_cast<size_t>(depth),' ');
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Some macros for static profiling
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -0,0 +1,307 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
using sycl::float16;
|
||||
using sycl::float8;
|
||||
using sycl::float4;
|
||||
using sycl::float3;
|
||||
using sycl::float2;
|
||||
using sycl::int16;
|
||||
using sycl::int8;
|
||||
using sycl::int4;
|
||||
using sycl::int3;
|
||||
using sycl::int2;
|
||||
using sycl::uint16;
|
||||
using sycl::uint8;
|
||||
using sycl::uint4;
|
||||
using sycl::uint3;
|
||||
using sycl::uint2;
|
||||
using sycl::uchar16;
|
||||
using sycl::uchar8;
|
||||
using sycl::uchar4;
|
||||
using sycl::uchar3;
|
||||
using sycl::uchar2;
|
||||
using sycl::ushort16;
|
||||
using sycl::ushort8;
|
||||
using sycl::ushort4;
|
||||
using sycl::ushort3;
|
||||
using sycl::ushort2;
|
||||
|
||||
#ifdef __SYCL_DEVICE_ONLY__
|
||||
#define GLOBAL __attribute__((opencl_global))
|
||||
#define LOCAL __attribute__((opencl_local))
|
||||
|
||||
SYCL_EXTERNAL extern int work_group_reduce_add(int x);
|
||||
SYCL_EXTERNAL extern float work_group_reduce_min(float x);
|
||||
SYCL_EXTERNAL extern float work_group_reduce_max(float x);
|
||||
|
||||
SYCL_EXTERNAL extern float atomic_min(volatile GLOBAL float *p, float val);
|
||||
SYCL_EXTERNAL extern float atomic_min(volatile LOCAL float *p, float val);
|
||||
SYCL_EXTERNAL extern float atomic_max(volatile GLOBAL float *p, float val);
|
||||
SYCL_EXTERNAL extern float atomic_max(volatile LOCAL float *p, float val);
|
||||
|
||||
SYCL_EXTERNAL extern "C" unsigned int intel_sub_group_ballot(bool valid);
|
||||
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_assume_uniform(void *p);
|
||||
|
||||
// Load message caching control
|
||||
|
||||
// Cache-control selector for loads. Values of this enum are passed as the
// `cacheOpt` argument of the __builtin_IB_lsc_load_global_* and
// __builtin_IB_lsc_prefetch_global_* declarations in this header.
enum LSC_LDCC {
|
||||
LSC_LDCC_DEFAULT,
|
||||
LSC_LDCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
|
||||
LSC_LDCC_L1UC_L3C, // Override to L1 uncached and L3 cached
|
||||
LSC_LDCC_L1C_L3UC, // Override to L1 cached and L3 uncached
|
||||
LSC_LDCC_L1C_L3C, // Override to L1 cached and L3 cached
|
||||
LSC_LDCC_L1S_L3UC, // Override to L1 streaming load and L3 uncached
|
||||
LSC_LDCC_L1S_L3C, // Override to L1 streaming load and L3 cached
|
||||
LSC_LDCC_L1IAR_L3C, // Override to L1 invalidate-after-read, and L3 cached
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Store message caching control (also used for atomics)
|
||||
|
||||
// Cache-control selector for stores. Values of this enum are passed as the
// `cacheOpt` argument of the __builtin_IB_lsc_store_global_* declarations in
// this header. In the enumerator names WT stands for "written through" and
// WB for "written back".
enum LSC_STCC {
|
||||
LSC_STCC_DEFAULT,
|
||||
LSC_STCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
|
||||
LSC_STCC_L1UC_L3WB, // Override to L1 uncached and L3 written back
|
||||
LSC_STCC_L1WT_L3UC, // Override to L1 written through and L3 uncached
|
||||
LSC_STCC_L1WT_L3WB, // Override to L1 written through and L3 written back
|
||||
LSC_STCC_L1S_L3UC, // Override to L1 streaming and L3 uncached
|
||||
LSC_STCC_L1S_L3WB, // Override to L1 streaming and L3 written back
|
||||
LSC_STCC_L1WB_L3WB, // Override to L1 written back and L3 written back
|
||||
};
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// LSC Loads
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uchar_to_uint (const GLOBAL uint8_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D8U32
|
||||
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_ushort_to_uint(const GLOBAL uint16_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D16U32
|
||||
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uint (const GLOBAL uint32_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V1
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::uint2 __builtin_IB_lsc_load_global_uint2 (const GLOBAL sycl::uint2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V2
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::uint3 __builtin_IB_lsc_load_global_uint3 (const GLOBAL sycl::uint3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V3
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::uint4 __builtin_IB_lsc_load_global_uint4 (const GLOBAL sycl::uint4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V4
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::uint8 __builtin_IB_lsc_load_global_uint8 (const GLOBAL sycl::uint8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V8
|
||||
SYCL_EXTERNAL /* extern "C" */ uint64_t __builtin_IB_lsc_load_global_ulong (const GLOBAL uint64_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V1
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::ulong2 __builtin_IB_lsc_load_global_ulong2 (const GLOBAL sycl::ulong2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V2
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::ulong3 __builtin_IB_lsc_load_global_ulong3 (const GLOBAL sycl::ulong3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V3
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::ulong4 __builtin_IB_lsc_load_global_ulong4 (const GLOBAL sycl::ulong4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V4
|
||||
SYCL_EXTERNAL /* extern "C" */ sycl::ulong8 __builtin_IB_lsc_load_global_ulong8 (const GLOBAL sycl::ulong8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V8
|
||||
|
||||
// global address space
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uchar_from_uint (GLOBAL uint8_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D8U32
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ushort_from_uint(GLOBAL uint16_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D16U32
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint (GLOBAL uint32_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D32V1
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint2 (GLOBAL sycl::uint2 *base, int immElemOff, sycl::uint2 val, enum LSC_STCC cacheOpt); //D32V2
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint3 (GLOBAL sycl::uint3 *base, int immElemOff, sycl::uint3 val, enum LSC_STCC cacheOpt); //D32V3
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint4 (GLOBAL sycl::uint4 *base, int immElemOff, sycl::uint4 val, enum LSC_STCC cacheOpt); //D32V4
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint8 (GLOBAL sycl::uint8 *base, int immElemOff, sycl::uint8 val, enum LSC_STCC cacheOpt); //D32V8
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong (GLOBAL uint64_t *base, int immElemOff, uint64_t val, enum LSC_STCC cacheOpt); //D64V1
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong2 (GLOBAL sycl::ulong2 *base, int immElemOff, sycl::ulong2 val, enum LSC_STCC cacheOpt); //D64V2
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong3 (GLOBAL sycl::ulong3 *base, int immElemOff, sycl::ulong3 val, enum LSC_STCC cacheOpt); //D64V3
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong4 (GLOBAL sycl::ulong4 *base, int immElemOff, sycl::ulong4 val, enum LSC_STCC cacheOpt); //D64V4
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong8 (GLOBAL sycl::ulong8 *base, int immElemOff, sycl::ulong8 val, enum LSC_STCC cacheOpt); //D64V8
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// prefetching
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// LSC Pre-Fetch Load functions with CacheControls
|
||||
// global address space
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uchar (const GLOBAL uint8_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D8U32
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ushort(const GLOBAL uint16_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D16U32
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint (const GLOBAL uint32_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V1
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint2 (const GLOBAL sycl::uint2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V2
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint3 (const GLOBAL sycl::uint3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V3
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint4 (const GLOBAL sycl::uint4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V4
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint8 (const GLOBAL sycl::uint8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V8
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong (const GLOBAL uint64_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V1
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong2(const GLOBAL sycl::ulong2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V2
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong3(const GLOBAL sycl::ulong3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V3
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong4(const GLOBAL sycl::ulong4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V4
|
||||
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong8(const GLOBAL sycl::ulong8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V8
|
||||
|
||||
#else
|
||||
|
||||
#define GLOBAL
|
||||
#define LOCAL
|
||||
|
||||
/* dummy functions for host */
|
||||
/* Host stand-ins for the device work-group reductions: each returns its
 * argument unchanged. */
inline int work_group_reduce_add(int x) { return x; }
inline float work_group_reduce_min(float x) { return x; }
inline float work_group_reduce_max(float x) { return x; }

/* Host stand-ins for the device atomics: the pointer is neither read nor
 * written, val is returned as is. Parameter names are commented out to avoid
 * unused-parameter warnings, and the stray ';' after the bodies is gone. */
inline float atomic_min(volatile float* /*p*/, float val) { return val; }
inline float atomic_max(volatile float* /*p*/, float val) { return val; }

/* Host stand-in for the device ballot: always returns 0. */
inline uint32_t intel_sub_group_ballot(bool /*valid*/) { return 0; }
|
||||
|
||||
#endif
|
||||
|
||||
/* creates a temporary that is enforced to be uniform */
|
||||
#define SYCL_UNIFORM_VAR(Ty,tmp,k) \
|
||||
Ty tmp##_data; \
|
||||
Ty* p##tmp##_data = (Ty*) sub_group_broadcast((uint64_t)&tmp##_data,k); \
|
||||
Ty& tmp = *p##tmp##_data;
|
||||
|
||||
#if !defined(__forceinline)
|
||||
#define __forceinline inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
#if __SYCL_COMPILER_VERSION < 20210801
|
||||
#define all_of_group all_of
|
||||
#define any_of_group any_of
|
||||
#define none_of_group none_of
|
||||
#define group_broadcast broadcast
|
||||
#define reduce_over_group reduce
|
||||
#define exclusive_scan_over_group exclusive_scan
|
||||
#define inclusive_scan_over_group inclusive_scan
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename T>
|
||||
__forceinline T cselect(const bool mask, const T &a, const T &b)
|
||||
{
|
||||
return sycl::select(b,a,(int)mask);
|
||||
}
|
||||
|
||||
template<typename T, typename M>
|
||||
__forceinline T cselect(const M &mask, const T &a, const T &b)
|
||||
{
|
||||
return sycl::select(b,a,mask);
|
||||
}
|
||||
|
||||
__forceinline const sycl::sub_group this_sub_group() {
|
||||
return sycl::ext::oneapi::experimental::this_sub_group();
|
||||
}
|
||||
|
||||
__forceinline const uint32_t get_sub_group_local_id() {
|
||||
return this_sub_group().get_local_id()[0];
|
||||
}
|
||||
|
||||
__forceinline const uint32_t get_sub_group_size() {
|
||||
return this_sub_group().get_max_local_range().size();
|
||||
}
|
||||
|
||||
__forceinline const uint32_t get_sub_group_id() {
|
||||
return this_sub_group().get_group_id()[0];
|
||||
}
|
||||
|
||||
__forceinline const uint32_t get_num_sub_groups() {
|
||||
return this_sub_group().get_group_range().size();
|
||||
}
|
||||
|
||||
__forceinline uint32_t sub_group_ballot(bool pred) {
|
||||
return intel_sub_group_ballot(pred);
|
||||
}
|
||||
|
||||
__forceinline bool sub_group_all_of(bool pred) {
|
||||
return sycl::all_of_group(this_sub_group(),pred);
|
||||
}
|
||||
|
||||
__forceinline bool sub_group_any_of(bool pred) {
|
||||
return sycl::any_of_group(this_sub_group(),pred);
|
||||
}
|
||||
|
||||
__forceinline bool sub_group_none_of(bool pred) {
|
||||
return sycl::none_of_group(this_sub_group(),pred);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_broadcast(T x, sycl::id<1> local_id) {
|
||||
return sycl::group_broadcast<sycl::sub_group>(this_sub_group(),x,local_id);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_make_uniform(T x) {
|
||||
return sub_group_broadcast(x,sycl::ctz(intel_sub_group_ballot(true)));
|
||||
}
|
||||
|
||||
__forceinline void assume_uniform_array(void* ptr) {
|
||||
#ifdef __SYCL_DEVICE_ONLY__
|
||||
__builtin_IB_assume_uniform(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, BinaryOperation binary_op) {
|
||||
return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,binary_op);
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, T init, BinaryOperation binary_op) {
|
||||
return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,init,binary_op);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_reduce_min(T x, T init) {
|
||||
return sub_group_reduce(x, init, sycl::ext::oneapi::minimum<T>());
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_reduce_min(T x) {
|
||||
return sub_group_reduce(x, sycl::ext::oneapi::minimum<T>());
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_reduce_max(T x) {
|
||||
return sub_group_reduce(x, sycl::ext::oneapi::maximum<T>());
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_reduce_add(T x) {
|
||||
return sub_group_reduce(x, sycl::ext::oneapi::plus<T>());
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, BinaryOperation binary_op) {
|
||||
return sycl::exclusive_scan_over_group(this_sub_group(),x,binary_op);
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan_min(T x) {
|
||||
return sub_group_exclusive_scan(x,sycl::ext::oneapi::minimum<T>());
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, T init, BinaryOperation binary_op) {
|
||||
return sycl::exclusive_scan_over_group(this_sub_group(),x,init,binary_op);
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op) {
|
||||
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op);
|
||||
}
|
||||
|
||||
template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op, T init) {
|
||||
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op,init);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_shuffle(T x, sycl::id<1> local_id) {
|
||||
return this_sub_group().shuffle(x, local_id);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_shuffle_down(T x, uint32_t delta) {
|
||||
return this_sub_group().shuffle_down(x, delta);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_shuffle_up(T x, uint32_t delta) {
|
||||
return this_sub_group().shuffle_up(x, delta);
|
||||
}
|
||||
|
||||
template <typename T> __forceinline T sub_group_load(const void* src) {
|
||||
return this_sub_group().load(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)src));
|
||||
}
|
||||
|
||||
template <typename T> __forceinline void sub_group_store(void* dst, const T& x) {
|
||||
this_sub_group().store(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)dst),x);
|
||||
}
|
||||
}
|
||||
|
||||
#if __SYCL_COMPILER_VERSION < 20210801
|
||||
#undef all_of_group
|
||||
#undef any_of_group
|
||||
#undef none_of_group
|
||||
#undef group_broadcast
|
||||
#undef reduce_over_group
|
||||
#undef exclusive_scan_over_group
|
||||
#undef inclusive_scan_over_group
|
||||
#endif
|
|
@ -1,9 +1,15 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER)
|
||||
// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
#include "sysinfo.h"
|
||||
#include "intrinsics.h"
|
||||
#include "string.h"
|
||||
#include "estring.h"
|
||||
#include "ref.h"
|
||||
#if defined(__FREEBSD__)
|
||||
#include <sys/cpuset.h>
|
||||
|
@ -690,3 +696,6 @@ namespace embree
|
|||
}
|
||||
#endif
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER)
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "thread.h"
|
||||
#include "sysinfo.h"
|
||||
#include "string.h"
|
||||
#include "estring.h"
|
||||
|
||||
#include <iostream>
|
||||
#if defined(__ARM_NEON)
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
namespace embree
|
||||
{
|
||||
class Device;
|
||||
|
||||
template<typename T, typename allocator>
|
||||
class vector_t
|
||||
{
|
||||
|
@ -25,6 +27,12 @@ namespace embree
|
|||
template<typename M>
|
||||
__forceinline explicit vector_t (M alloc, size_t sz)
|
||||
: alloc(alloc), size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); }
|
||||
|
||||
/* Constructs an empty vector from a Device pointer by delegating to the
 * (alloc, size) constructor with a size of 0. */
__forceinline vector_t (Device* alloc)
|
||||
: vector_t(alloc,0) {}
|
||||
|
||||
/* Constructs a vector on top of an existing buffer: items points at data,
 * the capacity (size_alloced) is bytes/sizeof(T) and no element is active.
 * The allocator member is not named in the initializer list.
 * NOTE(review): presumably meant for the externally managed evector alias --
 * confirm that the allocator in use never frees `data`. */
__forceinline vector_t(void* data, size_t bytes)
|
||||
: size_active(0), size_alloced(bytes/sizeof(T)), items((T*)data) {}
|
||||
|
||||
__forceinline ~vector_t() {
|
||||
clear();
|
||||
|
@ -65,6 +73,10 @@ namespace embree
|
|||
return *this;
|
||||
}
|
||||
|
||||
/* Returns a mutable reference to the allocator member of this vector. */
__forceinline allocator& getAlloc() {
|
||||
return alloc;
|
||||
}
|
||||
|
||||
/********************** Iterators ****************************/
|
||||
|
||||
__forceinline iterator begin() { return items; };
|
||||
|
@ -215,6 +227,10 @@ namespace embree
|
|||
if (new_alloced <= size_alloced)
|
||||
return size_alloced;
|
||||
|
||||
/* if current size is 0 allocate exact requested size */
|
||||
if (size_alloced == 0)
|
||||
return new_alloced;
|
||||
|
||||
/* resize to next power of 2 otherwise */
|
||||
size_t new_size_alloced = size_alloced;
|
||||
while (new_size_alloced < new_alloced) {
|
||||
|
@ -237,8 +253,12 @@ namespace embree
|
|||
/*! vector class that performs aligned allocations */
|
||||
template<typename T>
|
||||
using avector = vector_t<T,aligned_allocator<T,std::alignment_of<T>::value> >;
|
||||
|
||||
|
||||
/*! vector class that performs OS allocations */
|
||||
template<typename T>
|
||||
using ovector = vector_t<T,os_allocator<T> >;
|
||||
|
||||
/*! vector class with externally managed data buffer */
|
||||
template<typename T>
|
||||
using evector = vector_t<T,no_allocator<T>>;
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#if defined(TASKING_INTERNAL)
|
||||
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
|
||||
# include "taskschedulerinternal.h"
|
||||
#elif defined(TASKING_TBB)
|
||||
# include "taskschedulertbb.h"
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "taskschedulerinternal.h"
|
||||
#include "../math/math.h"
|
||||
#include "../math/emath.h"
|
||||
#include "../sys/sysinfo.h"
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -50,11 +50,11 @@ namespace embree
|
|||
thread.task = this;
|
||||
// -- GODOT start --
|
||||
// try {
|
||||
// if (thread.scheduler->cancellingException == nullptr)
|
||||
// if (context->cancellingException == nullptr)
|
||||
closure->execute();
|
||||
// } catch (...) {
|
||||
// if (thread.scheduler->cancellingException == nullptr)
|
||||
// thread.scheduler->cancellingException = std::current_exception();
|
||||
// if (context->cancellingException == nullptr)
|
||||
// context->cancellingException = std::current_exception();
|
||||
// }
|
||||
// -- GODOT end --
|
||||
thread.task = prevTask;
|
||||
|
@ -152,7 +152,8 @@ namespace embree
|
|||
{
|
||||
Lock<MutexSys> lock(g_mutex);
|
||||
assert(newNumThreads);
|
||||
newNumThreads = min(newNumThreads, (size_t) getNumberOfLogicalThreads());
|
||||
if (newNumThreads == std::numeric_limits<size_t>::max())
|
||||
newNumThreads = (size_t) getNumberOfLogicalThreads();
|
||||
|
||||
numThreads = newNumThreads;
|
||||
if (!startThreads && !running) return;
|
||||
|
@ -232,7 +233,8 @@ namespace embree
|
|||
TaskScheduler::TaskScheduler()
|
||||
: threadCounter(0), anyTasksRunning(0), hasRootTask(false)
|
||||
{
|
||||
threadLocal.resize(2*getNumberOfLogicalThreads()); // FIXME: this has to be 2x as in the compatibility join mode with rtcCommitScene the worker threads also join. When disallowing rtcCommitScene to join a build we can remove the 2x.
|
||||
assert(threadPool);
|
||||
threadLocal.resize(2 * TaskScheduler::threadCount()); // FIXME: this has to be 2x as in the compatibility join mode with rtcCommitScene the worker threads also join. When disallowing rtcCommitScene to join a build we can remove the 2x.
|
||||
for (size_t i=0; i<threadLocal.size(); i++)
|
||||
threadLocal[i].store(nullptr);
|
||||
}
|
||||
|
@ -293,11 +295,7 @@ namespace embree
|
|||
size_t threadIndex = allocThreadIndex();
|
||||
condition.wait(mutex, [&] () { return hasRootTask.load(); });
|
||||
mutex.unlock();
|
||||
// -- GODOT start --
|
||||
// std::exception_ptr except = thread_loop(threadIndex);
|
||||
// if (except != nullptr) std::rethrow_exception(except);
|
||||
thread_loop(threadIndex);
|
||||
// -- GODOT end --
|
||||
}
|
||||
|
||||
void TaskScheduler::reset() {
|
||||
|
@ -321,18 +319,15 @@ namespace embree
|
|||
return old;
|
||||
}
|
||||
|
||||
dll_export bool TaskScheduler::wait()
|
||||
dll_export void TaskScheduler::wait()
|
||||
{
|
||||
Thread* thread = TaskScheduler::thread();
|
||||
if (thread == nullptr) return true;
|
||||
if (thread == nullptr)
|
||||
return;
|
||||
while (thread->tasks.execute_local_internal(*thread,thread->task)) {};
|
||||
return thread->scheduler->cancellingException == nullptr;
|
||||
}
|
||||
|
||||
// -- GODOT start --
|
||||
// std::exception_ptr TaskScheduler::thread_loop(size_t threadIndex)
|
||||
void TaskScheduler::thread_loop(size_t threadIndex)
|
||||
// -- GODOT end --
|
||||
{
|
||||
/* allocate thread structure */
|
||||
std::unique_ptr<Thread> mthread(new Thread(threadIndex,this)); // too large for stack allocation
|
||||
|
@ -354,11 +349,6 @@ namespace embree
|
|||
threadLocal[threadIndex].store(nullptr);
|
||||
swapThread(oldThread);
|
||||
|
||||
/* remember exception to throw */
|
||||
// -- GODOT start --
|
||||
// std::exception_ptr except = nullptr;
|
||||
// if (cancellingException != nullptr) except = cancellingException;
|
||||
// -- GODOT end --
|
||||
/* wait for all threads to terminate */
|
||||
threadCounter--;
|
||||
#if defined(__WIN32__)
|
||||
|
@ -376,10 +366,6 @@ namespace embree
|
|||
yield();
|
||||
#endif
|
||||
}
|
||||
// -- GODOT start --
|
||||
// return except;
|
||||
return;
|
||||
// -- GODOT end --
|
||||
}
|
||||
|
||||
bool TaskScheduler::steal_from_other_threads(Thread& thread)
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include "../sys/ref.h"
|
||||
#include "../sys/atomic.h"
|
||||
#include "../math/range.h"
|
||||
#include "../../include/embree3/rtcore.h"
|
||||
#include "../../include/embree4/rtcore.h"
|
||||
|
||||
#include <list>
|
||||
|
||||
|
@ -38,6 +38,13 @@ namespace embree
|
|||
virtual void execute() = 0;
|
||||
};
|
||||
|
||||
|
||||
struct TaskGroupContext {
|
||||
TaskGroupContext() : cancellingException(nullptr) {}
|
||||
|
||||
std::exception_ptr cancellingException;
|
||||
};
|
||||
|
||||
/*! builds a task interface from a closure */
|
||||
template<typename Closure>
|
||||
struct ClosureTaskFunction : public TaskFunction
|
||||
|
@ -76,16 +83,16 @@ namespace embree
|
|||
: state(DONE) {}
|
||||
|
||||
/*! construction of new task */
|
||||
__forceinline Task (TaskFunction* closure, Task* parent, size_t stackPtr, size_t N)
|
||||
: dependencies(1), stealable(true), closure(closure), parent(parent), stackPtr(stackPtr), N(N)
|
||||
__forceinline Task (TaskFunction* closure, Task* parent, TaskGroupContext* context, size_t stackPtr, size_t N)
|
||||
: dependencies(1), stealable(true), closure(closure), parent(parent), context(context), stackPtr(stackPtr), N(N)
|
||||
{
|
||||
if (parent) parent->add_dependencies(+1);
|
||||
switch_state(DONE,INITIALIZED);
|
||||
}
|
||||
|
||||
/*! construction of stolen task, stealing thread will decrement initial dependency */
|
||||
__forceinline Task (TaskFunction* closure, Task* parent)
|
||||
: dependencies(1), stealable(false), closure(closure), parent(parent), stackPtr(-1), N(1)
|
||||
__forceinline Task (TaskFunction* closure, Task* parent, TaskGroupContext* context)
|
||||
: dependencies(1), stealable(false), closure(closure), parent(parent), context(context), stackPtr(-1), N(1)
|
||||
{
|
||||
switch_state(DONE,INITIALIZED);
|
||||
}
|
||||
|
@ -95,7 +102,7 @@ namespace embree
|
|||
{
|
||||
if (!stealable) return false;
|
||||
if (!try_switch_state(INITIALIZED,DONE)) return false;
|
||||
new (&child) Task(closure, this);
|
||||
new (&child) Task(closure, this, context);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -110,6 +117,7 @@ namespace embree
|
|||
std::atomic<bool> stealable; //!< true if task can be stolen
|
||||
TaskFunction* closure; //!< the closure to execute
|
||||
Task* parent; //!< parent task to signal when we are finished
|
||||
TaskGroupContext* context;
|
||||
size_t stackPtr; //!< stack location where closure is stored
|
||||
size_t N; //!< approximative size of task
|
||||
};
|
||||
|
@ -122,28 +130,32 @@ namespace embree
|
|||
__forceinline void* alloc(size_t bytes, size_t align = 64)
|
||||
{
|
||||
size_t ofs = bytes + ((align - stackPtr) & (align-1));
|
||||
if (stackPtr + ofs > CLOSURE_STACK_SIZE)
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("closure stack overflow");
|
||||
// -- GODOT start --
|
||||
// if (stackPtr + ofs > CLOSURE_STACK_SIZE)
|
||||
// throw std::runtime_error("closure stack overflow");
|
||||
if (stackPtr + ofs > CLOSURE_STACK_SIZE) {
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
}
|
||||
// -- GODOT end --
|
||||
stackPtr += ofs;
|
||||
return &stack[stackPtr-bytes];
|
||||
}
|
||||
|
||||
template<typename Closure>
|
||||
__forceinline void push_right(Thread& thread, const size_t size, const Closure& closure)
|
||||
__forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context)
|
||||
{
|
||||
if (right >= TASK_STACK_SIZE)
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("task stack overflow");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
// -- GODOT start --
|
||||
// if (right >= TASK_STACK_SIZE)
|
||||
// throw std::runtime_error("task stack overflow");
|
||||
if (right >= TASK_STACK_SIZE) {
|
||||
abort();
|
||||
}
|
||||
// -- GODOT end --
|
||||
|
||||
/* allocate new task on right side of stack */
|
||||
size_t oldStackPtr = stackPtr;
|
||||
TaskFunction* func = new (alloc(sizeof(ClosureTaskFunction<Closure>))) ClosureTaskFunction<Closure>(closure);
|
||||
new (&(tasks[right.load()])) Task(func,thread.task,oldStackPtr,size);
|
||||
new (&tasks[right.load()]) Task(func,thread.task,context,oldStackPtr,size);
|
||||
right++;
|
||||
|
||||
/* also move left pointer */
|
||||
|
@ -178,7 +190,7 @@ namespace embree
|
|||
: threadIndex(threadIndex), task(nullptr), scheduler(scheduler) {}
|
||||
|
||||
__forceinline size_t threadCount() {
|
||||
return scheduler->threadCounter;
|
||||
return scheduler->threadCounter;
|
||||
}
|
||||
|
||||
size_t threadIndex; //!< ID of this thread
|
||||
|
@ -244,10 +256,7 @@ namespace embree
|
|||
void wait_for_threads(size_t threadCount);
|
||||
|
||||
/*! thread loop for all worker threads */
|
||||
// -- GODOT start --
|
||||
// std::exception_ptr thread_loop(size_t threadIndex);
|
||||
void thread_loop(size_t threadIndex);
|
||||
// -- GODOT end --
|
||||
|
||||
/*! steals a task from a different thread */
|
||||
bool steal_from_other_threads(Thread& thread);
|
||||
|
@ -257,7 +266,7 @@ namespace embree
|
|||
|
||||
/* spawn a new task at the top of the threads task stack */
|
||||
template<typename Closure>
|
||||
void spawn_root(const Closure& closure, size_t size = 1, bool useThreadPool = true)
|
||||
void spawn_root(const Closure& closure, TaskGroupContext* context, size_t size = 1, bool useThreadPool = true)
|
||||
{
|
||||
if (useThreadPool) startThreads();
|
||||
|
||||
|
@ -267,7 +276,7 @@ namespace embree
|
|||
assert(threadLocal[threadIndex].load() == nullptr);
|
||||
threadLocal[threadIndex] = &thread;
|
||||
Thread* oldThread = swapThread(&thread);
|
||||
thread.tasks.push_right(thread,size,closure);
|
||||
thread.tasks.push_right(thread,size,closure,context);
|
||||
{
|
||||
Lock<MutexSys> lock(mutex);
|
||||
anyTasksRunning++;
|
||||
|
@ -286,51 +295,52 @@ namespace embree
|
|||
|
||||
/* remember exception to throw */
|
||||
std::exception_ptr except = nullptr;
|
||||
if (cancellingException != nullptr) except = cancellingException;
|
||||
if (context->cancellingException != nullptr) except = context->cancellingException;
|
||||
|
||||
/* wait for all threads to terminate */
|
||||
threadCounter--;
|
||||
while (threadCounter > 0) yield();
|
||||
cancellingException = nullptr;
|
||||
context->cancellingException = nullptr;
|
||||
|
||||
/* re-throw proper exception */
|
||||
if (except != nullptr)
|
||||
if (except != nullptr) {
|
||||
std::rethrow_exception(except);
|
||||
}
|
||||
}
|
||||
|
||||
/* spawn a new task at the top of the threads task stack */
|
||||
template<typename Closure>
|
||||
static __forceinline void spawn(size_t size, const Closure& closure)
|
||||
static __forceinline void spawn(size_t size, const Closure& closure, TaskGroupContext* context)
|
||||
{
|
||||
Thread* thread = TaskScheduler::thread();
|
||||
if (likely(thread != nullptr)) thread->tasks.push_right(*thread,size,closure);
|
||||
else instance()->spawn_root(closure,size);
|
||||
if (likely(thread != nullptr)) thread->tasks.push_right(*thread,size,closure,context);
|
||||
else instance()->spawn_root(closure,context,size);
|
||||
}
|
||||
|
||||
/* spawn a new task at the top of the threads task stack */
|
||||
template<typename Closure>
|
||||
static __forceinline void spawn(const Closure& closure) {
|
||||
spawn(1,closure);
|
||||
static __forceinline void spawn(const Closure& closure, TaskGroupContext* taskGroupContext) {
|
||||
spawn(1,closure,taskGroupContext);
|
||||
}
|
||||
|
||||
/* spawn a new task set */
|
||||
template<typename Index, typename Closure>
|
||||
static void spawn(const Index begin, const Index end, const Index blockSize, const Closure& closure)
|
||||
static void spawn(const Index begin, const Index end, const Index blockSize, const Closure& closure, TaskGroupContext* context)
|
||||
{
|
||||
spawn(end-begin, [=]()
|
||||
{
|
||||
if (end-begin <= blockSize) {
|
||||
return closure(range<Index>(begin,end));
|
||||
}
|
||||
const Index center = (begin+end)/2;
|
||||
spawn(begin,center,blockSize,closure);
|
||||
spawn(center,end ,blockSize,closure);
|
||||
wait();
|
||||
});
|
||||
{
|
||||
if (end-begin <= blockSize) {
|
||||
return closure(range<Index>(begin,end));
|
||||
}
|
||||
const Index center = (begin+end)/2;
|
||||
spawn(begin,center,blockSize,closure,context);
|
||||
spawn(center,end ,blockSize,closure,context);
|
||||
wait();
|
||||
},context);
|
||||
}
|
||||
|
||||
/* work on spawned subtasks and wait until all have finished */
|
||||
dll_export static bool wait();
|
||||
dll_export static void wait();
|
||||
|
||||
/* returns the ID of the current thread */
|
||||
dll_export static size_t threadID();
|
||||
|
@ -366,7 +376,6 @@ namespace embree
|
|||
std::atomic<size_t> threadCounter;
|
||||
std::atomic<size_t> anyTasksRunning;
|
||||
std::atomic<bool> hasRootTask;
|
||||
std::exception_ptr cancellingException;
|
||||
MutexSys mutex;
|
||||
ConditionSys condition;
|
||||
|
||||
|
|
|
@ -15,6 +15,12 @@
|
|||
# define NOMINMAX
|
||||
#endif
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER)
|
||||
// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
// We need to define these to avoid implicit linkage against
|
||||
// tbb_debug.lib under Windows. When removing these lines debug build
|
||||
// under Windows fails.
|
||||
|
@ -25,6 +31,18 @@
|
|||
#include "tbb/tbb.h"
|
||||
#include "tbb/parallel_sort.h"
|
||||
|
||||
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR >= 8)
|
||||
# define USE_TASK_ARENA 1
|
||||
#else
|
||||
# define USE_TASK_ARENA 0
|
||||
#endif
|
||||
|
||||
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION >= 11009) // TBB 2019 Update 9
|
||||
# define TASKING_TBB_USE_TASK_ISOLATION 1
|
||||
#else
|
||||
# define TASKING_TBB_USE_TASK_ISOLATION 0
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct TaskScheduler
|
||||
|
@ -65,3 +83,7 @@ namespace embree
|
|||
};
|
||||
|
||||
};
|
||||
|
||||
#if defined(__INTEL_LLVM_COMPILER)
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
|
@ -1,163 +0,0 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "rtcore_device.h"
|
||||
|
||||
RTC_NAMESPACE_BEGIN
|
||||
|
||||
/* Forward declarations for ray structures */
|
||||
struct RTCRayHit;
|
||||
struct RTCRayHit4;
|
||||
struct RTCRayHit8;
|
||||
struct RTCRayHit16;
|
||||
struct RTCRayHitNp;
|
||||
|
||||
/* Scene flags */
|
||||
enum RTCSceneFlags
|
||||
{
|
||||
RTC_SCENE_FLAG_NONE = 0,
|
||||
RTC_SCENE_FLAG_DYNAMIC = (1 << 0),
|
||||
RTC_SCENE_FLAG_COMPACT = (1 << 1),
|
||||
RTC_SCENE_FLAG_ROBUST = (1 << 2),
|
||||
RTC_SCENE_FLAG_CONTEXT_FILTER_FUNCTION = (1 << 3)
|
||||
};
|
||||
|
||||
/* Creates a new scene. */
|
||||
RTC_API RTCScene rtcNewScene(RTCDevice device);
|
||||
|
||||
/* Returns the device the scene got created in. The reference count of
|
||||
* the device is incremented by this function. */
|
||||
RTC_API RTCDevice rtcGetSceneDevice(RTCScene hscene);
|
||||
|
||||
/* Retains the scene (increments the reference count). */
|
||||
RTC_API void rtcRetainScene(RTCScene scene);
|
||||
|
||||
/* Releases the scene (decrements the reference count). */
|
||||
RTC_API void rtcReleaseScene(RTCScene scene);
|
||||
|
||||
|
||||
/* Attaches the geometry to a scene. */
|
||||
RTC_API unsigned int rtcAttachGeometry(RTCScene scene, RTCGeometry geometry);
|
||||
|
||||
/* Attaches the geometry to a scene using the specified geometry ID. */
|
||||
RTC_API void rtcAttachGeometryByID(RTCScene scene, RTCGeometry geometry, unsigned int geomID);
|
||||
|
||||
/* Detaches the geometry from the scene. */
|
||||
RTC_API void rtcDetachGeometry(RTCScene scene, unsigned int geomID);
|
||||
|
||||
/* Gets a geometry handle from the scene. This function is not thread safe and should get used during rendering. */
|
||||
RTC_API RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID);
|
||||
|
||||
/* Gets a geometry handle from the scene. This function is thread safe and should NOT get used during rendering. */
|
||||
RTC_API RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID);
|
||||
|
||||
|
||||
/* Commits the scene. */
|
||||
RTC_API void rtcCommitScene(RTCScene scene);
|
||||
|
||||
/* Commits the scene from multiple threads. */
|
||||
RTC_API void rtcJoinCommitScene(RTCScene scene);
|
||||
|
||||
|
||||
/* Progress monitor callback function */
|
||||
typedef bool (*RTCProgressMonitorFunction)(void* ptr, double n);
|
||||
|
||||
/* Sets the progress monitor callback function of the scene. */
|
||||
RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene scene, RTCProgressMonitorFunction progress, void* ptr);
|
||||
|
||||
/* Sets the build quality of the scene. */
|
||||
RTC_API void rtcSetSceneBuildQuality(RTCScene scene, enum RTCBuildQuality quality);
|
||||
|
||||
/* Sets the scene flags. */
|
||||
RTC_API void rtcSetSceneFlags(RTCScene scene, enum RTCSceneFlags flags);
|
||||
|
||||
/* Returns the scene flags. */
|
||||
RTC_API enum RTCSceneFlags rtcGetSceneFlags(RTCScene scene);
|
||||
|
||||
/* Returns the axis-aligned bounds of the scene. */
|
||||
RTC_API void rtcGetSceneBounds(RTCScene scene, struct RTCBounds* bounds_o);
|
||||
|
||||
/* Returns the linear axis-aligned bounds of the scene. */
|
||||
RTC_API void rtcGetSceneLinearBounds(RTCScene scene, struct RTCLinearBounds* bounds_o);
|
||||
|
||||
|
||||
/* Perform a closest point query of the scene. */
|
||||
RTC_API bool rtcPointQuery(RTCScene scene, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr);
|
||||
|
||||
/* Perform a closest point query with a packet of 4 points with the scene. */
|
||||
RTC_API bool rtcPointQuery4(const int* valid, RTCScene scene, struct RTCPointQuery4* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
|
||||
|
||||
/* Perform a closest point query with a packet of 4 points with the scene. */
|
||||
RTC_API bool rtcPointQuery8(const int* valid, RTCScene scene, struct RTCPointQuery8* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
|
||||
|
||||
/* Perform a closest point query with a packet of 4 points with the scene. */
|
||||
RTC_API bool rtcPointQuery16(const int* valid, RTCScene scene, struct RTCPointQuery16* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
|
||||
|
||||
/* Intersects a single ray with the scene. */
|
||||
RTC_API void rtcIntersect1(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit* rayhit);
|
||||
|
||||
/* Intersects a packet of 4 rays with the scene. */
|
||||
RTC_API void rtcIntersect4(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit4* rayhit);
|
||||
|
||||
/* Intersects a packet of 8 rays with the scene. */
|
||||
RTC_API void rtcIntersect8(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit8* rayhit);
|
||||
|
||||
/* Intersects a packet of 16 rays with the scene. */
|
||||
RTC_API void rtcIntersect16(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit16* rayhit);
|
||||
|
||||
/* Intersects a stream of M rays with the scene. */
|
||||
RTC_API void rtcIntersect1M(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit* rayhit, unsigned int M, size_t byteStride);
|
||||
|
||||
/* Intersects a stream of pointers to M rays with the scene. */
|
||||
RTC_API void rtcIntersect1Mp(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit** rayhit, unsigned int M);
|
||||
|
||||
/* Intersects a stream of M ray packets of size N in SOA format with the scene. */
|
||||
RTC_API void rtcIntersectNM(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHitN* rayhit, unsigned int N, unsigned int M, size_t byteStride);
|
||||
|
||||
/* Intersects a stream of M ray packets of size N in SOA format with the scene. */
|
||||
RTC_API void rtcIntersectNp(RTCScene scene, struct RTCIntersectContext* context, const struct RTCRayHitNp* rayhit, unsigned int N);
|
||||
|
||||
/* Tests a single ray for occlusion with the scene. */
|
||||
RTC_API void rtcOccluded1(RTCScene scene, struct RTCIntersectContext* context, struct RTCRay* ray);
|
||||
|
||||
/* Tests a packet of 4 rays for occlusion occluded with the scene. */
|
||||
RTC_API void rtcOccluded4(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRay4* ray);
|
||||
|
||||
/* Tests a packet of 8 rays for occlusion with the scene. */
|
||||
RTC_API void rtcOccluded8(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRay8* ray);
|
||||
|
||||
/* Tests a packet of 16 rays for occlusion with the scene. */
|
||||
RTC_API void rtcOccluded16(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRay16* ray);
|
||||
|
||||
/* Tests a stream of M rays for occlusion with the scene. */
|
||||
RTC_API void rtcOccluded1M(RTCScene scene, struct RTCIntersectContext* context, struct RTCRay* ray, unsigned int M, size_t byteStride);
|
||||
|
||||
/* Tests a stream of pointers to M rays for occlusion with the scene. */
|
||||
RTC_API void rtcOccluded1Mp(RTCScene scene, struct RTCIntersectContext* context, struct RTCRay** ray, unsigned int M);
|
||||
|
||||
/* Tests a stream of M ray packets of size N in SOA format for occlusion with the scene. */
|
||||
RTC_API void rtcOccludedNM(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayN* ray, unsigned int N, unsigned int M, size_t byteStride);
|
||||
|
||||
/* Tests a stream of M ray packets of size N in SOA format for occlusion with the scene. */
|
||||
RTC_API void rtcOccludedNp(RTCScene scene, struct RTCIntersectContext* context, const struct RTCRayNp* ray, unsigned int N);
|
||||
|
||||
/*! collision callback */
|
||||
struct RTCCollision { unsigned int geomID0; unsigned int primID0; unsigned int geomID1; unsigned int primID1; };
|
||||
typedef void (*RTCCollideFunc) (void* userPtr, struct RTCCollision* collisions, unsigned int num_collisions);
|
||||
|
||||
/*! Performs collision detection of two scenes */
|
||||
RTC_API void rtcCollide (RTCScene scene0, RTCScene scene1, RTCCollideFunc callback, void* userPtr);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
||||
/* Helper for easily combining scene flags */
|
||||
inline RTCSceneFlags operator|(RTCSceneFlags a, RTCSceneFlags b) {
|
||||
return (RTCSceneFlags)((size_t)a | (size_t)b);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
RTC_NAMESPACE_END
|
||||
|
|
@ -27,6 +27,8 @@ enum RTCBufferType
|
|||
RTC_BUFFER_TYPE_VERTEX_CREASE_WEIGHT = 21,
|
||||
RTC_BUFFER_TYPE_HOLE = 22,
|
||||
|
||||
RTC_BUFFER_TYPE_TRANSFORM = 23,
|
||||
|
||||
RTC_BUFFER_TYPE_FLAGS = 32
|
||||
};
|
||||
|
|
@ -12,7 +12,7 @@
|
|||
RTC_NAMESPACE_BEGIN
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(_M_X64)
|
||||
#if defined(_M_X64) || defined(_M_ARM64)
|
||||
typedef long long ssize_t;
|
||||
#else
|
||||
typedef int ssize_t;
|
||||
|
@ -41,6 +41,12 @@ typedef int ssize_t;
|
|||
# define RTC_FORCEINLINE inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
# define RTC_OPTIONAL_ARGUMENT = nullptr
|
||||
#else
|
||||
# define RTC_OPTIONAL_ARGUMENT
|
||||
#endif
|
||||
|
||||
/* Invalid geometry ID */
|
||||
#define RTC_INVALID_GEOMETRY_ID ((unsigned int)-1)
|
||||
|
||||
|
@ -141,7 +147,9 @@ enum RTCFormat
|
|||
RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR = 0x9244,
|
||||
|
||||
/* special 12-byte format for grids */
|
||||
RTC_FORMAT_GRID = 0xA001
|
||||
RTC_FORMAT_GRID = 0xA001,
|
||||
|
||||
RTC_FORMAT_QUATERNION_DECOMPOSITION = 0xB001,
|
||||
};
|
||||
|
||||
/* Build quality levels */
|
||||
|
@ -167,12 +175,138 @@ struct RTC_ALIGN(16) RTCLinearBounds
|
|||
struct RTCBounds bounds1;
|
||||
};
|
||||
|
||||
/* Intersection context flags */
|
||||
enum RTCIntersectContextFlags
|
||||
/* Feature flags for SYCL specialization constants */
|
||||
enum RTCFeatureFlags
|
||||
{
|
||||
RTC_INTERSECT_CONTEXT_FLAG_NONE = 0,
|
||||
RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT = (0 << 0), // optimize for incoherent rays
|
||||
RTC_INTERSECT_CONTEXT_FLAG_COHERENT = (1 << 0) // optimize for coherent rays
|
||||
RTC_FEATURE_FLAG_NONE = 0,
|
||||
|
||||
RTC_FEATURE_FLAG_MOTION_BLUR = 1 << 0,
|
||||
|
||||
RTC_FEATURE_FLAG_TRIANGLE = 1 << 1,
|
||||
RTC_FEATURE_FLAG_QUAD = 1 << 2,
|
||||
RTC_FEATURE_FLAG_GRID = 1 << 3,
|
||||
|
||||
RTC_FEATURE_FLAG_SUBDIVISION = 1 << 4,
|
||||
|
||||
RTC_FEATURE_FLAG_CONE_LINEAR_CURVE = 1 << 5,
|
||||
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE = 1 << 6,
|
||||
RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE = 1 << 7,
|
||||
|
||||
RTC_FEATURE_FLAG_ROUND_BEZIER_CURVE = 1 << 8,
|
||||
RTC_FEATURE_FLAG_FLAT_BEZIER_CURVE = 1 << 9,
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BEZIER_CURVE = 1 << 10,
|
||||
|
||||
RTC_FEATURE_FLAG_ROUND_BSPLINE_CURVE = 1 << 11,
|
||||
RTC_FEATURE_FLAG_FLAT_BSPLINE_CURVE = 1 << 12,
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE = 1 << 13,
|
||||
|
||||
RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE = 1 << 14,
|
||||
RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE = 1 << 15,
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE = 1 << 16,
|
||||
|
||||
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE = 1 << 17,
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE = 1 << 18,
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_CATMULL_ROM_CURVE = 1 << 19,
|
||||
|
||||
RTC_FEATURE_FLAG_SPHERE_POINT = 1 << 20,
|
||||
RTC_FEATURE_FLAG_DISC_POINT = 1 << 21,
|
||||
RTC_FEATURE_FLAG_ORIENTED_DISC_POINT = 1 << 22,
|
||||
|
||||
RTC_FEATURE_FLAG_POINT =
|
||||
RTC_FEATURE_FLAG_SPHERE_POINT |
|
||||
RTC_FEATURE_FLAG_DISC_POINT |
|
||||
RTC_FEATURE_FLAG_ORIENTED_DISC_POINT,
|
||||
|
||||
RTC_FEATURE_FLAG_ROUND_CURVES =
|
||||
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_BEZIER_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_BSPLINE_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE,
|
||||
|
||||
RTC_FEATURE_FLAG_FLAT_CURVES =
|
||||
RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_BEZIER_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_BSPLINE_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE,
|
||||
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_CURVES =
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BEZIER_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_CATMULL_ROM_CURVE,
|
||||
|
||||
RTC_FEATURE_FLAG_LINEAR_CURVES =
|
||||
RTC_FEATURE_FLAG_CONE_LINEAR_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE,
|
||||
|
||||
RTC_FEATURE_FLAG_BEZIER_CURVES =
|
||||
RTC_FEATURE_FLAG_ROUND_BEZIER_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_BEZIER_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BEZIER_CURVE,
|
||||
|
||||
RTC_FEATURE_FLAG_BSPLINE_CURVES =
|
||||
RTC_FEATURE_FLAG_ROUND_BSPLINE_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_BSPLINE_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE,
|
||||
|
||||
RTC_FEATURE_FLAG_HERMITE_CURVES =
|
||||
RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE,
|
||||
|
||||
RTC_FEATURE_FLAG_CURVES =
|
||||
RTC_FEATURE_FLAG_CONE_LINEAR_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_BEZIER_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_BEZIER_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BEZIER_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_BSPLINE_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_BSPLINE_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE |
|
||||
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
|
||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE |
|
||||
RTC_FEATURE_FLAG_NORMAL_ORIENTED_CATMULL_ROM_CURVE,
|
||||
|
||||
RTC_FEATURE_FLAG_INSTANCE = 1 << 23,
|
||||
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = 1 << 24,
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY = 1 << 25,
|
||||
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION =
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
|
||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY,
|
||||
|
||||
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS = 1 << 26,
|
||||
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY = 1 << 27,
|
||||
|
||||
RTC_FEATURE_FLAG_USER_GEOMETRY =
|
||||
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS |
|
||||
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY,
|
||||
|
||||
RTC_FEATURE_FLAG_32_BIT_RAY_MASK = 1 << 28,
|
||||
|
||||
RTC_FEATURE_FLAG_INSTANCE_ARRAY = 1 << 29,
|
||||
|
||||
RTC_FEATURE_FLAG_ALL = 0xffffffff,
|
||||
};
|
||||
|
||||
/* Ray query flags */
|
||||
enum RTCRayQueryFlags
|
||||
{
|
||||
/* matching intel_ray_flags_t layout */
|
||||
RTC_RAY_QUERY_FLAG_NONE = 0,
|
||||
RTC_RAY_QUERY_FLAG_INVOKE_ARGUMENT_FILTER = (1 << 1), // enable argument filter for each geometry
|
||||
|
||||
/* embree specific flags */
|
||||
RTC_RAY_QUERY_FLAG_INCOHERENT = (0 << 16), // optimize for incoherent rays
|
||||
RTC_RAY_QUERY_FLAG_COHERENT = (1 << 16), // optimize for coherent rays
|
||||
};
|
||||
|
||||
/* Arguments for RTCFilterFunctionN */
|
||||
|
@ -180,7 +314,7 @@ struct RTCFilterFunctionNArguments
|
|||
{
|
||||
int* valid;
|
||||
void* geometryUserPtr;
|
||||
struct RTCIntersectContext* context;
|
||||
struct RTCRayQueryContext* context;
|
||||
struct RTCRayN* ray;
|
||||
struct RTCHitN* hit;
|
||||
unsigned int N;
|
||||
|
@ -189,38 +323,41 @@ struct RTCFilterFunctionNArguments
|
|||
/* Filter callback function */
|
||||
typedef void (*RTCFilterFunctionN)(const struct RTCFilterFunctionNArguments* args);
|
||||
|
||||
/* Intersection context passed to intersect/occluded calls */
|
||||
struct RTCIntersectContext
|
||||
/* Intersection callback function */
|
||||
struct RTCIntersectFunctionNArguments;
|
||||
typedef void (*RTCIntersectFunctionN)(const struct RTCIntersectFunctionNArguments* args);
|
||||
|
||||
/* Occlusion callback function */
|
||||
struct RTCOccludedFunctionNArguments;
|
||||
typedef void (*RTCOccludedFunctionN)(const struct RTCOccludedFunctionNArguments* args);
|
||||
|
||||
/* Ray query context passed to intersect/occluded calls */
|
||||
struct RTCRayQueryContext
|
||||
{
|
||||
enum RTCIntersectContextFlags flags; // intersection flags
|
||||
RTCFilterFunctionN filter; // filter function to execute
|
||||
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
unsigned int instStackSize; // Number of instances currently on the stack.
|
||||
#endif
|
||||
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // The current stack of instance ids.
|
||||
|
||||
#if RTC_MIN_WIDTH
|
||||
float minWidthDistanceFactor; // curve radius is set to this factor times distance to ray origin
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // The current stack of instance primitive ids.
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Initializes an intersection context. */
|
||||
RTC_FORCEINLINE void rtcInitIntersectContext(struct RTCIntersectContext* context)
|
||||
/* Initializes a ray query context. */
|
||||
RTC_FORCEINLINE void rtcInitRayQueryContext(struct RTCRayQueryContext* context)
|
||||
{
|
||||
unsigned l = 0;
|
||||
context->flags = RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT;
|
||||
context->filter = NULL;
|
||||
|
||||
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
context->instStackSize = 0;
|
||||
#endif
|
||||
for (; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
|
||||
|
||||
for (; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
context->instID[l] = RTC_INVALID_GEOMETRY_ID;
|
||||
|
||||
#if RTC_MIN_WIDTH
|
||||
context->minWidthDistanceFactor = 0.0f;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
context->instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* Point query structure for closest point query */
|
||||
|
@ -278,15 +415,28 @@ struct RTC_ALIGN(16) RTCPointQueryContext
|
|||
// instance ids.
|
||||
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT];
|
||||
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
// instance prim ids.
|
||||
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT];
|
||||
#endif
|
||||
|
||||
// number of instances currently on the stack.
|
||||
unsigned int instStackSize;
|
||||
};
|
||||
|
||||
/* Initializes an intersection context. */
|
||||
/* Initializes a point query context. */
|
||||
RTC_FORCEINLINE void rtcInitPointQueryContext(struct RTCPointQueryContext* context)
|
||||
{
|
||||
unsigned l = 0;
|
||||
|
||||
context->instStackSize = 0;
|
||||
context->instID[0] = RTC_INVALID_GEOMETRY_ID;
|
||||
|
||||
for (; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
context->instID[l] = RTC_INVALID_GEOMETRY_ID;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
context->instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
struct RTC_ALIGN(16) RTCPointQueryFunctionArguments
|
||||
|
@ -308,7 +458,7 @@ struct RTC_ALIGN(16) RTCPointQueryFunctionArguments
|
|||
struct RTCPointQueryContext* context;
|
||||
|
||||
// If the current instance transform M (= context->world2inst[context->instStackSize])
|
||||
// is a similarity matrix, i.e there is a constant factor similarityScale such that,
|
||||
// is a similarity matrix, i.e there is a constant factor similarityScale such that
|
||||
// for all x,y: dist(Mx, My) = similarityScale * dist(x, y),
|
||||
// The similarity scale is 0, if the current instance transform is not a
|
||||
// similarity transform and vice versa. The similarity scale allows to compute
|
||||
|
@ -322,5 +472,31 @@ struct RTC_ALIGN(16) RTCPointQueryFunctionArguments
|
|||
};
|
||||
|
||||
typedef bool (*RTCPointQueryFunction)(struct RTCPointQueryFunctionArguments* args);
|
||||
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)
|
||||
|
||||
/* returns function pointer to be usable in SYCL kernel */
|
||||
template<auto F>
|
||||
inline decltype(F) rtcGetSYCLDeviceFunctionPointer(sycl::queue& queue)
|
||||
{
|
||||
sycl::buffer<cl_ulong> fptr_buf(1);
|
||||
{
|
||||
auto fptr_acc = fptr_buf.get_host_access();
|
||||
fptr_acc[0] = 0;
|
||||
}
|
||||
|
||||
queue.submit([&](sycl::handler& cgh) {
|
||||
auto fptr_acc = fptr_buf.get_access<sycl::access::mode::discard_write>(cgh);
|
||||
cgh.single_task([=]() {
|
||||
fptr_acc[0] = reinterpret_cast<cl_ulong>(F);
|
||||
});
|
||||
});
|
||||
queue.wait_and_throw();
|
||||
|
||||
auto fptr_acc = fptr_buf.get_host_access();
|
||||
return (decltype(F)) fptr_acc[0];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
RTC_NAMESPACE_END
|
|
@ -3,21 +3,32 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#define RTC_VERSION_MAJOR 3
|
||||
#define RTC_VERSION_MINOR 13
|
||||
#define RTC_VERSION_PATCH 5
|
||||
#define RTC_VERSION 31305
|
||||
#define RTC_VERSION_STRING "3.13.5"
|
||||
#if !defined(EMBREE_SYCL_SUPPORT)
|
||||
// #cmakedefine EMBREE_SYCL_SUPPORT
|
||||
#endif
|
||||
|
||||
#define RTC_VERSION_MAJOR 4
|
||||
#define RTC_VERSION_MINOR 3
|
||||
#define RTC_VERSION_PATCH 1
|
||||
#define RTC_VERSION 40301
|
||||
#define RTC_VERSION_STRING "4.3.1"
|
||||
|
||||
#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
|
||||
|
||||
// #cmakedefine EMBREE_GEOMETRY_INSTANCE_ARRAY
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
#define RTC_GEOMETRY_INSTANCE_ARRAY
|
||||
#endif
|
||||
|
||||
// #cmakedefine01 EMBREE_SYCL_GEOMETRY_CALLBACK
|
||||
|
||||
#define EMBREE_MIN_WIDTH 0
|
||||
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
|
||||
|
||||
#if !defined(EMBREE_STATIC_LIB)
|
||||
# define EMBREE_STATIC_LIB
|
||||
#define EMBREE_STATIC_LIB
|
||||
#endif
|
||||
/* #undef EMBREE_API_NAMESPACE*/
|
||||
// #cmakedefine EMBREE_API_NAMESPACE
|
||||
|
||||
#if defined(EMBREE_API_NAMESPACE)
|
||||
# define RTC_NAMESPACE
|
||||
|
@ -56,3 +67,14 @@
|
|||
#else
|
||||
# define RTC_API RTC_API_IMPORT
|
||||
#endif
|
||||
|
||||
#if defined(ISPC)
|
||||
# define RTC_SYCL_INDIRECTLY_CALLABLE
|
||||
#elif defined(__SYCL_DEVICE_ONLY__)
|
||||
# define RTC_SYCL_INDIRECTLY_CALLABLE [[intel::device_indirectly_callable]] SYCL_EXTERNAL
|
||||
# define RTC_SYCL_API SYCL_EXTERNAL
|
||||
#else
|
||||
# define RTC_SYCL_INDIRECTLY_CALLABLE
|
||||
# define RTC_SYCL_API RTC_API
|
||||
#endif
|
||||
|
|
@ -13,6 +13,24 @@ typedef struct RTCDeviceTy* RTCDevice;
|
|||
/* Creates a new Embree device. */
|
||||
RTC_API RTCDevice rtcNewDevice(const char* config);
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)
|
||||
|
||||
|
||||
/* Creates a new Embree SYCL device. */
|
||||
RTC_API_EXTERN_C RTCDevice rtcNewSYCLDevice(sycl::context context, const char* config);
|
||||
|
||||
/* Checks if SYCL device is supported by Embree. */
|
||||
RTC_API bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
|
||||
|
||||
/* SYCL selector for Embree supported devices */
|
||||
RTC_API int rtcSYCLDeviceSelector(const sycl::device sycl_device);
|
||||
|
||||
/* Set the SYCL device to be used to allocate data */
|
||||
RTC_API void rtcSetDeviceSYCLDevice(RTCDevice device, const sycl::device sycl_device);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* Retains the Embree device (increments the reference count). */
|
||||
RTC_API void rtcRetainDevice(RTCDevice device);
|
||||
|
||||
|
@ -30,8 +48,8 @@ enum RTCDeviceProperty
|
|||
RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED = 32,
|
||||
RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED = 33,
|
||||
RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED = 34,
|
||||
RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED = 35,
|
||||
|
||||
RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED = 62,
|
||||
RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED = 63,
|
||||
RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED = 64,
|
||||
RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED = 65,
|
||||
|
@ -66,7 +84,7 @@ enum RTCError
|
|||
RTC_ERROR_INVALID_OPERATION = 3,
|
||||
RTC_ERROR_OUT_OF_MEMORY = 4,
|
||||
RTC_ERROR_UNSUPPORTED_CPU = 5,
|
||||
RTC_ERROR_CANCELLED = 6
|
||||
RTC_ERROR_CANCELLED = 6,
|
||||
};
|
||||
|
||||
/* Returns the error code. */
|
|
@ -48,7 +48,8 @@ enum RTCGeometryType
|
|||
RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE = 60, // flat normal-oriented Catmull-Rom curves
|
||||
|
||||
RTC_GEOMETRY_TYPE_USER = 120, // user-defined geometry
|
||||
RTC_GEOMETRY_TYPE_INSTANCE = 121 // scene instance
|
||||
RTC_GEOMETRY_TYPE_INSTANCE = 121, // scene instance
|
||||
RTC_GEOMETRY_TYPE_INSTANCE_ARRAY = 122, // scene instance array
|
||||
};
|
||||
|
||||
/* Interpolation modes for subdivision surfaces */
|
||||
|
@ -86,30 +87,24 @@ struct RTCIntersectFunctionNArguments
|
|||
int* valid;
|
||||
void* geometryUserPtr;
|
||||
unsigned int primID;
|
||||
struct RTCIntersectContext* context;
|
||||
struct RTCRayQueryContext* context;
|
||||
struct RTCRayHitN* rayhit;
|
||||
unsigned int N;
|
||||
unsigned int geomID;
|
||||
};
|
||||
|
||||
/* Intersection callback function */
|
||||
typedef void (*RTCIntersectFunctionN)(const struct RTCIntersectFunctionNArguments* args);
|
||||
|
||||
/* Arguments for RTCOccludedFunctionN */
|
||||
struct RTCOccludedFunctionNArguments
|
||||
{
|
||||
int* valid;
|
||||
void* geometryUserPtr;
|
||||
unsigned int primID;
|
||||
struct RTCIntersectContext* context;
|
||||
struct RTCRayQueryContext* context;
|
||||
struct RTCRayN* ray;
|
||||
unsigned int N;
|
||||
unsigned int geomID;
|
||||
};
|
||||
|
||||
/* Occlusion callback function */
|
||||
typedef void (*RTCOccludedFunctionN)(const struct RTCOccludedFunctionNArguments* args);
|
||||
|
||||
/* Arguments for RTCDisplacementFunctionN */
|
||||
struct RTCDisplacementFunctionNArguments
|
||||
{
|
||||
|
@ -192,6 +187,9 @@ RTC_API void rtcSetGeometryIntersectFilterFunction(RTCGeometry geometry, RTCFilt
|
|||
/* Sets the occlusion filter callback function of the geometry. */
|
||||
RTC_API void rtcSetGeometryOccludedFilterFunction(RTCGeometry geometry, RTCFilterFunctionN filter);
|
||||
|
||||
/* Enables argument version of intersection or occlusion filter function. */
|
||||
RTC_API void rtcSetGeometryEnableFilterFunctionFromArguments(RTCGeometry geometry, bool enable);
|
||||
|
||||
/* Sets the user-defined data pointer of the geometry. */
|
||||
RTC_API void rtcSetGeometryUserData(RTCGeometry geometry, void* ptr);
|
||||
|
||||
|
@ -214,15 +212,17 @@ RTC_API void rtcSetGeometryIntersectFunction(RTCGeometry geometry, RTCIntersectF
|
|||
RTC_API void rtcSetGeometryOccludedFunction(RTCGeometry geometry, RTCOccludedFunctionN occluded);
|
||||
|
||||
/* Invokes the intersection filter from the intersection callback function. */
|
||||
RTC_API void rtcFilterIntersection(const struct RTCIntersectFunctionNArguments* args, const struct RTCFilterFunctionNArguments* filterArgs);
|
||||
RTC_SYCL_API void rtcInvokeIntersectFilterFromGeometry(const struct RTCIntersectFunctionNArguments* args, const struct RTCFilterFunctionNArguments* filterArgs);
|
||||
|
||||
/* Invokes the occlusion filter from the occlusion callback function. */
|
||||
RTC_API void rtcFilterOcclusion(const struct RTCOccludedFunctionNArguments* args, const struct RTCFilterFunctionNArguments* filterArgs);
|
||||
|
||||
RTC_SYCL_API void rtcInvokeOccludedFilterFromGeometry(const struct RTCOccludedFunctionNArguments* args, const struct RTCFilterFunctionNArguments* filterArgs);
|
||||
|
||||
/* Sets the instanced scene of an instance geometry. */
|
||||
RTC_API void rtcSetGeometryInstancedScene(RTCGeometry geometry, RTCScene scene);
|
||||
|
||||
/* Sets the instanced scenes of an instance array geometry. */
|
||||
RTC_API void rtcSetGeometryInstancedScenes(RTCGeometry geometry, RTCScene* scenes, size_t numScenes);
|
||||
|
||||
/* Sets the transformation of an instance for the specified time step. */
|
||||
RTC_API void rtcSetGeometryTransform(RTCGeometry geometry, unsigned int timeStep, enum RTCFormat format, const void* xfm);
|
||||
|
||||
|
@ -232,6 +232,12 @@ RTC_API void rtcSetGeometryTransformQuaternion(RTCGeometry geometry, unsigned in
|
|||
/* Returns the interpolated transformation of an instance for the specified time. */
|
||||
RTC_API void rtcGetGeometryTransform(RTCGeometry geometry, float time, enum RTCFormat format, void* xfm);
|
||||
|
||||
/*
|
||||
* Returns the interpolated transformation of the instPrimID'th instance of an
|
||||
 * instance array for the specified time. If geometry is a regular instance,
|
||||
* instPrimID must be 0.
|
||||
*/
|
||||
RTC_API void rtcGetGeometryTransformEx(RTCGeometry geometry, unsigned int instPrimID, float time, enum RTCFormat format, void* xfm);
|
||||
|
||||
/* Sets the uniform tessellation rate of the geometry. */
|
||||
RTC_API void rtcSetGeometryTessellationRate(RTCGeometry geometry, float tessellationRate);
|
|
@ -39,6 +39,9 @@ struct RTC_ALIGN(16) RTCHit
|
|||
unsigned int primID; // primitive ID
|
||||
unsigned int geomID; // geometry ID
|
||||
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Combined ray/hit structure for a single ray */
|
||||
|
@ -80,6 +83,9 @@ struct RTC_ALIGN(16) RTCHit4
|
|||
unsigned int primID[4];
|
||||
unsigned int geomID[4];
|
||||
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT][4];
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT][4];
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Combined ray/hit structure for a packet of 4 rays */
|
||||
|
@ -121,6 +127,9 @@ struct RTC_ALIGN(32) RTCHit8
|
|||
unsigned int primID[8];
|
||||
unsigned int geomID[8];
|
||||
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT][8];
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT][8];
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Combined ray/hit structure for a packet of 8 rays */
|
||||
|
@ -162,6 +171,9 @@ struct RTC_ALIGN(64) RTCHit16
|
|||
unsigned int primID[16];
|
||||
unsigned int geomID[16];
|
||||
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT][16];
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT][16];
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Combined ray/hit structure for a packet of 16 rays */
|
||||
|
@ -171,47 +183,6 @@ struct RTCRayHit16
|
|||
struct RTCHit16 hit;
|
||||
};
|
||||
|
||||
/* Ray structure for a packet/stream of N rays in pointer SOA layout */
|
||||
struct RTCRayNp
|
||||
{
|
||||
float* org_x;
|
||||
float* org_y;
|
||||
float* org_z;
|
||||
float* tnear;
|
||||
|
||||
float* dir_x;
|
||||
float* dir_y;
|
||||
float* dir_z;
|
||||
float* time;
|
||||
|
||||
float* tfar;
|
||||
unsigned int* mask;
|
||||
unsigned int* id;
|
||||
unsigned int* flags;
|
||||
};
|
||||
|
||||
/* Hit structure for a packet/stream of N rays in pointer SOA layout */
|
||||
struct RTCHitNp
|
||||
{
|
||||
float* Ng_x;
|
||||
float* Ng_y;
|
||||
float* Ng_z;
|
||||
|
||||
float* u;
|
||||
float* v;
|
||||
|
||||
unsigned int* primID;
|
||||
unsigned int* geomID;
|
||||
unsigned int* instID[RTC_MAX_INSTANCE_LEVEL_COUNT];
|
||||
};
|
||||
|
||||
/* Combined ray/hit structure for a packet/stream of N rays in pointer SOA layout */
|
||||
struct RTCRayHitNp
|
||||
{
|
||||
struct RTCRayNp ray;
|
||||
struct RTCHitNp hit;
|
||||
};
|
||||
|
||||
struct RTCRayN;
|
||||
struct RTCHitN;
|
||||
struct RTCRayHitN;
|
||||
|
@ -242,9 +213,12 @@ RTC_FORCEINLINE float& RTCHitN_Ng_z(RTCHitN* hit, unsigned int N, unsigned int i
|
|||
RTC_FORCEINLINE float& RTCHitN_u(RTCHitN* hit, unsigned int N, unsigned int i) { return ((float*)hit)[3*N+i]; }
|
||||
RTC_FORCEINLINE float& RTCHitN_v(RTCHitN* hit, unsigned int N, unsigned int i) { return ((float*)hit)[4*N+i]; }
|
||||
|
||||
RTC_FORCEINLINE unsigned int& RTCHitN_primID(RTCHitN* hit, unsigned int N, unsigned int i) { return ((unsigned*)hit)[5*N+i]; }
|
||||
RTC_FORCEINLINE unsigned int& RTCHitN_geomID(RTCHitN* hit, unsigned int N, unsigned int i) { return ((unsigned*)hit)[6*N+i]; }
|
||||
RTC_FORCEINLINE unsigned int& RTCHitN_instID(RTCHitN* hit, unsigned int N, unsigned int i, unsigned int l) { return ((unsigned*)hit)[7*N+i+N*l]; }
|
||||
RTC_FORCEINLINE unsigned int& RTCHitN_primID (RTCHitN* hit, unsigned int N, unsigned int i) { return ((unsigned*)hit)[5*N+i]; }
|
||||
RTC_FORCEINLINE unsigned int& RTCHitN_geomID (RTCHitN* hit, unsigned int N, unsigned int i) { return ((unsigned*)hit)[6*N+i]; }
|
||||
RTC_FORCEINLINE unsigned int& RTCHitN_instID (RTCHitN* hit, unsigned int N, unsigned int i, unsigned int l) { return ((unsigned*)hit)[7*N + N*l + i]; }
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
RTC_FORCEINLINE unsigned int& RTCHitN_instPrimID(RTCHitN* hit, unsigned int N, unsigned int i, unsigned int l) { return ((unsigned*)hit)[7*N + N*RTC_MAX_INSTANCE_LEVEL_COUNT + N*l + i]; }
|
||||
#endif
|
||||
|
||||
/* Helper functions to extract RTCRayN and RTCHitN from RTCRayHitN */
|
||||
RTC_FORCEINLINE RTCRayN* RTCRayHitN_RayN(RTCRayHitN* rayhit, unsigned int N) { return (RTCRayN*)&((float*)rayhit)[0*N]; }
|
||||
|
@ -284,6 +258,9 @@ struct RTCHitNt
|
|||
unsigned int primID[N];
|
||||
unsigned int geomID[N];
|
||||
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT][N];
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT][N];
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Helper structure for a combined ray/hit packet of compile-time size N */
|
||||
|
@ -322,8 +299,12 @@ RTC_FORCEINLINE RTCHit rtcGetHitFromHitN(RTCHitN* hitN, unsigned int N, unsigned
|
|||
hit.v = RTCHitN_v(hitN,N,i);
|
||||
hit.primID = RTCHitN_primID(hitN,N,i);
|
||||
hit.geomID = RTCHitN_geomID(hitN,N,i);
|
||||
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++)
|
||||
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
|
||||
hit.instID[l] = RTCHitN_instID(hitN,N,i,l);
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
hit.instPrimID[l] = RTCHitN_instPrimID(hitN,N,i,l);
|
||||
#endif
|
||||
}
|
||||
return hit;
|
||||
}
|
||||
|
||||
|
@ -336,8 +317,12 @@ RTC_FORCEINLINE void rtcCopyHitToHitN(RTCHitN* hitN, const RTCHit* hit, unsigned
|
|||
RTCHitN_v(hitN,N,i) = hit->v;
|
||||
RTCHitN_primID(hitN,N,i) = hit->primID;
|
||||
RTCHitN_geomID(hitN,N,i) = hit->geomID;
|
||||
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++)
|
||||
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
|
||||
RTCHitN_instID(hitN,N,i,l) = hit->instID[l];
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
RTCHitN_instPrimID(hitN,N,i,l) = hit->instPrimID[l];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
RTC_FORCEINLINE RTCRayHit rtcGetRayHitFromRayHitN(RTCRayHitN* rayhitN, unsigned int N, unsigned int i)
|
||||
|
@ -366,8 +351,12 @@ RTC_FORCEINLINE RTCRayHit rtcGetRayHitFromRayHitN(RTCRayHitN* rayhitN, unsigned
|
|||
rh.hit.v = RTCHitN_v(hit,N,i);
|
||||
rh.hit.primID = RTCHitN_primID(hit,N,i);
|
||||
rh.hit.geomID = RTCHitN_geomID(hit,N,i);
|
||||
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++)
|
||||
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
|
||||
rh.hit.instID[l] = RTCHitN_instID(hit,N,i,l);
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
rh.hit.instPrimID[l] = RTCHitN_instPrimID(hit,N,i,l);
|
||||
#endif
|
||||
}
|
||||
|
||||
return rh;
|
||||
}
|
|
@ -0,0 +1,252 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "rtcore_device.h"
|
||||
|
||||
RTC_NAMESPACE_BEGIN
|
||||
|
||||
/* Forward declarations for ray structures */
|
||||
struct RTCRayHit;
|
||||
struct RTCRayHit4;
|
||||
struct RTCRayHit8;
|
||||
struct RTCRayHit16;
|
||||
|
||||
/* Scene flags. Each flag occupies its own bit, so flags can be OR-ed together. */
enum RTCSceneFlags
{
  RTC_SCENE_FLAG_NONE                         = 0x0, // no bit set
  RTC_SCENE_FLAG_DYNAMIC                      = 0x1, // bit 0
  RTC_SCENE_FLAG_COMPACT                      = 0x2, // bit 1
  RTC_SCENE_FLAG_ROBUST                       = 0x4, // bit 2
  RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = 0x8  // bit 3
};
|
||||
|
||||
/* Additional arguments for rtcIntersect1/4/8/16 calls */
|
||||
struct RTCIntersectArguments
|
||||
{
|
||||
enum RTCRayQueryFlags flags; // intersection flags
|
||||
enum RTCFeatureFlags feature_mask; // selectively enable features for traversal
|
||||
struct RTCRayQueryContext* context; // optional pointer to ray query context
|
||||
RTCFilterFunctionN filter; // filter function to execute
|
||||
RTCIntersectFunctionN intersect; // user geometry intersection callback to execute
|
||||
#if RTC_MIN_WIDTH
|
||||
float minWidthDistanceFactor; // curve radius is set to this factor times distance to ray origin
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Initializes intersection arguments. */
|
||||
RTC_FORCEINLINE void rtcInitIntersectArguments(struct RTCIntersectArguments* args)
|
||||
{
|
||||
args->flags = RTC_RAY_QUERY_FLAG_INCOHERENT;
|
||||
args->feature_mask = RTC_FEATURE_FLAG_ALL;
|
||||
args->context = NULL;
|
||||
args->filter = NULL;
|
||||
args->intersect = NULL;
|
||||
|
||||
#if RTC_MIN_WIDTH
|
||||
args->minWidthDistanceFactor = 0.0f;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Additional arguments for rtcOccluded1/4/8/16 calls */
|
||||
struct RTCOccludedArguments
|
||||
{
|
||||
enum RTCRayQueryFlags flags; // intersection flags
|
||||
enum RTCFeatureFlags feature_mask; // selectively enable features for traversal
|
||||
struct RTCRayQueryContext* context; // optional pointer to ray query context
|
||||
RTCFilterFunctionN filter; // filter function to execute
|
||||
RTCOccludedFunctionN occluded; // user geometry occlusion callback to execute
|
||||
|
||||
#if RTC_MIN_WIDTH
|
||||
float minWidthDistanceFactor; // curve radius is set to this factor times distance to ray origin
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Initializes an intersection arguments. */
|
||||
RTC_FORCEINLINE void rtcInitOccludedArguments(struct RTCOccludedArguments* args)
|
||||
{
|
||||
args->flags = RTC_RAY_QUERY_FLAG_INCOHERENT;
|
||||
args->feature_mask = RTC_FEATURE_FLAG_ALL;
|
||||
args->context = NULL;
|
||||
args->filter = NULL;
|
||||
args->occluded = NULL;
|
||||
|
||||
#if RTC_MIN_WIDTH
|
||||
args->minWidthDistanceFactor = 0.0f;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Creates a new scene. */
|
||||
RTC_API RTCScene rtcNewScene(RTCDevice device);
|
||||
|
||||
/* Returns the device the scene got created in. The reference count of
|
||||
* the device is incremented by this function. */
|
||||
RTC_API RTCDevice rtcGetSceneDevice(RTCScene hscene);
|
||||
|
||||
/* Retains the scene (increments the reference count). */
|
||||
RTC_API void rtcRetainScene(RTCScene scene);
|
||||
|
||||
/* Releases the scene (decrements the reference count). */
|
||||
RTC_API void rtcReleaseScene(RTCScene scene);
|
||||
|
||||
|
||||
/* Attaches the geometry to a scene. */
|
||||
RTC_API unsigned int rtcAttachGeometry(RTCScene scene, RTCGeometry geometry);
|
||||
|
||||
/* Attaches the geometry to a scene using the specified geometry ID. */
|
||||
RTC_API void rtcAttachGeometryByID(RTCScene scene, RTCGeometry geometry, unsigned int geomID);
|
||||
|
||||
/* Detaches the geometry from the scene. */
|
||||
RTC_API void rtcDetachGeometry(RTCScene scene, unsigned int geomID);
|
||||
|
||||
/* Gets a geometry handle from the scene. This function is not thread safe and should get used during rendering. */
|
||||
RTC_API RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID);
|
||||
|
||||
/* Gets a geometry handle from the scene. This function is thread safe and should NOT get used during rendering. */
|
||||
RTC_API RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID);
|
||||
|
||||
/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */
|
||||
RTC_SYCL_API void* rtcGetGeometryUserDataFromScene(RTCScene scene, unsigned int geomID);
|
||||
|
||||
/* Returns the interpolated transformation of an instance for the specified time. */
|
||||
RTC_SYCL_API void rtcGetGeometryTransformFromScene(RTCScene scene, unsigned int geomID, float time, enum RTCFormat format, void* xfm);
|
||||
|
||||
|
||||
/* Commits the scene. */
|
||||
RTC_API void rtcCommitScene(RTCScene scene);
|
||||
|
||||
/* Commits the scene from multiple threads. */
|
||||
RTC_API void rtcJoinCommitScene(RTCScene scene);
|
||||
|
||||
|
||||
/* Progress monitor callback function */
|
||||
typedef bool (*RTCProgressMonitorFunction)(void* ptr, double n);
|
||||
|
||||
/* Sets the progress monitor callback function of the scene. */
|
||||
RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene scene, RTCProgressMonitorFunction progress, void* ptr);
|
||||
|
||||
/* Sets the build quality of the scene. */
|
||||
RTC_API void rtcSetSceneBuildQuality(RTCScene scene, enum RTCBuildQuality quality);
|
||||
|
||||
/* Sets the scene flags. */
|
||||
RTC_API void rtcSetSceneFlags(RTCScene scene, enum RTCSceneFlags flags);
|
||||
|
||||
/* Returns the scene flags. */
|
||||
RTC_API enum RTCSceneFlags rtcGetSceneFlags(RTCScene scene);
|
||||
|
||||
/* Returns the axis-aligned bounds of the scene. */
|
||||
RTC_API void rtcGetSceneBounds(RTCScene scene, struct RTCBounds* bounds_o);
|
||||
|
||||
/* Returns the linear axis-aligned bounds of the scene. */
|
||||
RTC_API void rtcGetSceneLinearBounds(RTCScene scene, struct RTCLinearBounds* bounds_o);
|
||||
|
||||
|
||||
/* Perform a closest point query of the scene. */
|
||||
RTC_API bool rtcPointQuery(RTCScene scene, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr);
|
||||
|
||||
/* Perform a closest point query with a packet of 4 points with the scene. */
|
||||
RTC_API bool rtcPointQuery4(const int* valid, RTCScene scene, struct RTCPointQuery4* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
|
||||
|
||||
/* Perform a closest point query with a packet of 8 points with the scene. */
|
||||
RTC_API bool rtcPointQuery8(const int* valid, RTCScene scene, struct RTCPointQuery8* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
|
||||
|
||||
/* Perform a closest point query with a packet of 16 points with the scene. */
|
||||
RTC_API bool rtcPointQuery16(const int* valid, RTCScene scene, struct RTCPointQuery16* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
|
||||
|
||||
|
||||
/* Intersects a single ray with the scene. */
|
||||
RTC_SYCL_API void rtcIntersect1(RTCScene scene, struct RTCRayHit* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
|
||||
|
||||
/* Intersects a packet of 4 rays with the scene. */
|
||||
RTC_API void rtcIntersect4(const int* valid, RTCScene scene, struct RTCRayHit4* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
|
||||
|
||||
/* Intersects a packet of 8 rays with the scene. */
|
||||
RTC_API void rtcIntersect8(const int* valid, RTCScene scene, struct RTCRayHit8* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
|
||||
|
||||
/* Intersects a packet of 16 rays with the scene. */
|
||||
RTC_API void rtcIntersect16(const int* valid, RTCScene scene, struct RTCRayHit16* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
|
||||
|
||||
|
||||
/* Forwards ray inside user geometry callback. */
|
||||
RTC_SYCL_API void rtcForwardIntersect1(const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay* ray, unsigned int instID);
|
||||
|
||||
/* Forwards ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
|
||||
RTC_SYCL_API void rtcForwardIntersect1Ex(const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID);
|
||||
|
||||
/* Forwards ray packet of size 4 inside user geometry callback. */
|
||||
RTC_API void rtcForwardIntersect4(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay4* ray, unsigned int instID);
|
||||
|
||||
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
|
||||
RTC_API void rtcForwardIntersect4Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay4* ray, unsigned int instID, unsigned int primInstID);
|
||||
|
||||
/* Forwards ray packet of size 8 inside user geometry callback. */
|
||||
RTC_API void rtcForwardIntersect8(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay8* ray, unsigned int instID);
|
||||
|
||||
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
|
||||
RTC_API void rtcForwardIntersect8Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay8* ray, unsigned int instID, unsigned int primInstID);
|
||||
|
||||
/* Forwards ray packet of size 16 inside user geometry callback. */
|
||||
RTC_API void rtcForwardIntersect16(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID);
|
||||
|
||||
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
|
||||
RTC_API void rtcForwardIntersect16Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID, unsigned int primInstID);
|
||||
|
||||
|
||||
/* Tests a single ray for occlusion with the scene. */
|
||||
RTC_SYCL_API void rtcOccluded1(RTCScene scene, struct RTCRay* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
|
||||
|
||||
/* Tests a packet of 4 rays for occlusion with the scene. */
|
||||
RTC_API void rtcOccluded4(const int* valid, RTCScene scene, struct RTCRay4* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
|
||||
|
||||
/* Tests a packet of 8 rays for occlusion with the scene. */
|
||||
RTC_API void rtcOccluded8(const int* valid, RTCScene scene, struct RTCRay8* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
|
||||
|
||||
/* Tests a packet of 16 rays for occlusion with the scene. */
|
||||
RTC_API void rtcOccluded16(const int* valid, RTCScene scene, struct RTCRay16* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
|
||||
|
||||
|
||||
/* Forwards single occlusion ray inside user geometry callback. */
|
||||
RTC_SYCL_API void rtcForwardOccluded1(const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay* ray, unsigned int instID);
|
||||
|
||||
/* Forwards single occlusion ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
|
||||
RTC_SYCL_API void rtcForwardOccluded1Ex(const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID);
|
||||
|
||||
/* Forwards occlusion ray packet of size 4 inside user geometry callback. */
|
||||
RTC_API void rtcForwardOccluded4(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay4* ray, unsigned int instID);
|
||||
|
||||
/* Forwards occlusion ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
|
||||
RTC_API void rtcForwardOccluded4Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay4* ray, unsigned int instID, unsigned int instPrimID);
|
||||
|
||||
/* Forwards occlusion ray packet of size 8 inside user geometry callback. */
|
||||
RTC_API void rtcForwardOccluded8(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay8* ray, unsigned int instID);
|
||||
|
||||
/* Forwards occlusion ray packet of size 8 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
|
||||
RTC_API void rtcForwardOccluded8Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay8* ray, unsigned int instID, unsigned int instPrimID);
|
||||
|
||||
/* Forwards occlusion ray packet of size 16 inside user geometry callback. */
|
||||
RTC_API void rtcForwardOccluded16(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID);
|
||||
|
||||
/* Forwards occlusion ray packet of size 16 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
|
||||
RTC_API void rtcForwardOccluded16Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID, unsigned int instPrimID);
|
||||
|
||||
|
||||
/*! One reported collision: a (geomID, primID) pair for each of the two sides. */
struct RTCCollision
{
  unsigned int geomID0;
  unsigned int primID0;
  unsigned int geomID1;
  unsigned int primID1;
};

/*! Collision callback: receives the user pointer and an array of num_collisions collisions. */
typedef void (*RTCCollideFunc)(void* userPtr,
                               struct RTCCollision* collisions,
                               unsigned int num_collisions);
|
||||
|
||||
/*! Performs collision detection of two scenes */
|
||||
RTC_API void rtcCollide (RTCScene scene0, RTCScene scene1, RTCCollideFunc callback, void* userPtr);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
||||
/* Helper for easily combining scene flags */
|
||||
inline RTCSceneFlags operator|(RTCSceneFlags a, RTCSceneFlags b) {
|
||||
return (RTCSceneFlags)((size_t)a | (size_t)b);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
RTC_NAMESPACE_END
|
||||
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "../common/builder.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_sort.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
@ -101,7 +102,7 @@ namespace embree
|
|||
}
|
||||
};
|
||||
|
||||
#if defined (__AVX2__)
|
||||
#if defined (__AVX2__) || defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
/*! for AVX2 there is a fast scalar bitInterleave */
|
||||
struct MortonCodeGenerator
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#define MBLUR_NUM_OBJECT_BINS 32
|
||||
|
||||
#include "../bvh/bvh.h"
|
||||
#include "../common/primref_mb.h"
|
||||
#include "../builders/primref_mb.h"
|
||||
#include "heuristic_binning_array_aligned.h"
|
||||
#include "heuristic_timesplit_array.h"
|
||||
|
||||
|
@ -141,16 +141,17 @@ namespace embree
|
|||
struct VirtualRecalculatePrimRef
|
||||
{
|
||||
Scene* scene;
|
||||
const SubGridBuildData * const sgrids;
|
||||
|
||||
__forceinline VirtualRecalculatePrimRef (Scene* scene)
|
||||
: scene(scene) {}
|
||||
__forceinline VirtualRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids = nullptr)
|
||||
: scene(scene), sgrids(sgrids) {}
|
||||
|
||||
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
|
||||
{
|
||||
const unsigned geomID = prim.geomID();
|
||||
const unsigned primID = prim.primID();
|
||||
const Geometry* mesh = scene->get(geomID);
|
||||
const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
|
||||
const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range, sgrids);
|
||||
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
||||
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
||||
}
|
||||
|
@ -166,7 +167,7 @@ namespace embree
|
|||
}
|
||||
|
||||
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
|
||||
return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
|
||||
return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range, sgrids);
|
||||
}
|
||||
|
||||
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
|
||||
|
|
|
@ -7,13 +7,8 @@
|
|||
#include "heuristic_spatial_array.h"
|
||||
#include "heuristic_openmerge_array.h"
|
||||
|
||||
#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
|
||||
# define NUM_OBJECT_BINS 16
|
||||
# define NUM_SPATIAL_BINS 16
|
||||
#else
|
||||
# define NUM_OBJECT_BINS 32
|
||||
# define NUM_SPATIAL_BINS 16
|
||||
#endif
|
||||
#define NUM_OBJECT_BINS 32
|
||||
#define NUM_SPATIAL_BINS 16
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "priminfo.h"
|
||||
#include "priminfo_mb.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_partition.h"
|
||||
|
||||
|
@ -390,6 +391,63 @@ namespace embree
|
|||
}
|
||||
return Split(bestSAH,bestDim,bestPos,mapping);
|
||||
}
|
||||
|
||||
/*! finds the best split by scanning binning information */
|
||||
__forceinline Split best_block_size(const BinMapping<BINS>& mapping, const size_t blockSize) const
|
||||
{
|
||||
/* sweep from right to left and compute parallel prefix of merged bounds */
|
||||
vfloat4 rAreas[BINS];
|
||||
vuint4 rCounts[BINS];
|
||||
vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
|
||||
for (size_t i=mapping.size()-1; i>0; i--)
|
||||
{
|
||||
count += counts(i);
|
||||
rCounts[i] = count;
|
||||
bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
|
||||
rAreas[i][3] = 0.0f;
|
||||
}
|
||||
/* sweep from left to right and compute SAH */
|
||||
vuint4 blocks_add = blockSize-1;
|
||||
vfloat4 blocks_factor = 1.0f/float(blockSize);
|
||||
vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
|
||||
count = 0; bx = empty; by = empty; bz = empty;
|
||||
for (size_t i=1; i<mapping.size(); i++, ii+=1)
|
||||
{
|
||||
count += counts(i-1);
|
||||
bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
|
||||
const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
|
||||
const vfloat4 rArea = rAreas[i];
|
||||
const vfloat4 lCount = floor(vfloat4(count +blocks_add)*blocks_factor);
|
||||
const vfloat4 rCount = floor(vfloat4(rCounts[i]+blocks_add)*blocks_factor);
|
||||
const vfloat4 sah = madd(lArea,lCount,rArea*rCount);
|
||||
|
||||
vbestPos = select(sah < vbestSAH,ii ,vbestPos);
|
||||
vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
|
||||
}
|
||||
|
||||
/* find best dimension */
|
||||
float bestSAH = inf;
|
||||
int bestDim = -1;
|
||||
int bestPos = 0;
|
||||
for (int dim=0; dim<3; dim++)
|
||||
{
|
||||
/* ignore zero sized dimensions */
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
/* test if this is a better dimension */
|
||||
if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
|
||||
bestDim = dim;
|
||||
bestPos = vbestPos[dim];
|
||||
bestSAH = vbestSAH[dim];
|
||||
}
|
||||
}
|
||||
return Split(bestSAH,bestDim,bestPos,mapping);
|
||||
}
|
||||
|
||||
/*! calculates extended split information */
|
||||
__forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
|
||||
|
|
|
@ -22,6 +22,9 @@ namespace embree
|
|||
|
||||
/*! constructs from an explicit begin/end index pair and precomputed bounds */
__forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
  : CentGeomBBox3fa(centGeomBounds),
    range<size_t>(begin,end)
{
}
|
||||
|
||||
/*! constructs from an existing index range and precomputed bounds */
__forceinline PrimInfoRange (range<size_t> r, const CentGeomBBox3fa& centGeomBounds)
  : CentGeomBBox3fa(centGeomBounds),
    range<size_t>(r)
{
}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return expectedApproxHalfArea(geomBounds)*float(size());
|
||||
|
@ -30,7 +33,45 @@ namespace embree
|
|||
/*! leaf cost with the primitive count rounded up to whole blocks of (1 << block_shift) */
__forceinline float leafSAH(size_t block_shift) const
{
  const size_t numBlocks = (size()+(size_t(1)<<block_shift)-1) >> block_shift;
  return expectedApproxHalfArea(geomBounds)*float(numBlocks);
}
|
||||
|
||||
/*! returns the index range of this info as a plain range<size_t> */
__forceinline range<size_t> get_range() const
{
  const size_t first = begin();
  const size_t last = end();
  return range<size_t>(first,last);
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeomBBox3fa::extend_primref(prim);
|
||||
_end++;
|
||||
}
|
||||
};
|
||||
|
||||
/*! Splits the range of pinfo at its middle index and writes the two halves,
 *  each with bounds recomputed from prims, into linfo and rinfo. */
inline void performFallbackSplit(PrimRef* const prims, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
{
  const size_t first = pinfo.begin();
  const size_t last = pinfo.end();
  const size_t mid = (first + last)/2;

  /* accumulates the bounds of prims[from..to) */
  auto boundsOf = [&] (const size_t from, const size_t to) -> CentGeomBBox3fa
  {
    CentGeomBBox3fa acc(empty);
    for (size_t i = from; i < to; ++i)
      acc.extend_center2(prims[i]);
    return acc;
  };

  /* the outputs are constructed in place over the caller's objects */
  new (&linfo) PrimInfoRange(first,mid,boundsOf(first,mid));
  new (&rinfo) PrimInfoRange(mid,last,boundsOf(mid,last));
}
|
||||
|
||||
template<typename Type, typename getTypeFunc>
|
||||
inline void performTypeSplit(const getTypeFunc& getType, Type type, PrimRef* const prims, range<size_t> range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
CentGeomBBox3fa local_left(empty), local_right(empty);
|
||||
auto isLeft = [&] (const PrimRef& ref) { return type == getType(ref.geomID()); };
|
||||
const size_t center = serial_partitioning(prims,range.begin(),range.end(),local_left,local_right,isLeft,CentGeomBBox3fa::extend_ref);
|
||||
linfo = PrimInfoRange(make_range(range.begin(),center ),local_left);
|
||||
rinfo = PrimInfoRange(make_range(center ,range.end()),local_right);
|
||||
}
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRef, size_t BINS>
|
||||
|
@ -69,6 +110,24 @@ namespace embree
|
|||
return binner.best(mapping,logBlockSize);
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const Split find_block_size(const PrimInfoRange& pinfo, const size_t blockSize)
|
||||
{
|
||||
if (likely(pinfo.size() < PARALLEL_THRESHOLD))
|
||||
return find_block_size_template<false>(pinfo,blockSize);
|
||||
else
|
||||
return find_block_size_template<true>(pinfo,blockSize);
|
||||
}
|
||||
|
||||
template<bool parallel>
|
||||
__forceinline const Split find_block_size_template(const PrimInfoRange& pinfo, const size_t blockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<BINS> mapping(pinfo);
|
||||
bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
|
||||
return binner.best_block_size(mapping,blockSize);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
|
@ -121,21 +180,8 @@ namespace embree
|
|||
std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
const size_t begin = pinfo.begin();
|
||||
const size_t end = pinfo.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
CentGeomBBox3fa left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.extend_center2(prims[i]);
|
||||
new (&linfo) PrimInfoRange(begin,center,left);
|
||||
|
||||
CentGeomBBox3fa right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.extend_center2(prims[i]);
|
||||
new (&rinfo) PrimInfoRange(center,end,right);
|
||||
void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) {
|
||||
performFallbackSplit(prims,pinfo,linfo,rinfo);
|
||||
}
|
||||
|
||||
void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
|
@ -156,6 +202,8 @@ namespace embree
|
|||
PrimRef* const prims;
|
||||
};
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRefMB, size_t BINS>
|
||||
struct HeuristicArrayBinningMB
|
||||
|
@ -196,5 +244,6 @@ namespace embree
|
|||
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "../common/scene.h"
|
||||
#include "priminfo.h"
|
||||
|
||||
namespace embree
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "../common/primref_mb.h"
|
||||
#include "../builders/primref_mb.h"
|
||||
#include "../../common/algorithms/parallel_filter.h"
|
||||
|
||||
#define MBLUR_TIME_SPLIT_THRESHOLD 1.25f
|
||||
|
|
|
@ -3,9 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
#include "../common/primref.h"
|
||||
#include "../common/primref_mb.h"
|
||||
#include "primref.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
@ -41,6 +39,10 @@ namespace embree
|
|||
centBounds.extend(center);
|
||||
}
|
||||
|
||||
/*! static form of extend_primref, usable where a plain function is expected */
static void extend_ref (CentGeom& pinfo, const PrimRef& ref)
{
  pinfo.extend_primref(ref);
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void extend_center2(const PrimRef& prim)
|
||||
{
|
||||
|
@ -84,6 +86,9 @@ namespace embree
|
|||
/*! empty set: empty bounds and the index pair (0,0) */
__forceinline PrimInfoT (EmptyTy)
  : CentGeom<BBox>(empty),
    begin(0),
    end(0)
{
}
|
||||
|
||||
/*! empty bounds with the index pair (0,N) */
__forceinline PrimInfoT (size_t N)
  : CentGeom<BBox>(empty),
    begin(0),
    end(N)
{
}
|
||||
|
||||
/*! explicit index pair together with precomputed bounds */
__forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
  : CentGeom<BBox>(centGeomBounds),
    begin(begin),
    end(end)
{
}
|
||||
|
||||
|
@ -158,205 +163,5 @@ namespace embree
|
|||
|
||||
typedef PrimInfoT<BBox3fa> PrimInfo;
|
||||
//typedef PrimInfoT<LBBox3fa> PrimInfoMB;
|
||||
|
||||
/*! stores bounding information for a set of primitives */
|
||||
template<typename BBox>
|
||||
class PrimInfoMBT : public CentGeom<BBox>
|
||||
{
|
||||
public:
|
||||
using CentGeom<BBox>::geomBounds;
|
||||
using CentGeom<BBox>::centBounds;
|
||||
|
||||
__forceinline PrimInfoMBT () {
|
||||
}
|
||||
|
||||
__forceinline PrimInfoMBT (EmptyTy)
|
||||
: CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
|
||||
|
||||
__forceinline PrimInfoMBT (size_t begin, size_t end)
|
||||
: CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeom<BBox>::extend_primref(prim);
|
||||
time_range.extend(prim.time_range);
|
||||
object_range._end++;
|
||||
num_time_segments += prim.size();
|
||||
if (max_num_time_segments < prim.totalTimeSegments()) {
|
||||
max_num_time_segments = prim.totalTimeSegments();
|
||||
max_time_range = prim.time_range;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void merge(const PrimInfoMBT& other)
|
||||
{
|
||||
CentGeom<BBox>::merge(other);
|
||||
time_range.extend(other.time_range);
|
||||
object_range._begin += other.object_range.begin();
|
||||
object_range._end += other.object_range.end();
|
||||
num_time_segments += other.num_time_segments;
|
||||
if (max_num_time_segments < other.max_num_time_segments) {
|
||||
max_num_time_segments = other.max_num_time_segments;
|
||||
max_time_range = other.max_time_range;
|
||||
}
|
||||
}
|
||||
|
||||
static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
|
||||
PrimInfoMBT r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
__forceinline size_t begin() const {
|
||||
return object_range.begin();
|
||||
}
|
||||
|
||||
__forceinline size_t end() const {
|
||||
return object_range.end();
|
||||
}
|
||||
|
||||
/*! returns the number of primitives */
|
||||
__forceinline size_t size() const {
|
||||
return object_range.size();
|
||||
}
|
||||
|
||||
__forceinline float halfArea() const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
}
|
||||
|
||||
__forceinline float align_time(float ct) const
|
||||
{
|
||||
//return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
|
||||
float t0 = (ct-max_time_range.lower)/max_time_range.size();
|
||||
float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
|
||||
return t1*max_time_range.size()+max_time_range.lower;
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
|
||||
{
|
||||
return cout << "PrimInfo { " <<
|
||||
"object_range = " << pinfo.object_range <<
|
||||
", time_range = " << pinfo.time_range <<
|
||||
", time_segments = " << pinfo.num_time_segments <<
|
||||
", geomBounds = " << pinfo.geomBounds <<
|
||||
", centBounds = " << pinfo.centBounds <<
|
||||
"}";
|
||||
}
|
||||
|
||||
public:
|
||||
range<size_t> object_range; //!< primitive range
|
||||
size_t num_time_segments; //!< total number of time segments of all added primrefs
|
||||
size_t max_num_time_segments; //!< maximum number of time segments of a primitive
|
||||
BBox1f max_time_range; //!< time range of primitive with max_num_time_segments
|
||||
BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
|
||||
};
|
||||
|
||||
typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
|
||||
|
||||
struct SetMB : public PrimInfoMB
|
||||
{
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
typedef mvector<PrimRefMB>* PrimRefVector;
|
||||
|
||||
__forceinline SetMB() {}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
|
||||
: PrimInfoMB(pinfo_i), prims(prims) {}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
|
||||
: PrimInfoMB(pinfo_i), prims(prims)
|
||||
{
|
||||
object_range = object_range_in;
|
||||
time_range = intersect(time_range,time_range_in);
|
||||
}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
|
||||
: PrimInfoMB(pinfo_i), prims(prims)
|
||||
{
|
||||
time_range = intersect(time_range,time_range_in);
|
||||
}
|
||||
|
||||
void deterministic_order() const
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
PrimRefMB* prim = prims->data();
|
||||
std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
|
||||
{
|
||||
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
|
||||
{
|
||||
LBBox3fa cbounds(empty);
|
||||
for (size_t j = r.begin(); j < r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
|
||||
cbounds.extend(bn);
|
||||
};
|
||||
return cbounds;
|
||||
};
|
||||
|
||||
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
|
||||
reduce,
|
||||
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
|
||||
{
|
||||
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
|
||||
{
|
||||
LBBox3fa cbounds(empty);
|
||||
for (size_t j = r.begin(); j < r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
|
||||
cbounds.extend(bn);
|
||||
};
|
||||
return cbounds;
|
||||
};
|
||||
|
||||
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
|
||||
reduce,
|
||||
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
|
||||
{
|
||||
auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
|
||||
{
|
||||
PrimInfoMB pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
|
||||
pinfo.add_primref(ref1);
|
||||
};
|
||||
return pinfo;
|
||||
};
|
||||
|
||||
const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
|
||||
PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);
|
||||
|
||||
return SetMB(pinfo,prims,object_range,time_range);
|
||||
}
|
||||
|
||||
public:
|
||||
PrimRefVector prims;
|
||||
};
|
||||
//}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,210 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "primref_mb.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! stores bounding information for a set of primitives */
|
||||
template<typename BBox>
|
||||
class PrimInfoMBT : public CentGeom<BBox>
|
||||
{
|
||||
public:
|
||||
using CentGeom<BBox>::geomBounds;
|
||||
using CentGeom<BBox>::centBounds;
|
||||
|
||||
__forceinline PrimInfoMBT () {
|
||||
}
|
||||
|
||||
__forceinline PrimInfoMBT (EmptyTy)
|
||||
: CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
|
||||
|
||||
__forceinline PrimInfoMBT (size_t begin, size_t end)
|
||||
: CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeom<BBox>::extend_primref(prim);
|
||||
time_range.extend(prim.time_range);
|
||||
object_range._end++;
|
||||
num_time_segments += prim.size();
|
||||
if (max_num_time_segments < prim.totalTimeSegments()) {
|
||||
max_num_time_segments = prim.totalTimeSegments();
|
||||
max_time_range = prim.time_range;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void merge(const PrimInfoMBT& other)
|
||||
{
|
||||
CentGeom<BBox>::merge(other);
|
||||
time_range.extend(other.time_range);
|
||||
object_range._begin += other.object_range.begin();
|
||||
object_range._end += other.object_range.end();
|
||||
num_time_segments += other.num_time_segments;
|
||||
if (max_num_time_segments < other.max_num_time_segments) {
|
||||
max_num_time_segments = other.max_num_time_segments;
|
||||
max_time_range = other.max_time_range;
|
||||
}
|
||||
}
|
||||
|
||||
static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
|
||||
PrimInfoMBT r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
__forceinline size_t begin() const {
|
||||
return object_range.begin();
|
||||
}
|
||||
|
||||
__forceinline size_t end() const {
|
||||
return object_range.end();
|
||||
}
|
||||
|
||||
/*! returns the number of primitives */
|
||||
__forceinline size_t size() const {
|
||||
return object_range.size();
|
||||
}
|
||||
|
||||
__forceinline float halfArea() const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
}
|
||||
|
||||
__forceinline float align_time(float ct) const
|
||||
{
|
||||
//return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
|
||||
float t0 = (ct-max_time_range.lower)/max_time_range.size();
|
||||
float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
|
||||
return t1*max_time_range.size()+max_time_range.lower;
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
|
||||
{
|
||||
return cout << "PrimInfo { " <<
|
||||
"object_range = " << pinfo.object_range <<
|
||||
", time_range = " << pinfo.time_range <<
|
||||
", time_segments = " << pinfo.num_time_segments <<
|
||||
", geomBounds = " << pinfo.geomBounds <<
|
||||
", centBounds = " << pinfo.centBounds <<
|
||||
"}";
|
||||
}
|
||||
|
||||
public:
|
||||
range<size_t> object_range; //!< primitive range
|
||||
size_t num_time_segments; //!< total number of time segments of all added primrefs
|
||||
size_t max_num_time_segments; //!< maximum number of time segments of a primitive
|
||||
BBox1f max_time_range; //!< time range of primitive with max_num_time_segments
|
||||
BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
|
||||
};
|
||||
|
||||
typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
|
||||
|
||||
struct SetMB : public PrimInfoMB
|
||||
{
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
typedef mvector<PrimRefMB>* PrimRefVector;
|
||||
|
||||
__forceinline SetMB() {}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
|
||||
: PrimInfoMB(pinfo_i), prims(prims) {}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
|
||||
: PrimInfoMB(pinfo_i), prims(prims)
|
||||
{
|
||||
object_range = object_range_in;
|
||||
time_range = intersect(time_range,time_range_in);
|
||||
}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
|
||||
: PrimInfoMB(pinfo_i), prims(prims)
|
||||
{
|
||||
time_range = intersect(time_range,time_range_in);
|
||||
}
|
||||
|
||||
void deterministic_order() const
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
PrimRefMB* prim = prims->data();
|
||||
std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
|
||||
{
|
||||
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
|
||||
{
|
||||
LBBox3fa cbounds(empty);
|
||||
for (size_t j = r.begin(); j < r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
|
||||
cbounds.extend(bn);
|
||||
};
|
||||
return cbounds;
|
||||
};
|
||||
|
||||
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
|
||||
reduce,
|
||||
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
|
||||
{
|
||||
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
|
||||
{
|
||||
LBBox3fa cbounds(empty);
|
||||
for (size_t j = r.begin(); j < r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
|
||||
cbounds.extend(bn);
|
||||
};
|
||||
return cbounds;
|
||||
};
|
||||
|
||||
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
|
||||
reduce,
|
||||
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
|
||||
{
|
||||
auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
|
||||
{
|
||||
PrimInfoMB pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
|
||||
pinfo.add_primref(ref1);
|
||||
};
|
||||
return pinfo;
|
||||
};
|
||||
|
||||
const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
|
||||
PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);
|
||||
|
||||
return SetMB(pinfo,prims,object_range,time_range);
|
||||
}
|
||||
|
||||
public:
|
||||
PrimRefVector prims;
|
||||
};
|
||||
//}
|
||||
}
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "../common/default.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
@ -118,7 +118,8 @@ namespace embree
|
|||
std::swap(a,b);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "../common/default.h"
|
||||
|
||||
#define MBLUR_BIN_LBBOX 1
|
||||
|
|
@ -55,6 +55,29 @@ namespace embree
|
|||
return pinfo;
|
||||
}
|
||||
|
||||
/* Fills prims and sgrids from all geometries of the scene selected by
   types/mblur and returns the merged PrimInfo. A second pass runs when the
   first one does not yield numPrimRefs primitives. */
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor)
{
  ParallelForForPrefixSumState<PrimInfo> pstate;
  Scene::Iterator2 iter(scene,types,mblur);
  auto mergeInfo = [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); };

  /* first try: each chunk writes at the index k handed to the callback */
  progressMonitor(0);
  pstate.init(iter,size_t(1024));
  auto firstPass = [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
    return mesh->createPrimRefArray(prims,sgrids,r,k,static_cast<unsigned>(geomID));
  };
  PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), firstPass, mergeInfo);

  /* all expected primitives were produced, nothing had to be filtered out */
  if (pinfo.size() == numPrimRefs)
    return pinfo;

  /* if we need to filter out geometry, run again, now writing at base.size() */
  progressMonitor(0);
  auto secondPass = [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
    return mesh->createPrimRefArray(prims,sgrids,r,base.size(),static_cast<unsigned>(geomID));
  };
  pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), secondPass, mergeInfo);
  return pinfo;
}
|
||||
|
||||
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
|
@ -104,6 +127,32 @@ namespace embree
|
|||
return pinfo;
|
||||
}
|
||||
|
||||
/* Fills prims and sgrids with motion blur primitive references for the time
   range t0t1 and returns the merged PrimInfoMB. A second pass runs when the
   first one does not yield numPrimRefs primitives. */
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
{
  ParallelForForPrefixSumState<PrimInfoMB> pstate;
  Scene::Iterator2 iter(scene,types,true);
  auto mergeInfo = [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); };

  /* first try: each chunk writes at the index k handed to the callback */
  progressMonitor(0);
  pstate.init(iter,size_t(1024));
  auto firstPass = [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
    return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,k,static_cast<unsigned>(geomID));
  };
  PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), firstPass, mergeInfo);

  /* if we need to filter out geometry, run again, now writing at base.size() */
  const bool needsSecondPass = pinfo.size() != numPrimRefs;
  if (needsSecondPass)
  {
    progressMonitor(0);
    auto secondPass = [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
      return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),static_cast<unsigned>(geomID));
    };
    pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), secondPass, mergeInfo);
  }

  /* the BVH starts with that time range, even though primitives might have smaller/larger time range */
  pinfo.time_range = t0t1;
  return pinfo;
}
|
||||
|
||||
template<typename Mesh>
|
||||
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
|
@ -218,26 +267,8 @@ namespace embree
|
|||
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
k = base.size();
|
||||
size_t p_index = k;
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j)) continue;
|
||||
const GridMesh::Grid &g = mesh->grid(j);
|
||||
for (unsigned int y=0; y<g.resY-1u; y+=2)
|
||||
for (unsigned int x=0; x<g.resX-1u; x+=2)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
|
||||
const PrimRef prim(bounds,(unsigned)geomID,(unsigned)p_index);
|
||||
pinfo.add_center2(prim);
|
||||
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
|
||||
prims[p_index++] = prim;
|
||||
}
|
||||
}
|
||||
return pinfo;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
assert(pinfo.size() == numPrimitives);
|
||||
return pinfo;
|
||||
}
|
||||
|
@ -269,40 +300,60 @@ namespace embree
|
|||
prims.resize(numPrimitives);
|
||||
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
|
||||
{
|
||||
|
||||
size_t p_index = base.size();
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j)) continue;
|
||||
const GridMesh::Grid &g = mesh->grid(j);
|
||||
for (unsigned int y=0; y<g.resY-1u; y+=2)
|
||||
for (unsigned int x=0; x<g.resX-1u; x+=2)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
|
||||
const PrimRef prim(bounds,geomID_,unsigned(p_index));
|
||||
pinfo.add_center2(prim);
|
||||
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
|
||||
prims[p_index++] = prim;
|
||||
}
|
||||
}
|
||||
return pinfo;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID_);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
/*! Builds the motion-blur primitive references and SubGridBuildData for all
 *  GridMesh geometries of `scene` within the time range `t0t1`.
 *
 *  Works in two passes: the first only counts the sub-grids of all grids that
 *  are valid over the time segments of `t0t1`; `prims` and `sgrids` are then
 *  resized to that count and the second pass fills them.
 *
 *  `progressMonitor` is accepted for signature parity but is not invoked here.
 *
 *  \returns the merged PrimInfoMB with time_range set to `t0t1`; when no
 *           sub-grid is found the (empty) counting result is returned and the
 *           output arrays are left untouched. */
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
{
  /* first run to get #primitives */
  ParallelForForPrefixSumState<PrimInfoMB> pstate;
  Scene::Iterator<GridMesh,true> iter(scene);

  pstate.init(iter,size_t(1024));

  /* iterate over all meshes in the scene */
  PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t /*k*/, size_t /*geomID*/) -> PrimInfoMB {

    PrimInfoMB pinfoMB(empty);
    for (size_t j=r.begin(); j<r.end(); j++)
    {
      if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
      /* only the sub-grid count matters in this pass */
      PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
      pinfoMB.merge(gridMB);
    }
    return pinfoMB;
  }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });

  size_t numPrimitives = pinfoMB.size();
  if (numPrimitives == 0) return pinfoMB;

  /* resize arrays */
  sgrids.resize(numPrimitives);
  prims.resize(numPrimitives);

  /* second run to fill primrefs and SubGridBuildData arrays */
  pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t /*k*/, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
    return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
  }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });

  assert(pinfoMB.size() == numPrimitives);
  pinfoMB.time_range = t0t1;
  return pinfoMB;
}
|
||||
|
||||
#endif
|
||||
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
|
||||
|
||||
IF_ENABLED_TRIS (template size_t createMortonCodeArray<TriangleMesh>(TriangleMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_QUADS(template size_t createMortonCodeArray<QuadMesh>(QuadMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_USER (template size_t createMortonCodeArray<UserGeometry>(UserGeometry* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_INSTANCE (template size_t createMortonCodeArray<Instance>(Instance* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_INSTANCE_ARRAY (template size_t createMortonCodeArray<InstanceArray>(InstanceArray* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,9 +4,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "../common/scene.h"
|
||||
#include "../common/primref.h"
|
||||
#include "../common/primref_mb.h"
|
||||
#include "priminfo.h"
|
||||
#include "priminfo_mb.h"
|
||||
#include "bvh_builder_morton.h"
|
||||
|
||||
namespace embree
|
||||
|
@ -16,19 +15,23 @@ namespace embree
|
|||
PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
|
||||
|
||||
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
|
||||
|
||||
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor);
|
||||
|
||||
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0);
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
|
||||
|
||||
template<typename Mesh>
|
||||
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor);
|
||||
|
||||
/* special variants for grids */
|
||||
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids);
|
||||
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids); // FIXME: remove
|
||||
|
||||
PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids);
|
||||
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,10 +3,12 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "../builders/primrefgen.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_sort.h"
|
||||
#include "../builders/heuristic_spatial.h"
|
||||
#include "../builders/splitter.h"
|
||||
|
||||
#include "../../common/algorithms/parallel_partition.h"
|
||||
#include "../../common/algorithms/parallel_for_for.h"
|
||||
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
|
||||
|
||||
|
@ -14,96 +16,46 @@
|
|||
#define CHECK_PRESPLIT(x)
|
||||
|
||||
#define GRID_SIZE 1024
|
||||
//#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 6
|
||||
#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
|
||||
#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
|
||||
#define PRIORITY_CUTOFF_THRESHOLD 1.0f
|
||||
//#define PRIORITY_CUTOFF_THRESHOLD 2.0f
|
||||
#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
|
||||
struct PresplitItem
|
||||
struct SplittingGrid
|
||||
{
|
||||
union {
|
||||
float priority;
|
||||
unsigned int data;
|
||||
};
|
||||
unsigned int index;
|
||||
|
||||
__forceinline operator unsigned() const
|
||||
__forceinline SplittingGrid(const BBox3fa& bounds)
|
||||
{
|
||||
return reinterpret_cast<const unsigned&>(priority);
|
||||
}
|
||||
__forceinline bool operator < (const PresplitItem& item) const
|
||||
{
|
||||
return (priority < item.priority);
|
||||
base = bounds.lower;
|
||||
const Vec3fa diag = bounds.size();
|
||||
extend = max(diag.x,max(diag.y,diag.z));
|
||||
scale = extend == 0.0f ? 0.0f : GRID_SIZE / extend;
|
||||
}
|
||||
|
||||
template<typename Mesh>
|
||||
__forceinline static float compute_priority(const PrimRef &ref, Scene *scene, const Vec2i &mc)
|
||||
{
|
||||
const unsigned int geomID = ref.geomID();
|
||||
const unsigned int primID = ref.primID();
|
||||
const float area_aabb = area(ref.bounds());
|
||||
const float area_prim = ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
|
||||
const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
|
||||
assert(area_prim <= area_aabb);
|
||||
//const float priority = powf((area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
|
||||
const float priority = sqrtf(sqrtf( (area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
|
||||
assert(priority >= 0.0f && priority < FLT_LARGE);
|
||||
return priority;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
|
||||
return cout << "index " << item.index << " priority " << item.priority;
|
||||
};
|
||||
|
||||
template<typename SplitterFactory>
|
||||
void splitPrimitive(SplitterFactory &Splitter,
|
||||
const PrimRef &prim,
|
||||
const unsigned int geomID,
|
||||
const unsigned int primID,
|
||||
const unsigned int split_level,
|
||||
const Vec3fa &grid_base,
|
||||
const float grid_scale,
|
||||
const float grid_extend,
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
|
||||
unsigned int& numSubPrims)
|
||||
{
|
||||
assert(split_level <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
|
||||
if (split_level == 0)
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = prim;
|
||||
}
|
||||
else
|
||||
__forceinline bool split_pos(const PrimRef& prim, unsigned int& dim_o, float& fsplit_o) const
|
||||
{
|
||||
/* compute morton code */
|
||||
const Vec3fa lower = prim.lower;
|
||||
const Vec3fa upper = prim.upper;
|
||||
const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
|
||||
const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
|
||||
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
|
||||
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
|
||||
Vec3ia ilower(floor(glower));
|
||||
Vec3ia iupper(floor(gupper));
|
||||
|
||||
|
||||
/* this ignores dimensions that are empty */
|
||||
iupper = (Vec3ia)(select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)));
|
||||
|
||||
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
|
||||
|
||||
/* compute a morton code for the lower and upper grid coordinates. */
|
||||
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
|
||||
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
|
||||
|
||||
|
||||
/* if all bits are equal then we cannot split */
|
||||
if(unlikely(lower_code == upper_code))
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = prim;
|
||||
return;
|
||||
}
|
||||
if (unlikely(lower_code == upper_code))
|
||||
return false;
|
||||
|
||||
/* compute octree level and dimension to perform the split in */
|
||||
const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
|
||||
|
@ -115,25 +67,166 @@ namespace embree
|
|||
|
||||
/* compute world space position of split */
|
||||
const float inv_grid_size = 1.0f / GRID_SIZE;
|
||||
const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend;
|
||||
const float fsplit = base[dim] + isplit * inv_grid_size * extend;
|
||||
assert(prim.lower[dim] <= fsplit && prim.upper[dim] >= fsplit);
|
||||
|
||||
assert(prim.lower[dim] <= fsplit &&
|
||||
prim.upper[dim] >= fsplit);
|
||||
|
||||
dim_o = dim;
|
||||
fsplit_o = fsplit;
|
||||
return true;
|
||||
}
|
||||
|
||||
__forceinline Vec2i computeMC(const PrimRef& ref) const
|
||||
{
|
||||
const Vec3fa lower = ref.lower;
|
||||
const Vec3fa upper = ref.upper;
|
||||
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
|
||||
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
|
||||
Vec3ia ilower(floor(glower));
|
||||
Vec3ia iupper(floor(gupper));
|
||||
|
||||
/* this ignores dimensions that are empty */
|
||||
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
|
||||
|
||||
/* compute a morton code for the lower and upper grid coordinates. */
|
||||
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
|
||||
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
|
||||
return Vec2i(lower_code,upper_code);
|
||||
}
|
||||
|
||||
Vec3fa base;
|
||||
float scale;
|
||||
float extend;
|
||||
};
|
||||
|
||||
struct PresplitItem
|
||||
{
|
||||
union {
|
||||
float priority;
|
||||
unsigned int data;
|
||||
};
|
||||
unsigned int index;
|
||||
|
||||
__forceinline operator unsigned() const {
|
||||
return data;
|
||||
}
|
||||
|
||||
template<typename ProjectedPrimitiveAreaFunc>
|
||||
__forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc)
|
||||
{
|
||||
const float area_aabb = area(ref.bounds());
|
||||
const float area_prim = primitiveArea(ref);
|
||||
if (area_prim == 0.0f) return 0.0f;
|
||||
const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
|
||||
//assert(area_prim <= area_aabb); // may trigger due to numerical issues
|
||||
const float area_diff = max(0.0f, area_aabb - area_prim);
|
||||
//const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
|
||||
const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
|
||||
//const float priority = sqrtf(sqrtf( area_diff ) );
|
||||
//const float priority = sqrtfarea_diff;
|
||||
//const float priority = area_diff; // 104 fps !!!!!!!!!!
|
||||
//const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps
|
||||
//const float priority = area_aabb * max(area_aabb/area_prim,32.0f);
|
||||
//const float priority = area_prim;
|
||||
assert(priority >= 0.0f && priority < FLT_LARGE);
|
||||
return priority;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
|
||||
return cout << "index " << item.index << " priority " << item.priority;
|
||||
};
|
||||
|
||||
#if 1
|
||||
|
||||
template<typename Splitter>
|
||||
void splitPrimitive(const Splitter& splitter,
|
||||
const PrimRef& prim,
|
||||
const unsigned int splitprims,
|
||||
const SplittingGrid& grid,
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
|
||||
unsigned int& numSubPrims)
|
||||
{
|
||||
assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
if (splitprims == 1)
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = prim;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int dim; float fsplit;
|
||||
if (!grid.split_pos(prim, dim, fsplit))
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = prim;
|
||||
return;
|
||||
}
|
||||
|
||||
/* split primitive */
|
||||
const auto splitter = Splitter(prim);
|
||||
BBox3fa left,right;
|
||||
splitter(prim.bounds(),dim,fsplit,left,right);
|
||||
assert(!left.empty());
|
||||
assert(!right.empty());
|
||||
PrimRef left,right;
|
||||
splitter(prim,dim,fsplit,left,right);
|
||||
assert(!left.bounds().empty());
|
||||
assert(!right.bounds().empty());
|
||||
|
||||
|
||||
splitPrimitive(Splitter,PrimRef(left ,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
|
||||
splitPrimitive(Splitter,PrimRef(right,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
|
||||
const unsigned int splitprims_left = splitprims/2;
|
||||
const unsigned int splitprims_right = splitprims - splitprims_left;
|
||||
splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims);
|
||||
splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<typename Splitter>
|
||||
void splitPrimitive(const Splitter& splitter,
|
||||
const PrimRef& prim,
|
||||
const unsigned int targetSubPrims,
|
||||
const SplittingGrid& grid,
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
|
||||
unsigned int& numSubPrims)
|
||||
{
|
||||
assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
auto compare = [] ( const PrimRef& a, const PrimRef& b ) {
|
||||
return area(a.bounds()) < area(b.bounds());
|
||||
};
|
||||
|
||||
subPrims[numSubPrims++] = prim;
|
||||
|
||||
while (numSubPrims < targetSubPrims)
|
||||
{
|
||||
/* get top heap element */
|
||||
std::pop_heap(subPrims+0,subPrims+numSubPrims, compare);
|
||||
PrimRef top = subPrims[--numSubPrims];
|
||||
|
||||
unsigned int dim; float fsplit;
|
||||
if (!grid.split_pos(top, dim, fsplit))
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = top;
|
||||
return;
|
||||
}
|
||||
|
||||
/* split primitive */
|
||||
PrimRef left,right;
|
||||
splitter(top,dim,fsplit,left,right);
|
||||
assert(!left.bounds().empty());
|
||||
assert(!right.bounds().empty());
|
||||
|
||||
subPrims[numSubPrims++] = left;
|
||||
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
|
||||
|
||||
subPrims[numSubPrims++] = right;
|
||||
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
template<typename Mesh, typename SplitterFactory>
|
||||
PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
|
@ -155,30 +248,181 @@ namespace embree
|
|||
}
|
||||
return pinfo;
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline Vec2i computeMC(const Vec3fa &grid_base, const float grid_scale, const PrimRef &ref)
|
||||
template<typename SplitPrimitiveFunc, typename ProjectedPrimitiveAreaFunc, typename PrimVector>
|
||||
PrimInfo createPrimRefArray_presplit(size_t numPrimRefs,
|
||||
PrimVector& prims,
|
||||
const PrimInfo& pinfo,
|
||||
const SplitPrimitiveFunc& splitPrimitive,
|
||||
const ProjectedPrimitiveAreaFunc& primitiveArea)
|
||||
{
|
||||
const Vec3fa lower = ref.lower;
|
||||
const Vec3fa upper = ref.upper;
|
||||
const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
|
||||
const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
|
||||
Vec3ia ilower(floor(glower));
|
||||
Vec3ia iupper(floor(gupper));
|
||||
|
||||
/* this ignores dimensions that are empty */
|
||||
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
|
||||
|
||||
/* compute a morton code for the lower and upper grid coordinates. */
|
||||
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
|
||||
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
|
||||
return Vec2i(lower_code,upper_code);
|
||||
}
|
||||
|
||||
template<typename Mesh, typename SplitterFactory>
|
||||
PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
static const size_t MIN_STEP_SIZE = 128;
|
||||
|
||||
/* use correct number of primitives */
|
||||
size_t numPrimitives = pinfo.size();
|
||||
const size_t numPrimitivesExt = prims.size();
|
||||
const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives;
|
||||
|
||||
/* allocate double buffer presplit items */
|
||||
avector<PresplitItem> preSplitItem0(numPrimitivesExt);
|
||||
avector<PresplitItem> preSplitItem1(numPrimitivesExt);
|
||||
|
||||
/* compute grid */
|
||||
SplittingGrid grid(pinfo.geomBounds);
|
||||
|
||||
/* init presplit items and get total sum */
|
||||
const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
|
||||
float sum = 0.0f;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
preSplitItem0[i].index = (unsigned int)i;
|
||||
const Vec2i mc = grid.computeMC(prims[i]);
|
||||
/* if all bits are equal then we cannot split */
|
||||
preSplitItem0[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority(primitiveArea,prims[i],mc) : 0.0f;
|
||||
/* FIXME: sum undeterministic */
|
||||
sum += preSplitItem0[i].priority;
|
||||
}
|
||||
return sum;
|
||||
},[](const float& a, const float& b) -> float { return a+b; });
|
||||
|
||||
/* compute number of splits per primitive */
|
||||
const float inv_psum = 1.0f / psum;
|
||||
parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
if (preSplitItem0[i].priority <= 0.0f) {
|
||||
preSplitItem0[i].data = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
const float rel_p = (float)numSplitPrimitivesBudget * preSplitItem0[i].priority * inv_psum;
|
||||
if (rel_p < 1) {
|
||||
preSplitItem0[i].data = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
//preSplitItem0[i].data = max(min(ceilf(rel_p),(float)MAX_PRESPLITS_PER_PRIMITIVE),1.0f);
|
||||
preSplitItem0[i].data = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
|
||||
preSplitItem0[i].data = 1 << preSplitItem0[i].data;
|
||||
assert(preSplitItem0[i].data <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
}
|
||||
});
|
||||
|
||||
auto isLeft = [&] (const PresplitItem &ref) { return ref.data <= 1; };
|
||||
size_t center = parallel_partitioning(preSplitItem0.data(),0,numPrimitives,isLeft,1024);
|
||||
assert(center <= numPrimitives);
|
||||
|
||||
/* anything to split ? */
|
||||
if (center >= numPrimitives)
|
||||
return pinfo;
|
||||
|
||||
size_t numPrimitivesToSplit = numPrimitives - center;
|
||||
assert(preSplitItem0[center].data >= 1.0f);
|
||||
|
||||
/* sort presplit items in ascending order */
|
||||
radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024);
|
||||
|
||||
CHECK_PRESPLIT(
|
||||
parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
assert(preSplitItem0[i-1].data <= preSplitItem0[i].data);
|
||||
});
|
||||
);
|
||||
|
||||
unsigned int* primOffset0 = (unsigned int*)preSplitItem1.data();
|
||||
unsigned int* primOffset1 = (unsigned int*)preSplitItem1.data() + numPrimitivesToSplit;
|
||||
|
||||
/* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
|
||||
const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
|
||||
size_t sum = 0;
|
||||
for (size_t i=t.begin(); i<t.end(); i++)
|
||||
{
|
||||
const unsigned int primrefID = preSplitItem0[i].index;
|
||||
const unsigned int splitprims = preSplitItem0[i].data;
|
||||
assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
unsigned int numSubPrims = 0;
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
|
||||
splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
|
||||
assert(numSubPrims);
|
||||
|
||||
numSubPrims--; // can reuse slot
|
||||
sum+=numSubPrims;
|
||||
preSplitItem0[i].data = (numSubPrims << 16) | splitprims;
|
||||
|
||||
primOffset0[i-center] = numSubPrims;
|
||||
}
|
||||
return sum;
|
||||
},[](const size_t& a, const size_t& b) -> size_t { return a+b; });
|
||||
|
||||
/* if we are over budget, need to shrink the range */
|
||||
if (totalNumSubPrims > numSplitPrimitivesBudget)
|
||||
{
|
||||
size_t new_center = numPrimitives-1;
|
||||
size_t sum = 0;
|
||||
for (;new_center>=center;new_center--)
|
||||
{
|
||||
const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16;
|
||||
if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
|
||||
sum += numSubPrims;
|
||||
}
|
||||
new_center++;
|
||||
|
||||
primOffset0 += new_center - center;
|
||||
numPrimitivesToSplit -= new_center - center;
|
||||
center = new_center;
|
||||
assert(numPrimitivesToSplit == (numPrimitives - center));
|
||||
}
|
||||
|
||||
/* parallel prefix sum to compute offsets for storing sub-primitives */
|
||||
const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
|
||||
assert(numPrimitives+offset <= numPrimitivesExt);
|
||||
|
||||
/* iterate over range, and split primitives into sub primitives and append them to prims array */
|
||||
parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
|
||||
for (size_t j=rn.begin(); j<rn.end(); j++)
|
||||
{
|
||||
const unsigned int primrefID = preSplitItem0[j].index;
|
||||
const unsigned int splitprims = preSplitItem0[j].data & 0xFFFF;
|
||||
assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
unsigned int numSubPrims = 0;
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
|
||||
splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
|
||||
|
||||
const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16;
|
||||
assert(numSubPrims-1 == numSubPrimsExpected);
|
||||
|
||||
const size_t newID = numPrimitives + primOffset1[j-center];
|
||||
assert(newID+numSubPrims-1 <= numPrimitivesExt);
|
||||
|
||||
prims[primrefID] = subPrims[0];
|
||||
for (size_t i=1;i<numSubPrims;i++)
|
||||
prims[newID+i-1] = subPrims[i];
|
||||
}
|
||||
});
|
||||
|
||||
numPrimitives += offset;
|
||||
|
||||
/* recompute centroid bounding boxes */
|
||||
const PrimInfo pinfo1 = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
|
||||
PrimInfo p(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
p.add_center2(prims[j]);
|
||||
return p;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
assert(pinfo1.size() == numPrimitives);
|
||||
|
||||
return pinfo1;
|
||||
}
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
template<typename Mesh, typename SplitterFactory>
|
||||
PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator2 iter(scene,types,mblur);
|
||||
|
||||
|
@ -198,179 +442,27 @@ namespace embree
|
|||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
|
||||
/* use correct number of primitives */
|
||||
size_t numPrimitives = pinfo.size();
|
||||
const size_t alloc_numPrimitives = prims.size();
|
||||
const size_t numSplitPrimitivesBudget = alloc_numPrimitives - numPrimitives;
|
||||
|
||||
/* set up primitive splitter */
|
||||
SplitterFactory Splitter(scene);
|
||||
|
||||
|
||||
DBG_PRESPLIT(
|
||||
const size_t org_numPrimitives = pinfo.size();
|
||||
PRINT(numPrimitives);
|
||||
PRINT(alloc_numPrimitives);
|
||||
PRINT(numSplitPrimitivesBudget);
|
||||
);
|
||||
|
||||
/* allocate double buffer presplit items */
|
||||
const size_t presplit_allocation_size = sizeof(PresplitItem)*alloc_numPrimitives;
|
||||
PresplitItem *presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
|
||||
PresplitItem *tmp_presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
|
||||
|
||||
/* compute grid */
|
||||
const Vec3fa grid_base = pinfo.geomBounds.lower;
|
||||
const Vec3fa grid_diag = pinfo.geomBounds.size();
|
||||
const float grid_extend = max(grid_diag.x,max(grid_diag.y,grid_diag.z));
|
||||
const float grid_scale = grid_extend == 0.0f ? 0.0f : GRID_SIZE / grid_extend;
|
||||
|
||||
/* init presplit items and get total sum */
|
||||
const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
|
||||
float sum = 0.0f;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
presplitItem[i].index = (unsigned int)i;
|
||||
const Vec2i mc = computeMC(grid_base,grid_scale,prims[i]);
|
||||
/* if all bits are equal then we cannot split */
|
||||
presplitItem[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority<Mesh>(prims[i],scene,mc) : 0.0f;
|
||||
/* FIXME: sum undeterministic */
|
||||
sum += presplitItem[i].priority;
|
||||
}
|
||||
return sum;
|
||||
},[](const float& a, const float& b) -> float { return a+b; });
|
||||
|
||||
/* compute number of splits per primitive */
|
||||
const float inv_psum = 1.0f / psum;
|
||||
parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
if (presplitItem[i].priority > 0.0f)
|
||||
{
|
||||
const float rel_p = (float)numSplitPrimitivesBudget * presplitItem[i].priority * inv_psum;
|
||||
if (rel_p >= PRIORITY_CUTOFF_THRESHOLD) // need at least a split budget that generates two sub-prims
|
||||
{
|
||||
presplitItem[i].priority = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
|
||||
//presplitItem[i].priority = min(floorf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
|
||||
assert(presplitItem[i].priority >= 0.0f && presplitItem[i].priority <= (float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
|
||||
}
|
||||
else
|
||||
presplitItem[i].priority = 0.0f;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
auto isLeft = [&] (const PresplitItem &ref) { return ref.priority < PRIORITY_CUTOFF_THRESHOLD; };
|
||||
size_t center = parallel_partitioning(presplitItem,0,numPrimitives,isLeft,1024);
|
||||
|
||||
/* anything to split ? */
|
||||
if (center < numPrimitives)
|
||||
{
|
||||
size_t numPrimitivesToSplit = numPrimitives - center;
|
||||
assert(presplitItem[center].priority >= 1.0f);
|
||||
|
||||
/* sort presplit items in ascending order */
|
||||
radix_sort_u32(presplitItem + center,tmp_presplitItem + center,numPrimitivesToSplit,1024);
|
||||
|
||||
CHECK_PRESPLIT(
|
||||
parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
assert(presplitItem[i-1].priority <= presplitItem[i].priority);
|
||||
});
|
||||
);
|
||||
|
||||
unsigned int* primOffset0 = (unsigned int*)tmp_presplitItem;
|
||||
unsigned int* primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit;
|
||||
|
||||
/* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
|
||||
const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
|
||||
size_t sum = 0;
|
||||
for (size_t i=t.begin(); i<t.end(); i++)
|
||||
{
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
|
||||
assert(presplitItem[i].priority >= 1.0f);
|
||||
const unsigned int primrefID = presplitItem[i].index;
|
||||
const float prio = presplitItem[i].priority;
|
||||
const unsigned int geomID = prims[primrefID].geomID();
|
||||
const unsigned int primID = prims[primrefID].primID();
|
||||
const unsigned int split_levels = (unsigned int)prio;
|
||||
unsigned int numSubPrims = 0;
|
||||
splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
|
||||
assert(numSubPrims);
|
||||
numSubPrims--; // can reuse slot
|
||||
sum+=numSubPrims;
|
||||
presplitItem[i].data = (numSubPrims << MAX_PRESPLITS_PER_PRIMITIVE_LOG) | split_levels;
|
||||
primOffset0[i-center] = numSubPrims;
|
||||
}
|
||||
return sum;
|
||||
},[](const size_t& a, const size_t& b) -> size_t { return a+b; });
|
||||
|
||||
/* if we are over budget, need to shrink the range */
|
||||
if (totalNumSubPrims > numSplitPrimitivesBudget)
|
||||
{
|
||||
size_t new_center = numPrimitives-1;
|
||||
size_t sum = 0;
|
||||
for (;new_center>=center;new_center--)
|
||||
{
|
||||
const unsigned int numSubPrims = presplitItem[new_center].data >> MAX_PRESPLITS_PER_PRIMITIVE_LOG;
|
||||
if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
|
||||
sum += numSubPrims;
|
||||
}
|
||||
new_center++;
|
||||
|
||||
primOffset0 += new_center - center;
|
||||
numPrimitivesToSplit -= new_center - center;
|
||||
center = new_center;
|
||||
assert(numPrimitivesToSplit == (numPrimitives - center));
|
||||
}
|
||||
|
||||
/* parallel prefix sum to compute offsets for storing sub-primitives */
|
||||
const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
|
||||
assert(numPrimitives+offset <= alloc_numPrimitives);
|
||||
|
||||
/* iterate over range, and split primitives into sub primitives and append them to prims array */
|
||||
parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
|
||||
for (size_t j=rn.begin(); j<rn.end(); j++)
|
||||
{
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
|
||||
const unsigned int primrefID = presplitItem[j].index;
|
||||
const unsigned int geomID = prims[primrefID].geomID();
|
||||
const unsigned int primID = prims[primrefID].primID();
|
||||
const unsigned int split_levels = presplitItem[j].data & ((unsigned int)(1 << MAX_PRESPLITS_PER_PRIMITIVE_LOG)-1);
|
||||
|
||||
assert(split_levels);
|
||||
assert(split_levels <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
|
||||
unsigned int numSubPrims = 0;
|
||||
splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
|
||||
const size_t newID = numPrimitives + primOffset1[j-center];
|
||||
assert(newID+numSubPrims-1 <= alloc_numPrimitives);
|
||||
prims[primrefID] = subPrims[0];
|
||||
for (size_t i=1;i<numSubPrims;i++)
|
||||
prims[newID+i-1] = subPrims[i];
|
||||
}
|
||||
});
|
||||
|
||||
numPrimitives += offset;
|
||||
DBG_PRESPLIT(
|
||||
PRINT(pinfo.size());
|
||||
PRINT(numPrimitives);
|
||||
PRINT((float)numPrimitives/org_numPrimitives));
|
||||
}
|
||||
|
||||
/* recompute centroid bounding boxes */
|
||||
pinfo = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
|
||||
PrimInfo p(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
p.add_center2(prims[j]);
|
||||
return p;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
assert(pinfo.size() == numPrimitives);
|
||||
auto split_primitive = [&] (const PrimRef &prim,
|
||||
const unsigned int splitprims,
|
||||
const SplittingGrid& grid,
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
|
||||
unsigned int& numSubPrims)
|
||||
{
|
||||
const auto splitter = Splitter(prim);
|
||||
splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims);
|
||||
};
|
||||
|
||||
/* free double buffer presplit items */
|
||||
alignedFree(tmp_presplitItem);
|
||||
alignedFree(presplitItem);
|
||||
return pinfo;
|
||||
auto primitiveArea = [&] (const PrimRef &ref) {
|
||||
const unsigned int geomID = ref.geomID();
|
||||
const unsigned int primID = ref.primID();
|
||||
return ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
|
||||
};
|
||||
|
||||
return createPrimRefArray_presplit(numPrimRefs,prims,pinfo,split_primitive,primitiveArea);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,13 +3,51 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
#include "../common/scene.h"
|
||||
#include "../common/primref.h"
|
||||
#endif
|
||||
|
||||
#include "../builders/primref.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<size_t N>
|
||||
__forceinline void splitPolygon(const BBox3fa& bounds,
|
||||
const size_t dim,
|
||||
const float pos,
|
||||
const Vec3fa (&v)[N+1],
|
||||
BBox3fa& left_o,
|
||||
BBox3fa& right_o)
|
||||
{
|
||||
BBox3fa left = empty, right = empty;
|
||||
/* clip triangle to left and right box by processing all edges */
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
const Vec3fa &v0 = v[i];
|
||||
const Vec3fa &v1 = v[i+1];
|
||||
const float v0d = v0[dim];
|
||||
const float v1d = v1[dim];
|
||||
|
||||
if (v0d <= pos) left. extend(v0); // this point is on left side
|
||||
if (v0d >= pos) right.extend(v0); // this point is on right side
|
||||
|
||||
if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
|
||||
{
|
||||
assert((v1d-v0d) != 0.0f);
|
||||
const float inv_length = 1.0f/(v1d-v0d);
|
||||
const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0);
|
||||
left.extend(c);
|
||||
right.extend(c);
|
||||
}
|
||||
}
|
||||
|
||||
/* clip against current bounds */
|
||||
left_o = intersect(left,bounds);
|
||||
right_o = intersect(right,bounds);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
__forceinline void splitPolygon(const BBox3fa& bounds,
|
||||
const size_t dim,
|
||||
|
@ -78,7 +116,9 @@ namespace embree
|
|||
new (&left_o ) PrimRef(intersect(left ,prim.bounds()),prim.geomID(), prim.primID());
|
||||
new (&right_o) PrimRef(intersect(right,prim.bounds()),prim.geomID(), prim.primID());
|
||||
}
|
||||
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
struct TriangleSplitter
|
||||
{
|
||||
__forceinline TriangleSplitter(const Scene* scene, const PrimRef& prim)
|
||||
|
@ -173,6 +213,13 @@ namespace embree
|
|||
/*! Constructor with an empty body: neither `scene` nor `prim` is read or stored. */
__forceinline DummySplitter(const Scene* scene, const PrimRef& prim)
|
||||
{
|
||||
}
|
||||
|
||||
/*! Split call for PrimRef inputs with an empty body: `left_o` and `right_o` are
 *  left untouched, so callers must not expect them to be written. */
__forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
|
||||
}
|
||||
|
||||
/*! Split call for bounding-box inputs with an empty body: `left_o` and `right_o`
 *  are left untouched, so callers must not expect them to be written. */
__forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct DummySplitterFactory
|
||||
|
@ -187,7 +234,7 @@ namespace embree
|
|||
private:
|
||||
const Scene* scene;
|
||||
};
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ namespace embree
|
|||
typedef BVHNodeRecord<NodeRef> NodeRecord;
|
||||
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
|
||||
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
/*! BVHN default constructor. */
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "../geometry/subdivpatch1.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
#include "../geometry/subgrid.h"
|
||||
#include "../common/accelinstance.h"
|
||||
|
||||
|
@ -66,6 +67,9 @@ namespace embree
|
|||
DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayIntersector1);
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayMBIntersector1);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
|
||||
|
@ -104,6 +108,9 @@ namespace embree
|
|||
DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayIntersector4Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayMBIntersector4Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
|
||||
|
@ -142,6 +149,9 @@ namespace embree
|
|||
DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayIntersector8Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayMBIntersector8Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
|
||||
|
@ -180,33 +190,20 @@ namespace embree
|
|||
DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayIntersector16Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayMBIntersector16Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4IntersectorStreamPacketFallback);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoeller);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoellerNoFilter);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamMoeller);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4vIntersectorStreamPluecker);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamPluecker);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoeller);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoellerNoFilter);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamMoeller);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamPluecker);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamPluecker);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream);
|
||||
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
|
||||
|
@ -237,7 +234,10 @@ namespace embree
|
|||
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
|
@ -266,6 +266,7 @@ namespace embree
|
|||
IF_ENABLED_QUADS (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelQuadMeshSAH));
|
||||
IF_ENABLED_USER (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelVirtualSAH));
|
||||
IF_ENABLED_INSTANCE (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelInstanceSAH));
|
||||
IF_ENABLED_INSTANCE_ARRAY (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelInstanceArraySAH));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4vBuilder_OBB_New));
|
||||
IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4iBuilder_OBB_New));
|
||||
|
@ -296,7 +297,10 @@ namespace embree
|
|||
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceSceneBuilderSAH));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceMBSceneBuilderSAH));
|
||||
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceArraySceneBuilderSAH));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceArrayMBSceneBuilderSAH));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridSceneBuilderSAH));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridMBSceneBuilderSAH));
|
||||
|
||||
|
@ -349,6 +353,9 @@ namespace embree
|
|||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersector1));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector1));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceArrayIntersector1));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceArrayMBIntersector1));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector1Moeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridMBIntersector1Moeller))
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector1Pluecker));
|
||||
|
@ -389,7 +396,10 @@ namespace embree
|
|||
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersector4Chunk));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector4Chunk));
|
||||
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceArrayIntersector4Chunk));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceArrayMBIntersector4Chunk));
|
||||
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector4HybridMoeller));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector4HybridMoeller));
|
||||
|
@ -424,13 +434,16 @@ namespace embree
|
|||
|
||||
IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4SubdivPatch1Intersector8));
|
||||
IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4SubdivPatch1MBIntersector8));
|
||||
|
||||
|
||||
IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4VirtualIntersector8Chunk));
|
||||
IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4VirtualMBIntersector8Chunk));
|
||||
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceIntersector8Chunk));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector8Chunk));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceArrayIntersector8Chunk));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceArrayMBIntersector8Chunk));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridIntersector8HybridMoeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridMBIntersector8HybridMoeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridIntersector8HybridPluecker));
|
||||
|
@ -470,29 +483,13 @@ namespace embree
|
|||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceIntersector16Chunk));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceMBIntersector16Chunk));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceArrayIntersector16Chunk));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceArrayMBIntersector16Chunk));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridIntersector16HybridMoeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridMBIntersector16HybridMoeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridIntersector16HybridPluecker));
|
||||
|
||||
/* select stream intersectors */
|
||||
SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4IntersectorStreamPacketFallback);
|
||||
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4IntersectorStreamMoeller));
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4IntersectorStreamMoellerNoFilter));
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersectorStreamMoeller));
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4vIntersectorStreamPluecker));
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersectorStreamPluecker));
|
||||
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamMoeller));
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamMoellerNoFilter));
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersectorStreamMoeller));
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamPluecker));
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersectorStreamPluecker));
|
||||
|
||||
IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4VirtualIntersectorStream));
|
||||
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersectorStream));
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -509,7 +506,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4OBBVirtualCurveIntersector4Hybrid();
|
||||
intersectors.intersector8 = BVH4OBBVirtualCurveIntersector8Hybrid();
|
||||
intersectors.intersector16 = BVH4OBBVirtualCurveIntersector16Hybrid();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -523,7 +519,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4Hybrid();
|
||||
intersectors.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8Hybrid();
|
||||
intersectors.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16Hybrid();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -545,7 +540,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4OBBVirtualCurveIntersector4HybridMB();
|
||||
intersectors.intersector8 = BVH4OBBVirtualCurveIntersector8HybridMB();
|
||||
intersectors.intersector16 = BVH4OBBVirtualCurveIntersector16HybridMB();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -559,7 +553,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4HybridMB();
|
||||
intersectors.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8HybridMB();
|
||||
intersectors.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16HybridMB();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -581,8 +574,6 @@ namespace embree
|
|||
intersectors.intersector8_nofilter = BVH4Triangle4Intersector8HybridMoellerNoFilter();
|
||||
intersectors.intersector16_filter = BVH4Triangle4Intersector16HybridMoeller();
|
||||
intersectors.intersector16_nofilter = BVH4Triangle4Intersector16HybridMoellerNoFilter();
|
||||
intersectors.intersectorN_filter = BVH4Triangle4IntersectorStreamMoeller();
|
||||
intersectors.intersectorN_nofilter = BVH4Triangle4IntersectorStreamMoellerNoFilter();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -597,7 +588,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Triangle4vIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH4Triangle4vIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH4Triangle4vIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH4Triangle4vIntersectorStreamPluecker();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -614,7 +604,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Triangle4iIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH4Triangle4iIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH4Triangle4iIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH4Triangle4iIntersectorStreamMoeller();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -627,7 +616,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Triangle4iIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH4Triangle4iIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH4Triangle4iIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH4Triangle4iIntersectorStreamPluecker();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -647,7 +635,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Triangle4vMBIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH4Triangle4vMBIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH4Triangle4vMBIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -660,7 +647,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Triangle4vMBIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH4Triangle4vMBIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH4Triangle4vMBIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -680,7 +666,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Triangle4iMBIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH4Triangle4iMBIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH4Triangle4iMBIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -693,7 +678,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Triangle4iMBIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH4Triangle4iMBIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH4Triangle4iMBIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -716,8 +700,6 @@ namespace embree
|
|||
intersectors.intersector8_nofilter = BVH4Quad4vIntersector8HybridMoellerNoFilter();
|
||||
intersectors.intersector16_filter = BVH4Quad4vIntersector16HybridMoeller();
|
||||
intersectors.intersector16_nofilter = BVH4Quad4vIntersector16HybridMoellerNoFilter();
|
||||
intersectors.intersectorN_filter = BVH4Quad4vIntersectorStreamMoeller();
|
||||
intersectors.intersectorN_nofilter = BVH4Quad4vIntersectorStreamMoellerNoFilter();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -730,7 +712,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Quad4vIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH4Quad4vIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH4Quad4vIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH4Quad4vIntersectorStreamPluecker();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -750,7 +731,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Quad4iIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH4Quad4iIntersector8HybridMoeller();
|
||||
intersectors.intersector16= BVH4Quad4iIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH4Quad4iIntersectorStreamMoeller();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -763,7 +743,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Quad4iIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH4Quad4iIntersector8HybridPluecker();
|
||||
intersectors.intersector16= BVH4Quad4iIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH4Quad4iIntersectorStreamPluecker();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -783,7 +762,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Quad4iMBIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH4Quad4iMBIntersector8HybridMoeller();
|
||||
intersectors.intersector16= BVH4Quad4iMBIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -796,7 +774,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4Quad4iMBIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH4Quad4iMBIntersector8HybridPluecker();
|
||||
intersectors.intersector16= BVH4Quad4iMBIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -829,7 +806,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4VirtualIntersector4Chunk();
|
||||
intersectors.intersector8 = BVH4VirtualIntersector8Chunk();
|
||||
intersectors.intersector16 = BVH4VirtualIntersector16Chunk();
|
||||
intersectors.intersectorN = BVH4VirtualIntersectorStream();
|
||||
#endif
|
||||
intersectors.collider = BVH4ColliderUserGeom();
|
||||
return intersectors;
|
||||
|
@ -844,7 +820,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4VirtualMBIntersector4Chunk();
|
||||
intersectors.intersector8 = BVH4VirtualMBIntersector8Chunk();
|
||||
intersectors.intersector16 = BVH4VirtualMBIntersector16Chunk();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -858,7 +833,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4InstanceIntersector4Chunk();
|
||||
intersectors.intersector8 = BVH4InstanceIntersector8Chunk();
|
||||
intersectors.intersector16 = BVH4InstanceIntersector16Chunk();
|
||||
intersectors.intersectorN = BVH4InstanceIntersectorStream();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -872,7 +846,32 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4InstanceMBIntersector4Chunk();
|
||||
intersectors.intersector8 = BVH4InstanceMBIntersector8Chunk();
|
||||
intersectors.intersector16 = BVH4InstanceMBIntersector16Chunk();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
||||
/*! Builds the intersector table for a BVH4 over instance arrays.
 *
 *  \param bvh  acceleration structure stored in the table's `ptr` field
 *  \return     table with the single-ray intersector always set; the 4-, 8-
 *              and 16-wide chunk intersectors are set only when
 *              EMBREE_RAY_PACKETS is defined */
Accel::Intersectors BVH4Factory::BVH4InstanceArrayIntersectors(BVH4* bvh)
{
  Accel::Intersectors table;
  table.ptr = bvh;

  /* single-ray traversal */
  table.intersector1 = BVH4InstanceArrayIntersector1();

#ifdef EMBREE_RAY_PACKETS
  /* packet traversal */
  table.intersector4  = BVH4InstanceArrayIntersector4Chunk();
  table.intersector8  = BVH4InstanceArrayIntersector8Chunk();
  table.intersector16 = BVH4InstanceArrayIntersector16Chunk();
#endif

  return table;
}
|
||||
|
||||
/*! Builds the intersector table for the MB variant of the BVH4 over instance arrays.
 *
 *  \param bvh  acceleration structure stored in the table's `ptr` field
 *  \return     table with the single-ray MB intersector always set; the 4-,
 *              8- and 16-wide MB chunk intersectors are set only when
 *              EMBREE_RAY_PACKETS is defined */
Accel::Intersectors BVH4Factory::BVH4InstanceArrayMBIntersectors(BVH4* bvh)
{
  Accel::Intersectors table;
  table.ptr = bvh;

  /* single-ray traversal */
  table.intersector1 = BVH4InstanceArrayMBIntersector1();

#ifdef EMBREE_RAY_PACKETS
  /* packet traversal */
  table.intersector4  = BVH4InstanceArrayMBIntersector4Chunk();
  table.intersector8  = BVH4InstanceArrayMBIntersector8Chunk();
  table.intersector16 = BVH4InstanceArrayMBIntersector16Chunk();
#endif

  return table;
}
|
||||
|
@ -886,7 +885,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4SubdivPatch1Intersector4();
|
||||
intersectors.intersector8 = BVH4SubdivPatch1Intersector8();
|
||||
intersectors.intersector16 = BVH4SubdivPatch1Intersector16();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -900,7 +898,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4SubdivPatch1MBIntersector4();
|
||||
intersectors.intersector8 = BVH4SubdivPatch1MBIntersector8();
|
||||
intersectors.intersector16 = BVH4SubdivPatch1MBIntersector16();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -1255,6 +1252,35 @@ namespace embree
|
|||
return new AccelInstance(accel,builder,intersectors);
|
||||
}
|
||||
|
||||
/* Creates a BVH4 acceleration structure over instance-array primitives.
 *
 * The builder is chosen from scene->device->object_builder:
 *   "default" -> picked by bvariant (STATIC: SAH scene builder,
 *                DYNAMIC: two-level builder, HIGH_QUALITY: not supported),
 *   "sah"     -> SAH scene builder,
 *   "dynamic" -> two-level builder,
 *   anything else raises RTC_ERROR_INVALID_ARGUMENT.
 *
 * Returns a newly allocated AccelInstance wrapping the BVH, builder and intersectors. */
Accel* BVH4Factory::BVH4InstanceArray(Scene* scene, BuildVariant bvariant)
{
  BVH4* accel = new BVH4(InstanceArrayPrimitive::type,scene);
  Accel::Intersectors table = BVH4InstanceArrayIntersectors(accel);
  const auto gtype = Geometry::MTY_INSTANCE_ARRAY;
  const auto& builderName = scene->device->object_builder;

  Builder* builder = nullptr;
  if (builderName == "sah")
  {
    builder = BVH4InstanceArraySceneBuilderSAH(accel,scene,gtype);
  }
  else if (builderName == "dynamic")
  {
    builder = BVH4BuilderTwoLevelInstanceArraySAH(accel,scene,gtype,false);
  }
  else if (builderName == "default")
  {
    switch (bvariant)
    {
    case BuildVariant::STATIC:
      builder = BVH4InstanceArraySceneBuilderSAH(accel,scene,gtype);
      break;
    case BuildVariant::DYNAMIC:
      builder = BVH4BuilderTwoLevelInstanceArraySAH(accel,scene,gtype,false);
      break;
    case BuildVariant::HIGH_QUALITY:
      /* not expected here; if assert is compiled out, builder stays nullptr */
      assert(false);
      break;
    }
  }
  else
  {
    /* NOTE(review): accel is already allocated at this point; if this macro
     * throws, accel looks like it is never released - confirm. */
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+builderName+" for BVH4<Object>");
  }

  return new AccelInstance(accel,builder,table);
}
|
||||
|
||||
/* Creates the "MB" BVH4 acceleration structure over instance-array primitives.
 * Unlike BVH4InstanceArray there is no builder selection: the MB SAH scene
 * builder is always used. Returns a newly allocated AccelInstance. */
Accel* BVH4Factory::BVH4InstanceArrayMB(Scene* scene)
{
  const auto gtype = Geometry::MTY_INSTANCE_ARRAY;

  BVH4* accel = new BVH4(InstanceArrayPrimitive::type,scene);
  Accel::Intersectors table = BVH4InstanceArrayMBIntersectors(accel);
  Builder* sahBuilder = BVH4InstanceArrayMBSceneBuilderSAH(accel,scene,gtype);

  return new AccelInstance(accel,sahBuilder,table);
}
|
||||
|
||||
Accel::Intersectors BVH4Factory::BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant)
|
||||
{
|
||||
Accel::Intersectors intersectors;
|
||||
|
@ -1266,7 +1292,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4GridIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH4GridIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH4GridIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
}
|
||||
else /* if (ivariant == IntersectVariant::ROBUST) */
|
||||
|
@ -1276,7 +1301,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4GridIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH4GridIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH4GridIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
}
|
||||
return intersectors;
|
||||
|
@ -1291,7 +1315,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH4GridMBIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH4GridMBIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH4GridMBIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
|
|
@ -48,6 +48,9 @@ namespace embree
|
|||
Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH4InstanceMB(Scene* scene, bool isExpensive);
|
||||
|
||||
Accel* BVH4InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH4InstanceArrayMB(Scene* scene);
|
||||
|
||||
Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
|
||||
|
@ -77,7 +80,10 @@ namespace embree
|
|||
|
||||
Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh);
|
||||
Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh);
|
||||
|
||||
|
||||
Accel::Intersectors BVH4InstanceArrayIntersectors(BVH4* bvh);
|
||||
Accel::Intersectors BVH4InstanceArrayMBIntersectors(BVH4* bvh);
|
||||
|
||||
Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh);
|
||||
Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh);
|
||||
|
||||
|
@ -122,7 +128,10 @@ namespace embree
|
|||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
|
||||
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayMBIntersector1);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
|
||||
|
@ -161,6 +170,9 @@ namespace embree
|
|||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayIntersector4Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayMBIntersector4Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
|
||||
|
@ -201,6 +213,9 @@ namespace embree
|
|||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayIntersector8Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayMBIntersector8Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
|
||||
|
@ -241,30 +256,13 @@ namespace embree
|
|||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayIntersector16Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayMBIntersector16Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
|
||||
|
||||
// ==============
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4IntersectorStreamPacketFallback);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoeller);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamMoeller);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4vIntersectorStreamPluecker);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoeller);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamMoeller);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamPluecker);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream);
|
||||
|
||||
// SAH scene builders
|
||||
private:
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
|
||||
|
@ -294,6 +292,9 @@ namespace embree
|
|||
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
|
@ -312,5 +313,6 @@ namespace embree
|
|||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
};
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "../geometry/subdivpatch1.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
#include "../geometry/subgrid.h"
|
||||
#include "../common/accelinstance.h"
|
||||
|
||||
|
@ -66,6 +67,9 @@ namespace embree
|
|||
DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayIntersector1);
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayMBIntersector1);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
|
||||
|
@ -101,6 +105,9 @@ namespace embree
|
|||
DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayIntersector4Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayMBIntersector4Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
|
||||
|
||||
|
@ -135,6 +142,9 @@ namespace embree
|
|||
DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayIntersector8Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayMBIntersector8Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
|
||||
|
||||
|
@ -169,27 +179,12 @@ namespace embree
|
|||
DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayIntersector16Chunk);
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayMBIntersector16Chunk);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
|
||||
DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker);
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream);
|
||||
|
||||
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream);
|
||||
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
|
@ -212,6 +207,9 @@ namespace embree
|
|||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
@ -224,6 +222,7 @@ namespace embree
|
|||
DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
|
||||
BVH8Factory::BVH8Factory(int bfeatures, int ifeatures)
|
||||
{
|
||||
|
@ -256,6 +255,9 @@ namespace embree
|
|||
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceSceneBuilderSAH));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceMBSceneBuilderSAH));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceArraySceneBuilderSAH));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceArrayMBSceneBuilderSAH));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridSceneBuilderSAH));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridMBSceneBuilderSAH));
|
||||
|
@ -270,6 +272,7 @@ namespace embree
|
|||
IF_ENABLED_QUADS (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelQuadMeshSAH));
|
||||
IF_ENABLED_USER (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelVirtualSAH));
|
||||
IF_ENABLED_INSTANCE (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelInstanceSAH));
|
||||
IF_ENABLED_INSTANCE_ARRAY (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelInstanceArraySAH));
|
||||
}
|
||||
|
||||
void BVH8Factory::selectIntersectors(int features)
|
||||
|
@ -313,6 +316,9 @@ namespace embree
|
|||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector1));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector1));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayIntersector1));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayMBIntersector1));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector1Moeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridMBIntersector1Moeller))
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector1Pluecker));
|
||||
|
@ -351,6 +357,9 @@ namespace embree
|
|||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector4Chunk));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector4Chunk));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayIntersector4Chunk));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayMBIntersector4Chunk));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector4HybridMoeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector4HybridPluecker));
|
||||
|
||||
|
@ -386,6 +395,9 @@ namespace embree
|
|||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector8Chunk));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector8Chunk));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayIntersector8Chunk));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayMBIntersector8Chunk));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector8HybridMoeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector8HybridPluecker));
|
||||
|
||||
|
@ -421,29 +433,12 @@ namespace embree
|
|||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceIntersector16Chunk));
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceMBIntersector16Chunk));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceArrayIntersector16Chunk));
|
||||
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceArrayMBIntersector16Chunk));
|
||||
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH8GridIntersector16HybridMoeller));
|
||||
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH8GridIntersector16HybridPluecker));
|
||||
|
||||
/* select stream intersectors */
|
||||
|
||||
SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8IntersectorStreamPacketFallback);
|
||||
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4IntersectorStreamMoeller));
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4IntersectorStreamMoellerNoFilter));
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersectorStreamMoeller));
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vIntersectorStreamPluecker));
|
||||
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersectorStreamPluecker));
|
||||
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamMoeller));
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamMoellerNoFilter));
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersectorStreamMoeller));
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamPluecker));
|
||||
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersectorStreamPluecker));
|
||||
|
||||
IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualIntersectorStream));
|
||||
|
||||
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersectorStream));
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -460,7 +455,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4Hybrid();
|
||||
intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8Hybrid();
|
||||
intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16Hybrid();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -474,7 +468,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4Hybrid();
|
||||
intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8Hybrid();
|
||||
intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16Hybrid();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -496,7 +489,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4HybridMB();
|
||||
intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8HybridMB();
|
||||
intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16HybridMB();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -510,7 +502,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4HybridMB();
|
||||
intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8HybridMB();
|
||||
intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16HybridMB();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -532,8 +523,6 @@ namespace embree
|
|||
intersectors.intersector8_nofilter = BVH8Triangle4Intersector8HybridMoellerNoFilter();
|
||||
intersectors.intersector16_filter = BVH8Triangle4Intersector16HybridMoeller();
|
||||
intersectors.intersector16_nofilter = BVH8Triangle4Intersector16HybridMoellerNoFilter();
|
||||
intersectors.intersectorN_filter = BVH8Triangle4IntersectorStreamMoeller();
|
||||
intersectors.intersectorN_nofilter = BVH8Triangle4IntersectorStreamMoellerNoFilter();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -554,7 +543,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Triangle4vIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH8Triangle4vIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH8Triangle4vIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH8Triangle4vIntersectorStreamPluecker();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -571,7 +559,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Triangle4iIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH8Triangle4iIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH8Triangle4iIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH8Triangle4iIntersectorStreamMoeller();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -584,7 +571,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Triangle4iIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH8Triangle4iIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH8Triangle4iIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH8Triangle4iIntersectorStreamPluecker();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -604,7 +590,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -617,7 +602,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -637,7 +621,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -650,7 +633,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -673,8 +655,6 @@ namespace embree
|
|||
intersectors.intersector8_nofilter = BVH8Quad4vIntersector8HybridMoellerNoFilter();
|
||||
intersectors.intersector16_filter = BVH8Quad4vIntersector16HybridMoeller();
|
||||
intersectors.intersector16_nofilter = BVH8Quad4vIntersector16HybridMoellerNoFilter();
|
||||
intersectors.intersectorN_filter = BVH8Quad4vIntersectorStreamMoeller();
|
||||
intersectors.intersectorN_nofilter = BVH8Quad4vIntersectorStreamMoellerNoFilter();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -687,7 +667,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Quad4vIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH8Quad4vIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH8Quad4vIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH8Quad4vIntersectorStreamPluecker();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -707,7 +686,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Quad4iIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH8Quad4iIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH8Quad4iIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH8Quad4iIntersectorStreamMoeller();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -720,7 +698,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Quad4iIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH8Quad4iIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH8Quad4iIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH8Quad4iIntersectorStreamPluecker();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -740,7 +717,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -753,7 +729,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -794,7 +769,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8VirtualIntersector4Chunk();
|
||||
intersectors.intersector8 = BVH8VirtualIntersector8Chunk();
|
||||
intersectors.intersector16 = BVH8VirtualIntersector16Chunk();
|
||||
intersectors.intersectorN = BVH8VirtualIntersectorStream();
|
||||
#endif
|
||||
intersectors.collider = BVH8ColliderUserGeom();
|
||||
return intersectors;
|
||||
|
@ -809,7 +783,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8VirtualMBIntersector4Chunk();
|
||||
intersectors.intersector8 = BVH8VirtualMBIntersector8Chunk();
|
||||
intersectors.intersector16 = BVH8VirtualMBIntersector16Chunk();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
@ -823,7 +796,19 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8InstanceIntersector4Chunk();
|
||||
intersectors.intersector8 = BVH8InstanceIntersector8Chunk();
|
||||
intersectors.intersector16 = BVH8InstanceIntersector16Chunk();
|
||||
intersectors.intersectorN = BVH8InstanceIntersectorStream();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
||||
/* Builds the intersector table for a BVH8 whose leaves are instance arrays.
 * The table stores the BVH pointer and the single-ray intersector; the 4/8/16
 * wide chunk intersectors are registered only when EMBREE_RAY_PACKETS is defined. */
Accel::Intersectors BVH8Factory::BVH8InstanceArrayIntersectors(BVH8* bvh)
{
  Accel::Intersectors table;
  table.ptr = bvh;

  /* single-ray traversal */
  table.intersector1 = BVH8InstanceArrayIntersector1();

#if defined(EMBREE_RAY_PACKETS)
  /* packet traversal, one entry per packet width */
  table.intersector4  = BVH8InstanceArrayIntersector4Chunk();
  table.intersector8  = BVH8InstanceArrayIntersector8Chunk();
  table.intersector16 = BVH8InstanceArrayIntersector16Chunk();
#endif

  return table;
}
|
||||
|
@ -837,7 +822,19 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8InstanceMBIntersector4Chunk();
|
||||
intersectors.intersector8 = BVH8InstanceMBIntersector8Chunk();
|
||||
intersectors.intersector16 = BVH8InstanceMBIntersector16Chunk();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
||||
/* Builds the intersector table for the "MB" flavour of the BVH8 instance-array
 * accel (MB presumably stands for motion blur - confirm against the primitive docs).
 * Packet intersectors are registered only when EMBREE_RAY_PACKETS is defined. */
Accel::Intersectors BVH8Factory::BVH8InstanceArrayMBIntersectors(BVH8* bvh)
{
  Accel::Intersectors table;
  table.ptr = bvh;

  /* single-ray traversal */
  table.intersector1 = BVH8InstanceArrayMBIntersector1();

#if defined(EMBREE_RAY_PACKETS)
  /* packet traversal, one entry per packet width */
  table.intersector4  = BVH8InstanceArrayMBIntersector4Chunk();
  table.intersector8  = BVH8InstanceArrayMBIntersector8Chunk();
  table.intersector16 = BVH8InstanceArrayMBIntersector16Chunk();
#endif

  return table;
}
|
||||
|
@ -1086,6 +1083,28 @@ namespace embree
|
|||
return new AccelInstance(accel,builder,intersectors);
|
||||
}
|
||||
|
||||
/* Creates a BVH8 acceleration structure over instance-array primitives.
 *
 * The builder is chosen from scene->device->object_builder:
 *   "default" -> picked by bvariant (STATIC: SAH scene builder,
 *                DYNAMIC: two-level builder, HIGH_QUALITY: not supported),
 *   "sah"     -> SAH scene builder,
 *   "dynamic" -> two-level builder,
 *   anything else raises RTC_ERROR_INVALID_ARGUMENT.
 *
 * Returns a newly allocated AccelInstance wrapping the BVH, builder and intersectors. */
Accel* BVH8Factory::BVH8InstanceArray(Scene* scene, BuildVariant bvariant)
{
  BVH8* accel = new BVH8(InstanceArrayPrimitive::type,scene);
  Accel::Intersectors table = BVH8InstanceArrayIntersectors(accel);
  const auto gtype = Geometry::MTY_INSTANCE_ARRAY;
  const auto& builderName = scene->device->object_builder;

  Builder* builder = nullptr;
  if (builderName == "sah")
  {
    builder = BVH8InstanceArraySceneBuilderSAH(accel,scene,gtype);
  }
  else if (builderName == "dynamic")
  {
    builder = BVH8BuilderTwoLevelInstanceArraySAH(accel,scene,gtype,false);
  }
  else if (builderName == "default")
  {
    switch (bvariant)
    {
    case BuildVariant::STATIC:
      builder = BVH8InstanceArraySceneBuilderSAH(accel,scene,gtype);
      break;
    case BuildVariant::DYNAMIC:
      builder = BVH8BuilderTwoLevelInstanceArraySAH(accel,scene,gtype,false);
      break;
    case BuildVariant::HIGH_QUALITY:
      /* not expected here; if assert is compiled out, builder stays nullptr */
      assert(false);
      break;
    }
  }
  else
  {
    /* NOTE(review): accel is already allocated at this point; if this macro
     * throws, accel looks like it is never released - confirm. */
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+builderName+" for BVH8<Object>");
  }

  return new AccelInstance(accel,builder,table);
}
|
||||
|
||||
Accel* BVH8Factory::BVH8InstanceMB(Scene* scene, bool isExpensive)
|
||||
{
|
||||
BVH8* accel = new BVH8(InstancePrimitive::type,scene);
|
||||
|
@ -1095,6 +1114,15 @@ namespace embree
|
|||
return new AccelInstance(accel,builder,intersectors);
|
||||
}
|
||||
|
||||
/*! Creates a BVH8 over InstanceArrayPrimitive for the InstanceArrayMB
 *  configuration. The BVH is always paired with the intersectors from
 *  BVH8InstanceArrayMBIntersectors and with the
 *  BVH8InstanceArrayMBSceneBuilderSAH builder; there is no builder selection. */
Accel* BVH8Factory::BVH8InstanceArrayMB(Scene* scene)
{
  BVH8* const bvh = new BVH8(InstanceArrayPrimitive::type,scene);
  Accel::Intersectors isecs = BVH8InstanceArrayMBIntersectors(bvh);
  const auto mask = Geometry::MTY_INSTANCE_ARRAY;
  Builder* const sahBuilder = BVH8InstanceArrayMBSceneBuilderSAH(bvh,scene,mask);
  return new AccelInstance(bvh,sahBuilder,isecs);
}
|
||||
|
||||
Accel::Intersectors BVH8Factory::BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant)
|
||||
{
|
||||
Accel::Intersectors intersectors;
|
||||
|
@ -1106,7 +1134,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8GridIntersector4HybridMoeller();
|
||||
intersectors.intersector8 = BVH8GridIntersector8HybridMoeller();
|
||||
intersectors.intersector16 = BVH8GridIntersector16HybridMoeller();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
}
|
||||
else /* if (ivariant == IntersectVariant::ROBUST) */
|
||||
|
@ -1116,7 +1143,6 @@ namespace embree
|
|||
intersectors.intersector4 = BVH8GridIntersector4HybridPluecker();
|
||||
intersectors.intersector8 = BVH8GridIntersector8HybridPluecker();
|
||||
intersectors.intersector16 = BVH8GridIntersector16HybridPluecker();
|
||||
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
|
||||
#endif
|
||||
}
|
||||
return intersectors;
|
||||
|
@ -1131,7 +1157,6 @@ namespace embree
|
|||
intersectors.intersector4 = nullptr;
|
||||
intersectors.intersector8 = nullptr;
|
||||
intersectors.intersector16 = nullptr;
|
||||
intersectors.intersectorN = nullptr;
|
||||
#endif
|
||||
return intersectors;
|
||||
}
|
||||
|
|
|
@ -39,6 +39,9 @@ namespace embree
|
|||
Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH8InstanceMB(Scene* scene, bool isExpensive);
|
||||
|
||||
Accel* BVH8InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH8InstanceArrayMB(Scene* scene);
|
||||
|
||||
Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
|
||||
|
@ -70,6 +73,9 @@ namespace embree
|
|||
Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh);
|
||||
Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh);
|
||||
|
||||
Accel::Intersectors BVH8InstanceArrayIntersectors(BVH8* bvh);
|
||||
Accel::Intersectors BVH8InstanceArrayMBIntersectors(BVH8* bvh);
|
||||
|
||||
Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
|
||||
|
@ -111,6 +117,9 @@ namespace embree
|
|||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayMBIntersector1);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
|
||||
|
@ -145,7 +154,10 @@ namespace embree
|
|||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
|
||||
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayIntersector4Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayMBIntersector4Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
|
||||
|
||||
|
@ -180,9 +192,12 @@ namespace embree
|
|||
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayIntersector8Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayMBIntersector8Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
|
||||
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
|
||||
|
@ -213,28 +228,13 @@ namespace embree
|
|||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
|
||||
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayIntersector16Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayMBIntersector16Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker);
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream);
|
||||
|
||||
// SAH scene builders
|
||||
private:
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
|
||||
|
@ -258,6 +258,9 @@ namespace embree
|
|||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
@ -276,5 +279,6 @@ namespace embree
|
|||
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
};
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "../geometry/quadi.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
|
||||
#if defined(__64BIT__)
|
||||
# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
|
||||
|
@ -399,6 +400,50 @@ namespace embree
|
|||
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
|
||||
};
|
||||
|
||||
template<int N>
|
||||
struct CreateMortonLeaf<N,InstanceArrayPrimitive>
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
typedef typename BVH::NodeRecord NodeRecord;
|
||||
|
||||
__forceinline CreateMortonLeaf (InstanceArray* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
|
||||
: mesh(mesh), morton(morton), geomID_(geomID) {}
|
||||
|
||||
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
|
||||
{
|
||||
vfloat4 lower(pos_inf);
|
||||
vfloat4 upper(neg_inf);
|
||||
size_t items = current.size();
|
||||
size_t start = current.begin();
|
||||
assert(items <= 1);
|
||||
|
||||
/* allocate leaf node */
|
||||
InstanceArrayPrimitive* accel = (InstanceArrayPrimitive*) alloc.malloc1(items*sizeof(InstanceArrayPrimitive),BVH::byteAlignment);
|
||||
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
|
||||
const InstanceArray* instance = this->mesh;
|
||||
|
||||
BBox3fa bounds = empty;
|
||||
for (size_t i=0; i<items; i++)
|
||||
{
|
||||
const unsigned int primID = morton[start+i].index;
|
||||
bounds.extend(instance->bounds(primID));
|
||||
new (&accel[i]) InstanceArrayPrimitive(geomID_, primID);
|
||||
}
|
||||
|
||||
BBox3fx box_o = (BBox3fx&)bounds;
|
||||
#if ROTATE_TREE
|
||||
if (N == 4)
|
||||
box_o.lower.a = current.size();
|
||||
#endif
|
||||
return NodeRecord(ref,box_o);
|
||||
}
|
||||
private:
|
||||
InstanceArray* mesh;
|
||||
BVHBuilderMorton::BuildPrim* morton;
|
||||
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
|
||||
};
|
||||
|
||||
template<typename Mesh>
|
||||
struct CalculateMeshBounds
|
||||
{
|
||||
|
@ -523,7 +568,14 @@ namespace embree
|
|||
#if defined(EMBREE_GEOMETRY_INSTANCE)
/* Morton mesh-builder factories for Instance geometry. Each one casts the
   opaque `bvh` pointer to the matching BVH type and returns a newly
   allocated BVHNMeshBuilderMorton; the `mode` argument is not used. */
Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,Instance,InstancePrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
/* NOTE(review): the 8-wide builder is given BVH4::maxLeafBlocks - confirm this
   is intended (presumably equal to BVH8::maxLeafBlocks). */
Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
/* Morton mesh-builder factories for InstanceArray geometry. The opaque `bvh`
   pointer is cast to the matching BVH type and handed to a newly allocated
   BVHNMeshBuilderMorton; the `mode` argument is not used. */
Builder* BVH4InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  BVH4* const accel = (BVH4*)bvh;
  return new class BVHNMeshBuilderMorton<4,InstanceArray,InstanceArrayPrimitive>(accel,mesh,gtype,geomID,1,BVH4::maxLeafBlocks);
}
#if defined(__AVX__)
/* NOTE(review): the 8-wide builder is given BVH4::maxLeafBlocks - confirm this
   is intended (presumably equal to BVH8::maxLeafBlocks). */
Builder* BVH8InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  BVH8* const accel = (BVH8*)bvh;
  return new class BVHNMeshBuilderMorton<8,InstanceArray,InstanceArrayPrimitive>(accel,mesh,gtype,geomID,1,BVH4::maxLeafBlocks);
}
#endif
#endif
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "../geometry/quadi.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
#include "../geometry/subgrid.h"
|
||||
|
||||
#include "../common/state.h"
|
||||
|
@ -150,7 +151,7 @@ namespace embree
|
|||
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
|
||||
prims.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
|
||||
PrimInfo pinfo = mesh ?
|
||||
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
|
||||
|
@ -518,14 +519,35 @@ namespace embree
|
|||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
/* SAH builder factories for InstancePrimitive BVHs. The *Scene* variants
   build over a whole Scene, the *Mesh* variants over a single Instance with
   an explicit geomID. Each function is defined exactly once; the `mode`
   argument of the mesh variants is not used. */
Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
  return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
  return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
  return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
  /* NOTE(review): two consecutive return statements that differ only in the
     sixth constructor argument (inf vs 1). Only the first one executes; the
     second is unreachable. Confirm which value is intended and delete the
     other line. */
  return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,gtype);
  return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
/* SAH builder factories for InstanceArrayPrimitive BVHs. The *Scene*
   variants build over a whole Scene, the *Mesh* variants over a single
   InstanceArray with an explicit geomID; the `mode` argument of the mesh
   variants is not used. */
Builder* BVH4InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  BVH4* const accel = (BVH4*)bvh;
  return new BVHNBuilderSAH<4,InstanceArrayPrimitive>(accel,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  BVH4* const accel = (BVH4*)bvh;
  return new BVHNBuilderSAH<4,InstanceArrayPrimitive>(accel,mesh,geomID,4,1.0f,1,1,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype)
{
  BVH8* const accel = (BVH8*)bvh;
  return new BVHNBuilderSAH<8,InstanceArrayPrimitive>(accel,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode)
{
  BVH8* const accel = (BVH8*)bvh;
  return new BVHNBuilderSAH<8,InstanceArrayPrimitive>(accel,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue