Merge pull request #44628 from JFonS/new_cpu_lightmapper_3.2

[3.2] New CPU lightmapper
This commit is contained in:
Rémi Verschelde 2021-01-15 13:25:46 +01:00 committed by GitHub
commit 497653ab53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
709 changed files with 208363 additions and 1775 deletions

View File

@ -31,8 +31,11 @@
#include "geometry.h"
#include "core/print_string.h"
#include "thirdparty/misc/clipper.hpp"
#include "thirdparty/misc/triangulator.h"
#define STB_RECT_PACK_IMPLEMENTATION
#include "thirdparty/stb_rect_pack/stb_rect_pack.h"
#define SCALE_FACTOR 100000.0 // Based on CMP_EPSILON.
@ -1224,3 +1227,36 @@ Vector<Vector3> Geometry::compute_convex_mesh_points(const Plane *p_planes, int
return points;
}
// Runs stb_rect_pack over `p_sizes`, targeting one atlas of `p_atlas_size`.
//
// The returned vector has exactly `p_sizes.size()` entries. Each packer rect
// is tagged with an id equal to its index in `p_sizes`, and its result is
// stored at that id, so entry N of the return value describes `p_sizes[N]`.
// Every entry holds the x/y written by the packer and its `was_packed` flag
// converted to bool; callers are expected to check `packed` before using the
// position.
Vector<Geometry::PackRectsResult> Geometry::partial_pack_rects(const Vector<Vector2i> &p_sizes, const Size2i &p_atlas_size) {
	// Scratch nodes for the packer: one per atlas column, zero-filled.
	Vector<stbrp_node> packer_nodes;
	packer_nodes.resize(p_atlas_size.width);
	zeromem(packer_nodes.ptrw(), sizeof(stbrp_node) * packer_nodes.size());

	stbrp_context packer;
	stbrp_init_target(&packer, p_atlas_size.width, p_atlas_size.height, packer_nodes.ptrw(), p_atlas_size.width);

	// Describe every requested rectangle to the packer.
	const int rect_count = p_sizes.size();
	Vector<stbrp_rect> packer_rects;
	packer_rects.resize(rect_count);
	for (int idx = 0; idx < rect_count; idx++) {
		stbrp_rect &entry = packer_rects.write[idx];
		entry.id = idx;
		entry.w = p_sizes[idx].width;
		entry.h = p_sizes[idx].height;
		entry.x = 0;
		entry.y = 0;
		entry.was_packed = 0;
	}

	stbrp_pack_rects(&packer, packer_rects.ptrw(), packer_rects.size());

	// Copy the packer output back, keyed by the id assigned above.
	Vector<PackRectsResult> results;
	results.resize(rect_count);
	for (int idx = 0; idx < rect_count; idx++) {
		const stbrp_rect &entry = packer_rects[idx];

		PackRectsResult placed;
		placed.x = entry.x;
		placed.y = entry.y;
		placed.packed = static_cast<bool>(entry.was_packed);

		results.write[entry.id] = placed;
	}

	return results;
}

View File

@ -502,6 +502,27 @@ public:
return (cn.cross(an) > 0) == orientation;
}
// Computes the barycentric coordinates of point `s` with respect to the 2D
// triangle (a, b, c), following http://www.blackpawn.com/texts/pointinpoly/
//
// The result packs the weights as Vector3(weight_a, weight_b, weight_c), with
// weight_a = 1 - weight_b - weight_c. No check is made for a degenerate
// (zero-area) triangle: the denominator is then zero and the division below
// is performed anyway.
static Vector3 barycentric_coordinates_2d(const Vector2 &s, const Vector2 &a, const Vector2 &b, const Vector2 &c) {
	// Triangle edges and the query point, all expressed relative to vertex a.
	const Vector2 edge_ac = c - a;
	const Vector2 edge_ab = b - a;
	const Vector2 to_point = s - a;

	// Pairwise dot products, kept in double precision.
	const double ac_ac = edge_ac.dot(edge_ac);
	const double ac_ab = edge_ac.dot(edge_ab);
	const double ac_pt = edge_ac.dot(to_point);
	const double ab_ab = edge_ab.dot(edge_ab);
	const double ab_pt = edge_ab.dot(to_point);

	// Solve the 2x2 system for the weights of c and b; a takes the remainder.
	const double inv_denom = 1.0f / (ac_ac * ab_ab - ac_ab * ac_ab);
	const double weight_c = (ab_ab * ac_pt - ac_ab * ab_pt) * inv_denom;
	const double weight_b = (ac_ac * ab_pt - ac_ab * ac_pt) * inv_denom;
	const double weight_a = 1.0f - weight_c - weight_b;

	return Vector3(weight_a, weight_b, weight_c);
}
static Vector2 get_closest_point_to_segment_uncapped_2d(const Vector2 &p_point, const Vector2 *p_segment) {
Vector2 p = p_point - p_segment[0];
@ -1014,6 +1035,13 @@ public:
static void make_atlas(const Vector<Size2i> &p_rects, Vector<Point2i> &r_result, Size2i &r_size);
// Per-rectangle result of partial_pack_rects(): one entry is produced for
// every size passed in.
struct PackRectsResult {
// Position reported by the rect packer for this rectangle
// (presumably in atlas pixels - confirm against the caller).
int x;
int y;
// True when the packer flagged this rectangle as packed.
bool packed;
};
static Vector<PackRectsResult> partial_pack_rects(const Vector<Vector2i> &p_sizes, const Size2i &p_atlas_size);
static Vector<Vector3> compute_convex_mesh_points(const Plane *p_planes, int p_plane_count);
private:

View File

@ -16,54 +16,71 @@
</return>
<argument index="0" name="from_node" type="Node" default="null">
</argument>
<argument index="1" name="create_visual_debug" type="bool" default="false">
<argument index="1" name="data_save_path" type="String" default="&quot;&quot;">
</argument>
<description>
Bakes the lightmaps within the currently edited scene. Returns a [enum BakeError] to signify if the bake was successful, or if unsuccessful, how the bake failed.
</description>
</method>
<method name="debug_bake">
<return type="void">
</return>
<description>
Executes a dry run bake of lightmaps within the currently edited scene.
Bakes the lightmap, scanning from the given [code]from_node[/code] root and saves the resulting [BakedLightmapData] in [code]data_save_path[/code]. If no save path is provided it will try to match the path from the current [member light_data].
</description>
</method>
</methods>
<members>
<member name="bake_cell_size" type="float" setter="set_bake_cell_size" getter="get_bake_cell_size" default="0.25">
Grid subdivision size for lightmapper calculation. The default value will work for most cases. Increase for better lighting on small details or if your scene is very large.
<member name="atlas_generate" type="bool" setter="set_generate_atlas" getter="is_generate_atlas_enabled" default="true">
When enabled, the lightmapper will merge the textures for all meshes into a single large layered texture. Not supported in GLES2.
</member>
<member name="bake_default_texels_per_unit" type="float" setter="set_bake_default_texels_per_unit" getter="get_bake_default_texels_per_unit" default="20.0">
If a [member Mesh.lightmap_size_hint] isn't specified, the lightmap baker will dynamically set the lightmap size using this value. This value is measured in texels per world unit. The maximum lightmap texture size is 4096x4096.
<member name="atlas_max_size" type="int" setter="set_max_atlas_size" getter="get_max_atlas_size" default="4096">
Maximum size of each lightmap layer, only used when [member atlas_generate] is enabled.
</member>
<member name="bake_energy" type="float" setter="set_energy" getter="get_energy" default="1.0">
Multiplies the light sources' intensity by this value. For instance, if the value is set to 2, lights will be twice as bright. If the value is set to 0.5, lights will be half as bright.
<member name="bias" type="float" setter="set_bias" getter="get_bias" default="0.005">
Raycasting bias used during baking to avoid floating point precision issues.
</member>
<member name="bake_extents" type="Vector3" setter="set_extents" getter="get_extents" default="Vector3( 10, 10, 10 )">
The size of the affected area.
</member>
<member name="bake_hdr" type="bool" setter="set_hdr" getter="is_hdr" default="false">
If [code]true[/code], the lightmap can capture light values greater than [code]1.0[/code]. Turning this off will result in a smaller file size.
</member>
<member name="bake_mode" type="int" setter="set_bake_mode" getter="get_bake_mode" enum="BakedLightmap.BakeMode" default="0">
Lightmapping mode. See [enum BakeMode].
</member>
<member name="bake_propagation" type="float" setter="set_propagation" getter="get_propagation" default="1.0">
Defines how far the light will travel before it is no longer effective. The higher the number, the farther the light will travel. For instance, if the value is set to 2, the light will go twice as far. If the value is set to 0.5, the light will only go half as far.
</member>
<member name="bake_quality" type="int" setter="set_bake_quality" getter="get_bake_quality" enum="BakedLightmap.BakeQuality" default="1">
Three quality modes are available. Higher quality requires more rendering time. See [enum BakeQuality].
<member name="bounces" type="int" setter="set_bounces" getter="get_bounces" default="3">
Number of light bounces that are taken into account during baking.
</member>
<member name="capture_cell_size" type="float" setter="set_capture_cell_size" getter="get_capture_cell_size" default="0.5">
Grid size used for real-time capture information on dynamic objects. Cannot be larger than [member bake_cell_size].
Grid size used for real-time capture information on dynamic objects.
</member>
<member name="image_path" type="String" setter="set_image_path" getter="get_image_path" default="&quot;.&quot;">
The location where lightmaps will be saved.
<member name="capture_enabled" type="bool" setter="set_capture_enabled" getter="get_capture_enabled" default="true">
When enabled, an octree containing the scene's lighting information will be computed. This octree will then be used to light dynamic objects in the scene.
</member>
<member name="capture_propagation" type="float" setter="set_capture_propagation" getter="get_capture_propagation" default="1.0">
Bias value to reduce the amount of light propagation in the captured octree.
</member>
<member name="capture_quality" type="int" setter="set_capture_quality" getter="get_capture_quality" enum="BakedLightmap.BakeQuality" default="1">
Bake quality of the capture data.
</member>
<member name="default_texels_per_unit" type="float" setter="set_default_texels_per_unit" getter="get_default_texels_per_unit" default="16.0">
If a baked mesh doesn't have a UV2 size hint, this value will be used to roughly compute a suitable lightmap size.
</member>
<member name="environment_custom_color" type="Color" setter="set_environment_custom_color" getter="get_environment_custom_color">
The environment color when [member environment_mode] is set to [constant ENVIRONMENT_MODE_CUSTOM_COLOR].
</member>
<member name="environment_custom_energy" type="float" setter="set_environment_custom_energy" getter="get_environment_custom_energy">
The energy scaling factor when [member environment_mode] is set to [constant ENVIRONMENT_MODE_CUSTOM_COLOR] or [constant ENVIRONMENT_MODE_CUSTOM_SKY].
</member>
<member name="environment_custom_sky" type="Sky" setter="set_environment_custom_sky" getter="get_environment_custom_sky">
The [Sky] resource to use when [member environment_mode] is set to [constant ENVIRONMENT_MODE_CUSTOM_SKY].
</member>
<member name="environment_custom_sky_rotation_degrees" type="Vector3" setter="set_environment_custom_sky_rotation_degrees" getter="get_environment_custom_sky_rotation_degrees">
The rotation of the baked custom sky.
</member>
<member name="environment_mode" type="int" setter="set_environment_mode" getter="get_environment_mode" enum="BakedLightmap.EnvironmentMode" default="0">
Decides which environment to use during baking.
</member>
<member name="extents" type="Vector3" setter="set_extents" getter="get_extents" default="Vector3( 10, 10, 10 )">
Size of the baked lightmap. Only meshes inside this region will be included in the baked lightmap, also used as the bounds of the captured region for dynamic lighting.
</member>
<member name="image_path" type="String" setter="set_image_path" getter="get_image_path">
Deprecated. In previous versions, it determined the location where lightmaps were saved.
</member>
<member name="light_data" type="BakedLightmapData" setter="set_light_data" getter="get_light_data">
The calculated light data.
</member>
<member name="quality" type="int" setter="set_bake_quality" getter="get_bake_quality" enum="BakedLightmap.BakeQuality" default="1">
Determines the amount of samples per texel used in indirect light baking. The amount of samples for each quality level can be configured in the project settings.
</member>
<member name="use_denoiser" type="bool" setter="set_use_denoiser" getter="is_using_denoiser" default="true">
When enabled, a lightmap denoiser will be used to reduce the noise inherent to Monte Carlo based global illumination.
</member>
</members>
<constants>
<constant name="BAKE_QUALITY_LOW" value="0" enum="BakeQuality">
@ -73,13 +90,10 @@
The default bake quality mode.
</constant>
<constant name="BAKE_QUALITY_HIGH" value="2" enum="BakeQuality">
The highest bake quality mode. Takes longer to calculate.
A higher bake quality mode. Takes longer to calculate.
</constant>
<constant name="BAKE_MODE_CONE_TRACE" value="0" enum="BakeMode">
Less precise but faster bake mode.
</constant>
<constant name="BAKE_MODE_RAY_TRACE" value="1" enum="BakeMode">
More precise bake mode but can take considerably longer to bake.
<constant name="BAKE_QUALITY_ULTRA" value="3" enum="BakeQuality">
The highest bake quality mode. Takes the longest to calculate.
</constant>
<constant name="BAKE_ERROR_OK" value="0" enum="BakeError">
Baking was successful.
@ -93,8 +107,28 @@
<constant name="BAKE_ERROR_CANT_CREATE_IMAGE" value="3" enum="BakeError">
Returns when the baker cannot save per-mesh textures to file.
</constant>
<constant name="BAKE_ERROR_USER_ABORTED" value="4" enum="BakeError">
<constant name="BAKE_ERROR_LIGHTMAP_SIZE" value="4" enum="BakeError">
The size of the generated lightmaps is too large.
</constant>
<constant name="BAKE_ERROR_INVALID_MESH" value="5" enum="BakeError">
Some mesh contains UV2 values outside the [code][0,1][/code] range.
</constant>
<constant name="BAKE_ERROR_USER_ABORTED" value="6" enum="BakeError">
Returns if user cancels baking.
</constant>
<constant name="BAKE_ERROR_NO_LIGHTMAPPER" value="7" enum="BakeError">
</constant>
<constant name="ENVIRONMENT_MODE_DISABLED" value="0" enum="EnvironmentMode">
No environment is used during baking.
</constant>
<constant name="ENVIRONMENT_MODE_SCENE" value="1" enum="EnvironmentMode">
The baked environment is automatically picked from the current scene.
</constant>
<constant name="ENVIRONMENT_MODE_CUSTOM_SKY" value="2" enum="EnvironmentMode">
A custom sky is used as environment during baking.
</constant>
<constant name="ENVIRONMENT_MODE_CUSTOM_COLOR" value="3" enum="EnvironmentMode">
A custom solid color is used as environment during baking.
</constant>
</constants>
</class>

View File

@ -12,9 +12,13 @@
</return>
<argument index="0" name="path" type="NodePath">
</argument>
<argument index="1" name="lightmap" type="Texture">
<argument index="1" name="lightmap" type="Resource">
</argument>
<argument index="2" name="instance" type="int">
<argument index="2" name="lightmap_slice" type="int">
</argument>
<argument index="3" name="lightmap_uv_rect" type="Rect2">
</argument>
<argument index="4" name="instance" type="int">
</argument>
<description>
</description>
@ -32,7 +36,7 @@
</description>
</method>
<method name="get_user_lightmap" qualifiers="const">
<return type="Texture">
<return type="Resource">
</return>
<argument index="0" name="user_idx" type="int">
</argument>

View File

@ -46,6 +46,12 @@
<member name="extra_cull_margin" type="float" setter="set_extra_cull_margin" getter="get_extra_cull_margin" default="0.0">
The extra distance added to the GeometryInstance's bounding box ([AABB]) to increase its cull box.
</member>
<member name="generate_lightmap" type="bool" setter="set_generate_lightmap" getter="get_generate_lightmap" default="true">
When disabled, the mesh will be taken into account when computing indirect lighting, but the resulting lightmap will not be saved. Useful for emissive only materials or shadow casters.
</member>
<member name="lightmap_scale" type="int" setter="set_lightmap_scale" getter="get_lightmap_scale" enum="GeometryInstance.LightmapScale" default="0">
Scale factor for the generated baked lightmap. Useful for adding detail to certain mesh instances.
</member>
<member name="lod_max_distance" type="float" setter="set_lod_max_distance" getter="get_lod_max_distance" default="0.0">
The GeometryInstance's max LOD distance.
[b]Note:[/b] This property currently has no effect.
@ -71,6 +77,20 @@
</member>
</members>
<constants>
<constant name="LIGHTMAP_SCALE_1X" value="0" enum="LightmapScale">
The generated lightmap texture will have the original size.
</constant>
<constant name="LIGHTMAP_SCALE_2X" value="1" enum="LightmapScale">
The generated lightmap texture will be twice as large, on each axis.
</constant>
<constant name="LIGHTMAP_SCALE_4X" value="2" enum="LightmapScale">
The generated lightmap texture will be 4 times as large, on each axis.
</constant>
<constant name="LIGHTMAP_SCALE_8X" value="3" enum="LightmapScale">
The generated lightmap texture will be 8 times as large, on each axis.
</constant>
<constant name="LIGHTMAP_SCALE_MAX" value="4" enum="LightmapScale">
</constant>
<constant name="SHADOW_CASTING_SETTING_OFF" value="0" enum="ShadowCastingSetting">
Will not cast any shadows.
</constant>

View File

@ -107,7 +107,7 @@
</methods>
<members>
<member name="lightmap_size_hint" type="Vector2" setter="set_lightmap_size_hint" getter="get_lightmap_size_hint" default="Vector2( 0, 0 )">
Sets a hint to be used for lightmap resolution in [BakedLightmap]. Overrides [member BakedLightmap.bake_default_texels_per_unit].
Sets a hint to be used for lightmap resolution in [BakedLightmap]. Overrides [member BakedLightmap.default_texels_per_unit].
</member>
</members>
<constants>

View File

@ -1057,6 +1057,18 @@
The amount of UV contraction. This figure is divided by 1000000, and is a proportion of the total texture dimensions, where the width and height are both ranged from 0.0 to 1.0.
Use the default unless correcting for a problem on particular hardware.
</member>
<member name="rendering/cpu_lightmapper/quality/high_quality_ray_count" type="int" setter="" getter="" default="512">
Amount of light samples taken when using [constant BakedLightmap.BAKE_QUALITY_HIGH].
</member>
<member name="rendering/cpu_lightmapper/quality/low_quality_ray_count" type="int" setter="" getter="" default="64">
Amount of light samples taken when using [constant BakedLightmap.BAKE_QUALITY_LOW].
</member>
<member name="rendering/cpu_lightmapper/quality/medium_quality_ray_count" type="int" setter="" getter="" default="256">
Amount of light samples taken when using [constant BakedLightmap.BAKE_QUALITY_MEDIUM].
</member>
<member name="rendering/cpu_lightmapper/quality/ultra_quality_ray_count" type="int" setter="" getter="" default="1024">
Amount of light samples taken when using [constant BakedLightmap.BAKE_QUALITY_ULTRA].
</member>
<member name="rendering/environment/default_clear_color" type="Color" setter="" getter="" default="Color( 0.3, 0.3, 0.3, 1 )">
Default background clear color. Overridable per [Viewport] using its [Environment]. See [member Environment.background_mode] and [member Environment.background_color] in particular. To change this default color programmatically, use [method VisualServer.set_default_clear_color].
</member>
@ -1169,6 +1181,12 @@
<member name="rendering/quality/intended_usage/framebuffer_allocation.mobile" type="int" setter="" getter="" default="3">
Lower-end override for [member rendering/quality/intended_usage/framebuffer_allocation] on mobile devices, due to performance concerns or driver support.
</member>
<member name="rendering/quality/lightmapping/use_bicubic_sampling" type="bool" setter="" getter="" default="true">
Enable usage of bicubic sampling in baked lightmaps. This results in smoother looking lighting at the expense of more bandwidth usage. On GLES2, changes to this setting will only be applied upon restarting the application.
</member>
<member name="rendering/quality/lightmapping/use_bicubic_sampling.mobile" type="bool" setter="" getter="" default="false">
Lower-end override for [member rendering/quality/lightmapping/use_bicubic_sampling] on mobile devices, in order to reduce bandwidth usage.
</member>
<member name="rendering/quality/reflections/atlas_size" type="int" setter="" getter="" default="2048">
Size of the atlas used by reflection probes. A larger size can result in higher visual quality, while a smaller size will be faster and take up less memory.
</member>

View File

@ -2045,6 +2045,10 @@
</argument>
<argument index="2" name="lightmap" type="RID">
</argument>
<argument index="3" name="lightmap_slice" type="int" default="-1">
</argument>
<argument index="4" name="lightmap_uv_rect" type="Rect2" default="Rect2( 0, 0, 1, 1 )">
</argument>
<description>
Sets the lightmap to use with this instance.
</description>

View File

@ -2572,6 +2572,9 @@ void RasterizerSceneGLES2::_render_render_list(RenderList::Element **p_elements,
if (rebind_lightmap && lightmap) {
state.scene_shader.set_uniform(SceneShaderGLES2::LIGHTMAP_ENERGY, lightmap_energy);
if (storage->config.use_lightmap_filter_bicubic) {
state.scene_shader.set_uniform(SceneShaderGLES2::LIGHTMAP_TEXTURE_SIZE, Vector2(lightmap->width, lightmap->height));
}
}
state.scene_shader.set_uniform(SceneShaderGLES2::WORLD_TRANSFORM, e->instance->transform);
@ -4047,6 +4050,10 @@ void RasterizerSceneGLES2::initialize() {
}
}
if (storage->config.use_lightmap_filter_bicubic) {
state.scene_shader.add_custom_define("#define USE_LIGHTMAP_FILTER_BICUBIC\n");
}
shadow_filter_mode = SHADOW_FILTER_NEAREST;
glFrontFace(GL_CW);

View File

@ -6299,6 +6299,9 @@ void RasterizerStorageGLES2::initialize() {
config.force_vertex_shading = GLOBAL_GET("rendering/quality/shading/force_vertex_shading");
config.use_fast_texture_filter = GLOBAL_GET("rendering/quality/filters/use_nearest_mipmap_filter");
GLOBAL_DEF_RST("rendering/quality/lightmapping/use_bicubic_sampling", true);
GLOBAL_DEF_RST("rendering/quality/lightmapping/use_bicubic_sampling.mobile", false);
config.use_lightmap_filter_bicubic = GLOBAL_GET("rendering/quality/lightmapping/use_bicubic_sampling");
}
void RasterizerStorageGLES2::finalize() {

View File

@ -57,6 +57,7 @@ public:
bool shrink_textures_x2;
bool use_fast_texture_filter;
bool use_skeleton_software;
bool use_lightmap_filter_bicubic;
int max_vertex_texture_image_units;
int max_texture_image_units;

View File

@ -98,4 +98,4 @@ public:
ShaderCompilerGLES2();
};
#endif // SHADERCOMPILERGLES3_H
#endif // SHADERCOMPILERGLES2_H

View File

@ -889,6 +889,69 @@ void reflection_process(samplerCube reflection_map,
#ifdef USE_LIGHTMAP
uniform mediump sampler2D lightmap; //texunit:-4
uniform mediump float lightmap_energy;
#if defined(USE_LIGHTMAP_FILTER_BICUBIC)
uniform mediump vec2 lightmap_texture_size;
// Helper functions for bicubic lightmap filtering.
// In the visible code they are only evaluated on the fractional texel
// position (fract(...)) computed in texture2D_bicubic().
// w0, w1, w2, and w3 are the four cubic B-spline basis functions
// (the four polynomials sum to 1.0 for any a).
float w0(float a) {
return (1.0 / 6.0) * (a * (a * (-a + 3.0) - 3.0) + 1.0);
}
float w1(float a) {
return (1.0 / 6.0) * (a * a * (3.0 * a - 6.0) + 4.0);
}
float w2(float a) {
return (1.0 / 6.0) * (a * (a * (-3.0 * a + 3.0) + 3.0) + 1.0);
}
float w3(float a) {
return (1.0 / 6.0) * (a * a * a);
}
// g0 and g1 are the two amplitude functions
// (each is the sum of a pair of basis weights; used to blend the fetches).
float g0(float a) {
return w0(a) + w1(a);
}
float g1(float a) {
return w2(a) + w3(a);
}
// h0 and h1 are the two offset functions
// (added to the integer texel position to place each texture fetch).
float h0(float a) {
return -1.0 + w1(a) / (w0(a) + w1(a));
}
float h1(float a) {
return 1.0 + w3(a) / (w2(a) + w3(a));
}
// Samples `tex` at `uv` with bicubic (cubic B-spline) filtering, built from
// four texture2D() fetches. The fetch positions come from the h0/h1 offset
// functions and the fetches are blended with the g0/g1 amplitude functions.
// The texture dimensions are read from the `lightmap_texture_size` uniform,
// so the result is only meaningful for the texture that uniform describes.
vec4 texture2D_bicubic(sampler2D tex, vec2 uv) {
	vec2 texel_size = vec2(1.0) / lightmap_texture_size;

	// Position in texel units, shifted by half a texel, split into its
	// integer and fractional parts.
	vec2 texel_pos = uv * lightmap_texture_size + vec2(0.5);
	vec2 texel_base = floor(texel_pos);
	vec2 texel_frac = fract(texel_pos);

	// Blend weights along each axis.
	float weight_x0 = g0(texel_frac.x);
	float weight_x1 = g1(texel_frac.x);
	float weight_y0 = g0(texel_frac.y);
	float weight_y1 = g1(texel_frac.y);

	// Fetch offsets along each axis.
	float offset_x0 = h0(texel_frac.x);
	float offset_x1 = h1(texel_frac.x);
	float offset_y0 = h0(texel_frac.y);
	float offset_y1 = h1(texel_frac.y);

	// The four fetch positions, converted back to normalized UV space.
	vec2 uv00 = (vec2(texel_base.x + offset_x0, texel_base.y + offset_y0) - vec2(0.5)) * texel_size;
	vec2 uv10 = (vec2(texel_base.x + offset_x1, texel_base.y + offset_y0) - vec2(0.5)) * texel_size;
	vec2 uv01 = (vec2(texel_base.x + offset_x0, texel_base.y + offset_y1) - vec2(0.5)) * texel_size;
	vec2 uv11 = (vec2(texel_base.x + offset_x1, texel_base.y + offset_y1) - vec2(0.5)) * texel_size;

	vec4 row0 = weight_x0 * texture2D(tex, uv00) + weight_x1 * texture2D(tex, uv10);
	vec4 row1 = weight_x0 * texture2D(tex, uv01) + weight_x1 * texture2D(tex, uv11);

	return (weight_y0 * row0) + (weight_y1 * row1);
}
#endif //USE_LIGHTMAP_FILTER_BICUBIC
#endif
#ifdef USE_LIGHTMAP_CAPTURE
@ -1661,8 +1724,12 @@ FRAGMENT_SHADER_CODE
#ifdef USE_LIGHTMAP
//ambient light will come entirely from lightmap if lightmap is used
#if defined(USE_LIGHTMAP_FILTER_BICUBIC)
ambient_light = texture2D_bicubic(lightmap, uv2_interp).rgb * lightmap_energy;
#else
ambient_light = texture2D(lightmap, uv2_interp).rgb * lightmap_energy;
#endif
#endif
#ifdef USE_LIGHTMAP_CAPTURE
{

View File

@ -1949,7 +1949,17 @@ void RasterizerSceneGLES3::_setup_light(RenderList::Element *e, const Transform
if (lightmap && capture) {
glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 9);
if (e->instance->lightmap_slice == -1) {
glBindTexture(GL_TEXTURE_2D, lightmap->tex_id);
} else {
glBindTexture(GL_TEXTURE_2D_ARRAY, lightmap->tex_id);
state.scene_shader.set_uniform(SceneShaderGLES3::LIGHTMAP_LAYER, e->instance->lightmap_slice);
}
const Rect2 &uvr = e->instance->lightmap_uv_rect;
state.scene_shader.set_uniform(SceneShaderGLES3::LIGHTMAP_UV_RECT, Color(uvr.get_position().x, uvr.get_position().y, uvr.get_size().x, uvr.get_size().y));
if (storage->config.use_lightmap_filter_bicubic) {
state.scene_shader.set_uniform(SceneShaderGLES3::LIGHTMAP_TEXTURE_SIZE, Vector2(lightmap->width, lightmap->height));
}
state.scene_shader.set_uniform(SceneShaderGLES3::LIGHTMAP_ENERGY, capture->energy);
}
}
@ -2080,6 +2090,7 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements, int p_
state.scene_shader.set_conditional(SceneShaderGLES3::USE_GI_PROBES, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP_CAPTURE, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP_LAYERED, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_RADIANCE_MAP, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_CONTACT_SHADOWS, false);
@ -2088,6 +2099,7 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements, int p_
state.scene_shader.set_conditional(SceneShaderGLES3::USE_GI_PROBES, e->instance->gi_probe_instances.size() > 0);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP, e->instance->lightmap.is_valid() && e->instance->gi_probe_instances.size() == 0);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP_LAYERED, e->instance->lightmap_slice != -1);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP_CAPTURE, !e->instance->lightmap_capture_data.empty() && !e->instance->lightmap.is_valid() && e->instance->gi_probe_instances.size() == 0);
state.scene_shader.set_conditional(SceneShaderGLES3::SHADELESS, false);
@ -2258,6 +2270,7 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements, int p_
state.scene_shader.set_conditional(SceneShaderGLES3::SHADOW_MODE_PCF_13, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_GI_PROBES, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP_LAYERED, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP_CAPTURE, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_CONTACT_SHADOWS, false);
state.scene_shader.set_conditional(SceneShaderGLES3::USE_VERTEX_LIGHTING, false);
@ -2392,6 +2405,9 @@ void RasterizerSceneGLES3::_add_geometry_with_material(RasterizerStorageGLES3::G
if (e->instance->lightmap.is_valid()) {
e->sort_key |= SORT_KEY_LIGHTMAP_FLAG;
if (e->instance->lightmap_slice != -1) {
e->sort_key |= SORT_KEY_LIGHTMAP_LAYERED_FLAG;
}
}
if (!e->instance->lightmap_capture_data.empty()) {
@ -5337,6 +5353,8 @@ void RasterizerSceneGLES3::iteration() {
subsurface_scatter_quality = SubSurfaceScatterQuality(int(GLOBAL_GET("rendering/quality/subsurface_scattering/quality")));
subsurface_scatter_size = GLOBAL_GET("rendering/quality/subsurface_scattering/scale");
storage->config.use_lightmap_filter_bicubic = GLOBAL_GET("rendering/quality/lightmapping/use_bicubic_sampling");
state.scene_shader.set_conditional(SceneShaderGLES3::USE_LIGHTMAP_FILTER_BICUBIC, storage->config.use_lightmap_filter_bicubic);
state.scene_shader.set_conditional(SceneShaderGLES3::VCT_QUALITY_HIGH, GLOBAL_GET("rendering/quality/voxel_cone_tracing/high_quality"));
}

View File

@ -680,14 +680,15 @@ public:
SORT_KEY_OPAQUE_DEPTH_LAYER_SHIFT = 52,
SORT_KEY_OPAQUE_DEPTH_LAYER_MASK = 0xF,
//64 bits unsupported in MSVC
#define SORT_KEY_UNSHADED_FLAG (uint64_t(1) << 49)
#define SORT_KEY_NO_DIRECTIONAL_FLAG (uint64_t(1) << 48)
#define SORT_KEY_LIGHTMAP_CAPTURE_FLAG (uint64_t(1) << 47)
#define SORT_KEY_UNSHADED_FLAG (uint64_t(1) << 50)
#define SORT_KEY_NO_DIRECTIONAL_FLAG (uint64_t(1) << 49)
#define SORT_KEY_LIGHTMAP_CAPTURE_FLAG (uint64_t(1) << 48)
#define SORT_KEY_LIGHTMAP_LAYERED_FLAG (uint64_t(1) << 47)
#define SORT_KEY_LIGHTMAP_FLAG (uint64_t(1) << 46)
#define SORT_KEY_GI_PROBES_FLAG (uint64_t(1) << 45)
#define SORT_KEY_VERTEX_LIT_FLAG (uint64_t(1) << 44)
SORT_KEY_SHADING_SHIFT = 44,
SORT_KEY_SHADING_MASK = 63,
SORT_KEY_SHADING_MASK = 127,
//44-28 material index
SORT_KEY_MATERIAL_INDEX_SHIFT = 28,
//28-8 geometry index

View File

@ -8555,6 +8555,10 @@ void RasterizerStorageGLES3::initialize() {
String renderer = (const char *)glGetString(GL_RENDERER);
GLOBAL_DEF("rendering/quality/lightmapping/use_bicubic_sampling", true);
GLOBAL_DEF("rendering/quality/lightmapping/use_bicubic_sampling.mobile", false);
config.use_lightmap_filter_bicubic = GLOBAL_GET("rendering/quality/lightmapping/use_bicubic_sampling");
config.use_depth_prepass = bool(GLOBAL_GET("rendering/quality/depth_prepass/enable"));
if (config.use_depth_prepass) {

View File

@ -74,6 +74,7 @@ public:
bool shrink_textures_x2;
bool use_fast_texture_filter;
bool use_anisotropic_filter;
bool use_lightmap_filter_bicubic;
bool s3tc_supported;
bool latc_supported;

View File

@ -109,6 +109,10 @@ layout(std140) uniform SceneData { // ubo:0
uniform highp mat4 world_transform;
#ifdef USE_LIGHTMAP
uniform highp vec4 lightmap_uv_rect;
#endif
#ifdef USE_LIGHT_DIRECTIONAL
layout(std140) uniform DirectionalLightData { //ubo:3
@ -346,7 +350,9 @@ void main() {
uv_interp = uv_attrib;
#endif
#if defined(ENABLE_UV2_INTERP) || defined(USE_LIGHTMAP)
#if defined(USE_LIGHTMAP)
uv2_interp = lightmap_uv_rect.zw * uv2_attrib + lightmap_uv_rect.xy;
#elif defined(ENABLE_UV2_INTERP)
uv2_interp = uv2_attrib;
#endif
@ -1435,8 +1441,109 @@ void reflection_process(int idx, vec3 vertex, vec3 normal, vec3 binormal, vec3 t
}
#ifdef USE_LIGHTMAP
#ifdef USE_LIGHTMAP_LAYERED
uniform mediump sampler2DArray lightmap; //texunit:-9
uniform int lightmap_layer;
#else
uniform mediump sampler2D lightmap; //texunit:-9
#endif
uniform mediump float lightmap_energy;
#ifdef USE_LIGHTMAP_FILTER_BICUBIC
uniform vec2 lightmap_texture_size;
// Helper functions for bicubic lightmap filtering.
// In the visible code they are only evaluated on the fractional texel
// position (fract(...)) computed in texture_bicubic() and
// textureArray_bicubic().
// w0, w1, w2, and w3 are the four cubic B-spline basis functions
// (the four polynomials sum to 1.0 for any a).
float w0(float a) {
return (1.0 / 6.0) * (a * (a * (-a + 3.0) - 3.0) + 1.0);
}
float w1(float a) {
return (1.0 / 6.0) * (a * a * (3.0 * a - 6.0) + 4.0);
}
float w2(float a) {
return (1.0 / 6.0) * (a * (a * (-3.0 * a + 3.0) + 3.0) + 1.0);
}
float w3(float a) {
return (1.0 / 6.0) * (a * a * a);
}
// g0 and g1 are the two amplitude functions
// (each is the sum of a pair of basis weights; used to blend the fetches).
float g0(float a) {
return w0(a) + w1(a);
}
float g1(float a) {
return w2(a) + w3(a);
}
// h0 and h1 are the two offset functions
// (added to the integer texel position to place each texture fetch).
float h0(float a) {
return -1.0 + w1(a) / (w0(a) + w1(a));
}
float h1(float a) {
return 1.0 + w3(a) / (w2(a) + w3(a));
}
// Samples the 2D texture `tex` at `uv` with bicubic (cubic B-spline)
// filtering, built from four texture() fetches. The fetch positions come from
// the h0/h1 offset functions and the fetches are blended with the g0/g1
// amplitude functions. The texture dimensions are read from the
// `lightmap_texture_size` uniform.
vec4 texture_bicubic(sampler2D tex, vec2 uv) {
	vec2 texel_size = vec2(1.0) / lightmap_texture_size;

	// Position in texel units, shifted by half a texel, split into its
	// integer and fractional parts.
	vec2 texel_pos = uv * lightmap_texture_size + vec2(0.5);
	vec2 texel_base = floor(texel_pos);
	vec2 texel_frac = fract(texel_pos);

	// Blend weights along each axis.
	float weight_x0 = g0(texel_frac.x);
	float weight_x1 = g1(texel_frac.x);
	float weight_y0 = g0(texel_frac.y);
	float weight_y1 = g1(texel_frac.y);

	// Fetch offsets along each axis.
	float offset_x0 = h0(texel_frac.x);
	float offset_x1 = h1(texel_frac.x);
	float offset_y0 = h0(texel_frac.y);
	float offset_y1 = h1(texel_frac.y);

	// The four fetch positions, converted back to normalized UV space.
	vec2 uv00 = (vec2(texel_base.x + offset_x0, texel_base.y + offset_y0) - vec2(0.5)) * texel_size;
	vec2 uv10 = (vec2(texel_base.x + offset_x1, texel_base.y + offset_y0) - vec2(0.5)) * texel_size;
	vec2 uv01 = (vec2(texel_base.x + offset_x0, texel_base.y + offset_y1) - vec2(0.5)) * texel_size;
	vec2 uv11 = (vec2(texel_base.x + offset_x1, texel_base.y + offset_y1) - vec2(0.5)) * texel_size;

	vec4 row0 = weight_x0 * texture(tex, uv00) + weight_x1 * texture(tex, uv10);
	vec4 row1 = weight_x0 * texture(tex, uv01) + weight_x1 * texture(tex, uv11);

	return (weight_y0 * row0) + (weight_y1 * row1);
}
// Layered counterpart of texture_bicubic(): samples the array texture `tex`
// with bicubic (cubic B-spline) filtering. `uv.xy` is the normalized position
// inside the layer and `uv.z` is passed through unchanged as the third
// coordinate of every texture() fetch. The texture dimensions are read from
// the `lightmap_texture_size` uniform.
vec4 textureArray_bicubic(sampler2DArray tex, vec3 uv) {
	vec2 texel_size = vec2(1.0) / lightmap_texture_size;
	float layer = uv.z;

	// Position in texel units, shifted by half a texel, split into its
	// integer and fractional parts.
	vec2 texel_pos = uv.xy * lightmap_texture_size + vec2(0.5);
	vec2 texel_base = floor(texel_pos);
	vec2 texel_frac = fract(texel_pos);

	// Blend weights along each axis.
	float weight_x0 = g0(texel_frac.x);
	float weight_x1 = g1(texel_frac.x);
	float weight_y0 = g0(texel_frac.y);
	float weight_y1 = g1(texel_frac.y);

	// Fetch offsets along each axis.
	float offset_x0 = h0(texel_frac.x);
	float offset_x1 = h1(texel_frac.x);
	float offset_y0 = h0(texel_frac.y);
	float offset_y1 = h1(texel_frac.y);

	// The four fetch positions, converted back to normalized UV space.
	vec2 uv00 = (vec2(texel_base.x + offset_x0, texel_base.y + offset_y0) - vec2(0.5)) * texel_size;
	vec2 uv10 = (vec2(texel_base.x + offset_x1, texel_base.y + offset_y0) - vec2(0.5)) * texel_size;
	vec2 uv01 = (vec2(texel_base.x + offset_x0, texel_base.y + offset_y1) - vec2(0.5)) * texel_size;
	vec2 uv11 = (vec2(texel_base.x + offset_x1, texel_base.y + offset_y1) - vec2(0.5)) * texel_size;

	vec4 row0 = weight_x0 * texture(tex, vec3(uv00, layer)) + weight_x1 * texture(tex, vec3(uv10, layer));
	vec4 row1 = weight_x0 * texture(tex, vec3(uv01, layer)) + weight_x1 * texture(tex, vec3(uv11, layer));

	return (weight_y0 * row0) + (weight_y1 * row1);
}
#define LIGHTMAP_TEXTURE_SAMPLE(m_tex, m_uv) texture_bicubic(m_tex, m_uv)
#define LIGHTMAP_TEXTURE_LAYERED_SAMPLE(m_tex, m_uv) textureArray_bicubic(m_tex, m_uv)
#else //!USE_LIGHTMAP_FILTER_BICUBIC
#define LIGHTMAP_TEXTURE_SAMPLE(m_tex, m_uv) texture(m_tex, m_uv)
#define LIGHTMAP_TEXTURE_LAYERED_SAMPLE(m_tex, m_uv) texture(m_tex, m_uv)
#endif //USE_LIGHTMAP_FILTER_BICUBIC
#endif
#ifdef USE_LIGHTMAP_CAPTURE
@ -1823,7 +1930,11 @@ FRAGMENT_SHADER_CODE
#endif
#ifdef USE_LIGHTMAP
ambient_light = texture(lightmap, uv2).rgb * lightmap_energy;
#ifdef USE_LIGHTMAP_LAYERED
ambient_light = LIGHTMAP_TEXTURE_LAYERED_SAMPLE(lightmap, vec3(uv2, float(lightmap_layer))).rgb * lightmap_energy;
#else
ambient_light = LIGHTMAP_TEXTURE_SAMPLE(lightmap, uv2).rgb * lightmap_energy;
#endif
#endif
#ifdef USE_LIGHTMAP_CAPTURE

View File

@ -107,16 +107,17 @@ void ResourceImporterLayeredTexture::_save_tex(const Vector<Ref<Image> > &p_imag
f->store_32(p_images[0]->get_height());
f->store_32(p_images.size()); //depth
f->store_32(p_texture_flags);
if ((p_compress_mode == COMPRESS_LOSSLESS) && p_images[0]->get_format() > Image::FORMAT_RGBA8) {
p_compress_mode = COMPRESS_UNCOMPRESSED; //these can't go as lossy
}
if (p_compress_mode != COMPRESS_VIDEO_RAM) {
//vram needs to do a first compression to tell what the format is, for the rest its ok
f->store_32(p_images[0]->get_format());
f->store_32(p_compress_mode); // 0 - lossless (PNG), 1 - vram, 2 - uncompressed
}
if ((p_compress_mode == COMPRESS_LOSSLESS) && p_images[0]->get_format() > Image::FORMAT_RGBA8) {
p_compress_mode = COMPRESS_UNCOMPRESSED; //these can't go as lossy
}
for (int i = 0; i < p_images.size(); i++) {
switch (p_compress_mode) {

View File

@ -931,9 +931,6 @@ static String _make_extname(const String &p_str) {
void ResourceImporterScene::_find_meshes(Node *p_node, Map<Ref<ArrayMesh>, Transform> &meshes) {
List<PropertyInfo> pi;
p_node->get_property_list(&pi);
MeshInstance *mi = Object::cast_to<MeshInstance>(p_node);
if (mi) {
@ -941,11 +938,11 @@ void ResourceImporterScene::_find_meshes(Node *p_node, Map<Ref<ArrayMesh>, Trans
Ref<ArrayMesh> mesh = mi->get_mesh();
if (mesh.is_valid() && !meshes.has(mesh)) {
Spatial *s = mi;
Spatial *s = Object::cast_to<Spatial>(mi);
Transform transform;
while (s) {
transform = transform * s->get_transform();
s = s->get_parent_spatial();
s = Object::cast_to<Spatial>(s->get_parent());
}
meshes[mesh] = transform;
@ -1427,11 +1424,31 @@ Error ResourceImporterScene::import(const String &p_source_file, const String &p
Map<Ref<ArrayMesh>, Transform> meshes;
_find_meshes(scene, meshes);
if (light_bake_mode == 2) {
String file_id = src_path.get_file();
String cache_file_path = base_path.plus_file(file_id + ".unwrap_cache");
int *cache_data = nullptr;
unsigned int cache_size = 0;
if (FileAccess::exists(cache_file_path)) {
Error err2;
FileAccess *file = FileAccess::open(cache_file_path, FileAccess::READ, &err2);
if (!err2) {
cache_size = file->get_len();
cache_data = (int *)memalloc(cache_size);
file->get_buffer((unsigned char *)cache_data, cache_size);
}
if (file)
memdelete(file);
}
float texel_size = p_options["meshes/lightmap_texel_size"];
texel_size = MAX(0.001, texel_size);
Map<String, unsigned int> used_meshes;
EditorProgress progress2("gen_lightmaps", TTR("Generating Lightmaps"), meshes.size());
int step = 0;
for (Map<Ref<ArrayMesh>, Transform>::Element *E = meshes.front(); E; E = E->next()) {
@ -1444,12 +1461,80 @@ Error ResourceImporterScene::import(const String &p_source_file, const String &p
progress2.step(TTR("Generating for Mesh: ") + name + " (" + itos(step) + "/" + itos(meshes.size()) + ")", step);
Error err2 = mesh->lightmap_unwrap(E->get(), texel_size);
int *ret_cache_data = cache_data;
unsigned int ret_cache_size = cache_size;
bool ret_used_cache = true; // Tell the unwrapper to use the cache
Error err2 = mesh->lightmap_unwrap_cached(ret_cache_data, ret_cache_size, ret_used_cache, E->get(), texel_size);
if (err2 != OK) {
EditorNode::add_io_error("Mesh '" + name + "' failed lightmap generation. Please fix geometry.");
} else {
String hash = String::md5((unsigned char *)ret_cache_data);
used_meshes.insert(hash, ret_cache_size);
if (!ret_used_cache) {
// Cache was not used, add the generated entry to the current cache
unsigned int new_cache_size = cache_size + ret_cache_size + (cache_size == 0 ? 4 : 0);
int *new_cache_data = (int *)memalloc(new_cache_size);
if (cache_size == 0) {
// Cache was empty
new_cache_data[0] = 0;
cache_size = 4;
} else {
memcpy(new_cache_data, cache_data, cache_size);
memfree(cache_data);
}
memcpy(&new_cache_data[cache_size / sizeof(int)], ret_cache_data, ret_cache_size);
cache_data = new_cache_data;
cache_size = new_cache_size;
cache_data[0]++; // Increase entry count
}
}
step++;
}
Error err2;
FileAccess *file = FileAccess::open(cache_file_path, FileAccess::WRITE, &err2);
if (err2) {
if (file)
memdelete(file);
} else {
// Store number of entries
file->store_32(used_meshes.size());
// Store cache entries
unsigned int r_idx = 1;
for (int i = 0; i < cache_data[0]; ++i) {
unsigned char *entry_start = (unsigned char *)&cache_data[r_idx];
String entry_hash = String::md5(entry_start);
if (used_meshes.has(entry_hash)) {
unsigned int entry_size = used_meshes[entry_hash];
file->store_buffer(entry_start, entry_size);
}
r_idx += 4; // hash
r_idx += 2; // size hint
int vertex_count = cache_data[r_idx];
r_idx += 1; // vertex count
r_idx += vertex_count; // vertex
r_idx += vertex_count * 2; // uvs
int index_count = cache_data[r_idx];
r_idx += 1; // index count
r_idx += index_count; // indices
}
file->close();
memfree(cache_data);
}
}

View File

@ -30,32 +30,58 @@
#include "baked_lightmap_editor_plugin.h"
void BakedLightmapEditorPlugin::_bake() {
void BakedLightmapEditorPlugin::_bake_select_file(const String &p_file) {
if (lightmap) {
BakedLightmap::BakeError err;
if (get_tree()->get_edited_scene_root() && get_tree()->get_edited_scene_root() == lightmap) {
err = lightmap->bake(lightmap);
err = lightmap->bake(lightmap, p_file);
} else {
err = lightmap->bake(lightmap->get_parent());
err = lightmap->bake(lightmap->get_parent(), p_file);
}
bake_func_end();
switch (err) {
case BakedLightmap::BAKE_ERROR_NO_SAVE_PATH:
EditorNode::get_singleton()->show_warning(TTR("Can't determine a save path for lightmap images.\nSave your scene (for images to be saved in the same dir), or pick a save path from the BakedLightmap properties."));
case BakedLightmap::BAKE_ERROR_NO_SAVE_PATH: {
String scene_path = lightmap->get_filename();
if (scene_path == String()) {
scene_path = lightmap->get_owner()->get_filename();
}
if (scene_path == String()) {
EditorNode::get_singleton()->show_warning(TTR("Can't determine a save path for lightmap images.\nSave your scene and try again."));
break;
}
scene_path = scene_path.get_basename() + ".lmbake";
file_dialog->set_current_path(scene_path);
file_dialog->popup_centered_ratio();
} break;
case BakedLightmap::BAKE_ERROR_NO_MESHES:
EditorNode::get_singleton()->show_warning(TTR("No meshes to bake. Make sure they contain an UV2 channel and that the 'Bake Light' flag is on."));
break;
case BakedLightmap::BAKE_ERROR_CANT_CREATE_IMAGE:
EditorNode::get_singleton()->show_warning(TTR("Failed creating lightmap images, make sure path is writable."));
break;
case BakedLightmap::BAKE_ERROR_LIGHTMAP_SIZE:
EditorNode::get_singleton()->show_warning(TTR("Failed determining lightmap size. Maximum lightmap size too small?"));
break;
case BakedLightmap::BAKE_ERROR_INVALID_MESH:
EditorNode::get_singleton()->show_warning(TTR("Some mesh is invalid. Make sure the UV2 channel values are conatined within the [0.0,1.0] square region."));
break;
case BakedLightmap::BAKE_ERROR_NO_LIGHTMAPPER:
EditorNode::get_singleton()->show_warning(TTR("Godot editor was built without ray tracing support, lightmaps can't be baked."));
break;
default: {
}
}
}
}
// Handler for the bake button's "pressed" signal: starts a bake with an empty
// file path by forwarding to _bake_select_file().
void BakedLightmapEditorPlugin::_bake() {
_bake_select_file("");
}
void BakedLightmapEditorPlugin::edit(Object *p_object) {
BakedLightmap *s = Object::cast_to<BakedLightmap>(p_object);
@ -81,29 +107,40 @@ void BakedLightmapEditorPlugin::make_visible(bool p_visible) {
}
EditorProgress *BakedLightmapEditorPlugin::tmp_progress = NULL;
EditorProgress *BakedLightmapEditorPlugin::tmp_subprogress = NULL;
void BakedLightmapEditorPlugin::bake_func_begin(int p_steps) {
ERR_FAIL_COND(tmp_progress != NULL);
tmp_progress = memnew(EditorProgress("bake_lightmaps", TTR("Bake Lightmaps"), p_steps, true));
// Progress callback for the main bake task (installed as
// BakedLightmap::bake_step_function). The progress dialog is created on the
// first call and destroyed by bake_func_end().
// p_progress is scaled by 1000 onto the dialog's 1000 steps (presumably a
// 0..1 fraction - confirm against the caller). The unnamed void* argument is
// ignored. Returns the result of EditorProgress::step().
bool BakedLightmapEditorPlugin::bake_func_step(float p_progress, const String &p_description, void *, bool p_force_refresh) {
	if (tmp_progress == nullptr) {
		// First report of this bake: create the dialog lazily.
		tmp_progress = memnew(EditorProgress("bake_lightmaps", TTR("Bake Lightmaps"), 1000, true));
		ERR_FAIL_COND_V(tmp_progress == nullptr, false);
	}

	return tmp_progress->step(p_description, p_progress * 1000, p_force_refresh);
}
bool BakedLightmapEditorPlugin::bake_func_step(int p_step, const String &p_description) {
ERR_FAIL_COND_V(tmp_progress == NULL, false);
return tmp_progress->step(p_description, p_step, false);
// Progress callback for the secondary bake task (installed as
// BakedLightmap::bake_substep_function). Mirrors bake_func_step() but drives
// its own untitled dialog, created on the first call and destroyed by
// bake_func_end().
// p_progress is scaled by 1000 onto the dialog's 1000 steps (presumably a
// 0..1 fraction - confirm against the caller). The unnamed void* argument is
// ignored. Returns the result of EditorProgress::step().
bool BakedLightmapEditorPlugin::bake_func_substep(float p_progress, const String &p_description, void *, bool p_force_refresh) {
	if (tmp_subprogress == nullptr) {
		// First sub-step report of this bake: create the dialog lazily.
		tmp_subprogress = memnew(EditorProgress("bake_lightmaps_substep", "", 1000, true));
		ERR_FAIL_COND_V(tmp_subprogress == nullptr, false);
	}

	return tmp_subprogress->step(p_description, p_progress * 1000, p_force_refresh);
}
void BakedLightmapEditorPlugin::bake_func_end() {
ERR_FAIL_COND(tmp_progress == NULL);
if (tmp_progress != nullptr) {
memdelete(tmp_progress);
tmp_progress = NULL;
tmp_progress = nullptr;
}
if (tmp_subprogress != nullptr) {
memdelete(tmp_subprogress);
tmp_subprogress = nullptr;
}
}
// Exposes the two handlers that the constructor connects to signals by name:
// "_bake" (bake button "pressed") and "_bake_select_file" (file dialog
// "file_selected").
void BakedLightmapEditorPlugin::_bind_methods() {
ClassDB::bind_method("_bake", &BakedLightmapEditorPlugin::_bake);
ClassDB::bind_method("_bake_select_file", &BakedLightmapEditorPlugin::_bake_select_file);
}
BakedLightmapEditorPlugin::BakedLightmapEditorPlugin(EditorNode *p_node) {
@ -114,12 +151,19 @@ BakedLightmapEditorPlugin::BakedLightmapEditorPlugin(EditorNode *p_node) {
bake->set_text(TTR("Bake Lightmaps"));
bake->hide();
bake->connect("pressed", this, "_bake");
file_dialog = memnew(EditorFileDialog);
file_dialog->set_mode(EditorFileDialog::MODE_SAVE_FILE);
file_dialog->add_filter("*.lmbake ; LightMap Bake");
file_dialog->set_title(TTR("Select lightmap bake file:"));
file_dialog->connect("file_selected", this, "_bake_select_file");
bake->add_child(file_dialog);
add_control_to_container(CONTAINER_SPATIAL_EDITOR_MENU, bake);
lightmap = NULL;
BakedLightmap::bake_begin_function = bake_func_begin;
BakedLightmap::bake_step_function = bake_func_step;
BakedLightmap::bake_end_function = bake_func_end;
BakedLightmap::bake_substep_function = bake_func_substep;
}
BakedLightmapEditorPlugin::~BakedLightmapEditorPlugin() {

View File

@ -45,11 +45,15 @@ class BakedLightmapEditorPlugin : public EditorPlugin {
ToolButton *bake;
EditorNode *editor;
EditorFileDialog *file_dialog;
static EditorProgress *tmp_progress;
static void bake_func_begin(int p_steps);
static bool bake_func_step(int p_step, const String &p_description);
static EditorProgress *tmp_subprogress;
static bool bake_func_step(float p_progress, const String &p_description, void *, bool p_force_refresh);
static bool bake_func_substep(float p_progress, const String &p_description, void *, bool p_force_refresh);
static void bake_func_end();
void _bake_select_file(const String &p_file);
void _bake();
protected:

View File

@ -180,6 +180,7 @@ void ProgressDialog::add_task(const String &p_task, const String &p_label, int p
t.progress = memnew(ProgressBar);
t.progress->set_max(p_steps);
t.progress->set_value(p_steps);
t.last_progress_tick = 0;
vb2->add_child(t.progress);
t.state = memnew(Label);
t.state->set_clip_text(true);
@ -204,20 +205,20 @@ bool ProgressDialog::task_step(const String &p_task, const String &p_state, int
ERR_FAIL_COND_V(!tasks.has(p_task), cancelled);
Task &t = tasks[p_task];
if (!p_force_redraw) {
uint64_t tus = OS::get_singleton()->get_ticks_usec();
if (tus - last_progress_tick < 200000) //200ms
if (tus - t.last_progress_tick < 200000) //200ms
return cancelled;
}
Task &t = tasks[p_task];
if (p_step < 0)
t.progress->set_value(t.progress->get_value() + 1);
else
t.progress->set_value(p_step);
t.state->set_text(p_state);
last_progress_tick = OS::get_singleton()->get_ticks_usec();
t.last_progress_tick = OS::get_singleton()->get_ticks_usec();
if (cancel_hb->is_visible()) {
OS::get_singleton()->force_process_input();
}
@ -254,7 +255,6 @@ ProgressDialog::ProgressDialog() {
add_child(main);
main->set_anchors_and_margins_preset(Control::PRESET_WIDE);
set_exclusive(true);
last_progress_tick = 0;
singleton = this;
cancel_hb = memnew(HBoxContainer);
main->add_child(cancel_hb);

View File

@ -77,13 +77,13 @@ class ProgressDialog : public Popup {
VBoxContainer *vb;
ProgressBar *progress;
Label *state;
uint64_t last_progress_tick;
};
HBoxContainer *cancel_hb;
Button *cancel;
Map<String, Task> tasks;
VBoxContainer *main;
uint64_t last_progress_tick;
static ProgressDialog *singleton;
void _popup();

118
modules/denoise/SCsub Normal file
View File

@ -0,0 +1,118 @@
#!/usr/bin/env python
import resource_to_cpp
Import("env")
Import("env_modules")
env_oidn = env_modules.Clone()
# Thirdparty source files
thirdparty_dir = "#thirdparty/oidn/"
thirdparty_sources = [
"core/api.cpp",
"core/device.cpp",
"core/filter.cpp",
"core/network.cpp",
"core/autoencoder.cpp",
"core/transfer_function.cpp",
"weights/rtlightmap_hdr.gen.cpp",
"mkl-dnn/src/common/batch_normalization.cpp",
"mkl-dnn/src/common/concat.cpp",
"mkl-dnn/src/common/convolution.cpp",
"mkl-dnn/src/common/convolution_pd.cpp",
"mkl-dnn/src/common/deconvolution.cpp",
"mkl-dnn/src/common/eltwise.cpp",
"mkl-dnn/src/common/engine.cpp",
"mkl-dnn/src/common/inner_product.cpp",
"mkl-dnn/src/common/inner_product_pd.cpp",
"mkl-dnn/src/common/lrn.cpp",
"mkl-dnn/src/common/memory.cpp",
"mkl-dnn/src/common/memory_desc_wrapper.cpp",
"mkl-dnn/src/common/mkldnn_debug.cpp",
"mkl-dnn/src/common/mkldnn_debug_autogenerated.cpp",
"mkl-dnn/src/common/pooling.cpp",
"mkl-dnn/src/common/primitive.cpp",
"mkl-dnn/src/common/primitive_attr.cpp",
"mkl-dnn/src/common/primitive_desc.cpp",
"mkl-dnn/src/common/primitive_exec_types.cpp",
"mkl-dnn/src/common/primitive_iterator.cpp",
"mkl-dnn/src/common/query.cpp",
"mkl-dnn/src/common/reorder.cpp",
"mkl-dnn/src/common/rnn.cpp",
"mkl-dnn/src/common/scratchpad.cpp",
"mkl-dnn/src/common/shuffle.cpp",
"mkl-dnn/src/common/softmax.cpp",
"mkl-dnn/src/common/stream.cpp",
"mkl-dnn/src/common/sum.cpp",
"mkl-dnn/src/common/utils.cpp",
"mkl-dnn/src/common/verbose.cpp",
"mkl-dnn/src/cpu/cpu_barrier.cpp",
"mkl-dnn/src/cpu/cpu_concat.cpp",
"mkl-dnn/src/cpu/cpu_engine.cpp",
"mkl-dnn/src/cpu/cpu_memory.cpp",
"mkl-dnn/src/cpu/cpu_reducer.cpp",
"mkl-dnn/src/cpu/cpu_reorder.cpp",
"mkl-dnn/src/cpu/cpu_sum.cpp",
"mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.cpp",
"mkl-dnn/src/cpu/jit_avx2_convolution.cpp",
"mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.cpp",
"mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp",
"mkl-dnn/src/cpu/jit_avx512_common_convolution.cpp",
"mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.cpp",
"mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.cpp",
"mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.cpp",
"mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.cpp",
"mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.cpp",
"mkl-dnn/src/cpu/jit_sse42_convolution.cpp",
"mkl-dnn/src/cpu/jit_transpose_src_utils.cpp",
"mkl-dnn/src/cpu/jit_uni_eltwise.cpp",
"mkl-dnn/src/cpu/jit_uni_pool_kernel_f32.cpp",
"mkl-dnn/src/cpu/jit_uni_pooling.cpp",
"mkl-dnn/src/cpu/jit_uni_reorder.cpp",
"mkl-dnn/src/cpu/jit_uni_reorder_utils.cpp",
"mkl-dnn/src/cpu/jit_utils/jit_utils.cpp",
"mkl-dnn/src/cpu/jit_utils/jitprofiling/jitprofiling.c",
"common/platform.cpp",
"common/thread.cpp",
"common/tensor.cpp",
]
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
# Include directories, relative to thirdparty_dir ("" is thirdparty_dir itself).
thirdparty_include_dirs = [
"",
"include",
"mkl-dnn/include",
"mkl-dnn/src",
"mkl-dnn/src/common",
"mkl-dnn/src/cpu/xbyak",
"mkl-dnn/src/cpu",
]
# Turn the relative entries into full "#thirdparty/oidn/..." paths.
thirdparty_include_dirs = [thirdparty_dir + file for file in thirdparty_include_dirs]
# Prepended so these directories come first in the include search path.
env_oidn.Prepend(CPPPATH=thirdparty_include_dirs)
# Preprocessor defines used when compiling the OIDN / mkl-dnn sources and the wrapper.
# NOTE(review): MKLDNN_THR_SEQ presumably selects mkl-dnn's sequential (non-threaded) runtime - confirm.
env_oidn.Append(
CPPDEFINES=[
"MKLDNN_THR=MKLDNN_THR_SEQ",
"OIDN_STATIC_LIB",
"__STDC_CONSTANT_MACROS",
"__STDC_LIMIT_MACROS",
"DISABLE_VERBOSE",
"MKLDNN_ENABLE_CONCURRENT_EXEC",
"NDEBUG",
]
)
# Third-party code is built in its own clone with compiler warnings disabled.
env_thirdparty = env_oidn.Clone()
env_thirdparty.disable_warnings()
env_thirdparty.add_source_files(env.modules_sources, thirdparty_sources)
# The weights .gen.cpp (listed in thirdparty_sources) is generated from the
# binary .tza file by resource_to_cpp.tza_to_cpp.
weights_in_path = thirdparty_dir + "weights/rtlightmap_hdr.tza"
weights_out_path = thirdparty_dir + "weights/rtlightmap_hdr.gen.cpp"
env_thirdparty.Depends(weights_out_path, weights_in_path)
env_thirdparty.CommandNoCache(weights_out_path, weights_in_path, resource_to_cpp.tza_to_cpp)
# The wrapper includes the OIDN header, so it is built with the OIDN environment.
env_oidn.add_source_files(env.modules_sources, "denoise_wrapper.cpp")
# The remaining module sources are built with the regular modules environment.
env_modules.add_source_files(env.modules_sources, ["register_types.cpp", "lightmap_denoiser.cpp"])

15
modules/denoise/config.py Normal file
View File

@ -0,0 +1,15 @@
def can_build(env, platform):
    """Return a truthy value when the denoise module can be built.

    The bundled OpenImageDenoise depends on oneDNN, which only supports
    64-bit architectures and (in the bundled version) not ARM64. Lightmap
    denoising is also only relevant for tools builds on desktop platforms.
    """
    desktop_platforms = ("x11", "osx", "windows", "server")
    on_desktop = platform in desktop_platforms
    is_64_bit = env["bits"] == "64"
    # oneDNN doesn't support ARM64; OIDN needs updating to the latest version.
    not_arm64 = env["arch"] != "arm64"
    return env["tools"] and on_desktop and is_64_bit and not_arm64
def configure(env):
    """Module configuration hook; this module changes nothing in ``env``."""
    return None

View File

@ -0,0 +1,67 @@
/*************************************************************************/
/* denoise_wrapper.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "denoise_wrapper.h"
#include "core/os/copymem.h"
#include "core/os/memory.h"
#include "thirdparty/oidn/include/OpenImageDenoise/oidn.h"
#include <stdio.h>
// Creates and commits an OIDN CPU device and returns it as an opaque handle.
// The handle is what oidn_denoise() and oidn_denoiser_finish() expect as
// their device argument.
void *oidn_denoiser_init() {
OIDNDeviceImpl *device = oidnNewDevice(OIDN_DEVICE_TYPE_CPU);
oidnCommitDevice(device);
return device;
}
bool oidn_denoise(void *deviceptr, float *p_floats, int p_width, int p_height) {
OIDNDeviceImpl *device = (OIDNDeviceImpl *)deviceptr;
OIDNFilter filter = oidnNewFilter(device, "RTLightmap");
void *input_buffer = memalloc(p_width * p_height * 3 * sizeof(float));
copymem(input_buffer, p_floats, p_width * p_height * 3 * sizeof(float));
oidnSetSharedFilterImage(filter, "color", input_buffer, OIDN_FORMAT_FLOAT3, p_width, p_height, 0, 0, 0);
oidnSetSharedFilterImage(filter, "output", (void *)p_floats, OIDN_FORMAT_FLOAT3, p_width, p_height, 0, 0, 0);
oidnSetFilter1b(filter, "hdr", true);
oidnCommitFilter(filter);
oidnExecuteFilter(filter);
const char *msg;
bool success = true;
if (oidnGetDeviceError(device, &msg) != OIDN_ERROR_NONE) {
printf("LightmapDenoiser: %s\n", msg);
success = false;
}
oidnReleaseFilter(filter);
return success;
}
// Releases a device handle previously returned by oidn_denoiser_init().
void oidn_denoiser_finish(void *device) {
oidnReleaseDevice((OIDNDeviceImpl *)device);
}

View File

@ -0,0 +1,38 @@
/*************************************************************************/
/* denoise_wrapper.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef DENOISE_WRAPPER_H
#define DENOISE_WRAPPER_H
// Thin C-style wrapper around OpenImageDenoise; the device is passed around
// as an opaque void* so users of this header don't need the OIDN headers.

// Creates and commits an OIDN CPU device; returns it as an opaque handle.
void *oidn_denoiser_init();
// Denoises p_width * p_height RGB float pixels in p_floats in place.
// Returns false if the device reported an error.
bool oidn_denoise(void *device, float *p_floats, int p_width, int p_height);
// Releases a device handle returned by oidn_denoiser_init().
void oidn_denoiser_finish(void *device);
#endif // DENOISE_WRAPPER_H

View File

@ -0,0 +1,66 @@
/*************************************************************************/
/* lightmap_denoiser.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "lightmap_denoiser.h"
#include "denoise_wrapper.h"
// Factory function: allocates a new LightmapDenoiserOIDN with memnew and
// returns it through the base-class pointer type.
LightmapDenoiser *LightmapDenoiserOIDN::create_oidn_denoiser() {
return memnew(LightmapDenoiserOIDN);
}
// Installs create_oidn_denoiser() as the denoiser factory by assigning it to
// create_function (not declared in this class; presumably inherited from
// LightmapDenoiser).
void LightmapDenoiserOIDN::make_default_denoiser() {
create_function = create_oidn_denoiser;
}
// Returns a denoised copy of p_image, converted to FORMAT_RGBF.
// p_image itself is never modified. If p_image is null or the denoiser
// reports a failure, p_image is returned unchanged.
Ref<Image> LightmapDenoiserOIDN::denoise_image(const Ref<Image> &p_image) {
	// Guard against dereferencing a null reference in duplicate() below.
	if (p_image.is_null()) {
		return p_image;
	}

	// Work on a copy converted to the 3-float-per-pixel layout that oidn_denoise() expects.
	Ref<Image> img = p_image->duplicate();
	img->convert(Image::FORMAT_RGBF);

	PoolByteArray data = img->get_data();
	{
		// The write lock is scoped so it is released before the data is handed back to the image.
		PoolByteArray::Write w = data.write();
		if (!oidn_denoise(device, (float *)w.ptr(), img->get_width(), img->get_height())) {
			return p_image;
		}
	}

	// Rebuild the image from the denoised buffer, keeping size and format.
	img->create(img->get_width(), img->get_height(), false, img->get_format(), data);
	return img;
}
// Creates the OIDN device this denoiser uses for all denoise_image() calls.
LightmapDenoiserOIDN::LightmapDenoiserOIDN() {
device = oidn_denoiser_init();
}
// Releases the OIDN device created in the constructor.
LightmapDenoiserOIDN::~LightmapDenoiserOIDN() {
oidn_denoiser_finish(device);
}

View File

@ -0,0 +1,56 @@
/*************************************************************************/
/* lightmap_denoiser.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef LIGHTMAP_DENOISER_H
#define LIGHTMAP_DENOISER_H
#include "core/class_db.h"
#include "scene/3d/lightmapper.h"
struct OIDNDeviceImpl;
// LightmapDenoiser implementation that denoises images through the
// denoise_wrapper functions (OpenImageDenoise).
class LightmapDenoiserOIDN : public LightmapDenoiser {
GDCLASS(LightmapDenoiserOIDN, LightmapDenoiser);
protected:
// Opaque OIDN device handle; created in the constructor, released in the destructor.
void *device = nullptr;
public:
// Factory function: returns a newly allocated LightmapDenoiserOIDN.
static LightmapDenoiser *create_oidn_denoiser();
// Returns a denoised FORMAT_RGBF copy of p_image, or p_image itself if denoising fails.
Ref<Image> denoise_image(const Ref<Image> &p_image);
// Installs create_oidn_denoiser() as the denoiser factory (create_function).
static void make_default_denoiser();
LightmapDenoiserOIDN();
~LightmapDenoiserOIDN();
};
#endif // LIGHTMAP_DENOISER_H

View File

@ -0,0 +1,41 @@
/*************************************************************************/
/* register_types.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "register_types.h"
#include "core/engine.h"
#include "lightmap_denoiser.h"
// Module entry point: makes the OIDN-based denoiser the default one.
void register_denoise_types() {
LightmapDenoiserOIDN::make_default_denoiser();
}
// Module exit point: intentionally empty, register_denoise_types() leaves nothing to undo here.
void unregister_denoise_types() {
}

View File

@ -0,0 +1,37 @@
/*************************************************************************/
/* register_types.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef DENOISE_REGISTER_TYPES_H
#define DENOISE_REGISTER_TYPES_H
void register_denoise_types();
void unregister_denoise_types();
#endif // DENOISE_REGISTER_TYPES_H

View File

@ -0,0 +1,68 @@
#!/usr/bin/env python
## ======================================================================== ##
## Copyright 2009-2019 Intel Corporation ##
## ##
## Licensed under the Apache License, Version 2.0 (the "License"); ##
## you may not use this file except in compliance with the License. ##
## You may obtain a copy of the License at ##
## ##
## http://www.apache.org/licenses/LICENSE-2.0 ##
## ##
## Unless required by applicable law or agreed to in writing, software ##
## distributed under the License is distributed on an "AS IS" BASIS, ##
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ##
## See the License for the specific language governing permissions and ##
## limitations under the License. ##
## ======================================================================== ##
import os
from array import array
# Generates a C++ file from the specified binary resource file
def generate(in_path, out_path):
    """Write a C++ source file that embeds the bytes of ``in_path``.

    The data is emitted as an ``unsigned char`` array named after the input
    file (base name without extension), 20 values per line, wrapped in the
    ``oidn::weights`` namespace. The array size is only written as a comment.
    """
    scopes = "oidn::weights".split("::")
    file_name = os.path.basename(in_path)
    var_name = os.path.splitext(file_name)[0]

    with open(in_path, "rb") as in_file, open(out_path, "w") as out_file:
        emit = out_file.write

        # Header
        emit("// Generated from: %s\n" % file_name)
        emit("#include <cstddef>\n\n")

        # Open the namespaces
        emit("".join("namespace %s {\n" % scope for scope in scopes))
        if scopes:
            emit("\n")

        # Read the whole resource as unsigned bytes
        in_data = array("B", in_file.read())

        # Size (commented out in the generated file)
        emit("//const size_t %s_size = %d;\n\n" % (var_name, len(in_data)))

        # Data: rows of 20 comma-separated values, each row on its own line
        emit("unsigned char %s[] = {" % var_name)
        rows = [
            ",".join("%d" % value for value in in_data[start : start + 20])
            for start in range(0, len(in_data), 20)
        ]
        if rows:
            emit("\n" + ",\n".join(rows))
        emit("\n};\n")

        # Close the namespaces, innermost first
        if scopes:
            emit("\n")
        emit("".join("} // namespace %s\n" % scope for scope in reversed(scopes)))
def tza_to_cpp(target, source, env):
    """Pair each source with the target at the same position and convert it.

    Both nodes are converted to strings and passed to generate(); env is not
    used.
    """
    for source_node, target_node in zip(source, target):
        generate(str(source_node), str(target_node))

View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
Import("env")
Import("env_modules")

# Private copy of the modules environment so the extra include path below
# stays local to this module.
env_lightmapper_cpu = env_modules.Clone()

# Make the Embree headers reachable, then register this module's sources.
env_lightmapper_cpu.Prepend(CPPPATH=["#thirdparty/embree/include"])
env_lightmapper_cpu.add_source_files(env.modules_sources, "*.cpp")

View File

@ -0,0 +1,6 @@
def can_build(env, platform):
    """Return a truthy value when this module should be built.

    Requires a tools build and the raycast module to be enabled; platform is
    not consulted.
    """
    tools_enabled = env["tools"]
    return tools_enabled and env["module_raycast_enabled"]
def configure(env):
    """Nothing to configure for this module; env is left untouched."""
    return None

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,183 @@
/*************************************************************************/
/* lightmapper_cpu.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef LIGHTMAPPER_CPU_H
#define LIGHTMAPPER_CPU_H
#include "core/local_vector.h"
#include "scene/3d/lightmapper.h"
#include "scene/resources/mesh.h"
#include "scene/resources/surface_tool.h"
#include <atomic>
// CPU implementation of the Lightmapper interface. Scene data is supplied
// through the add_*() methods, bake() produces the lightmaps, and the
// get_bake_*() accessors expose the results.
class LightmapperCPU : public Lightmapper {
GDCLASS(LightmapperCPU, Lightmapper)
// One mesh registered for baking, with its placement in the lightmap atlas.
// NOTE(review): slice/offset are presumably assigned by _layout_atlas() — confirm in the .cpp.
struct MeshInstance {
MeshData data;
int slice = 0;
Vector2i offset;
Vector2i size;
bool cast_shadows;
bool generate_lightmap;
String node_name;
};
// Parameters of one light. `type` defaults to LIGHT_TYPE_DIRECTIONAL; the
// remaining numeric fields have no default initializer here.
struct Light {
Vector3 position;
uint32_t type = LIGHT_TYPE_DIRECTIONAL;
Vector3 direction;
float energy;
float indirect_multiplier;
Color color;
float range;
float attenuation;
float spot_angle;
float spot_attenuation;
bool bake_direct;
};
// Per-texel surface and lighting data stored in scene_lightmaps.
struct LightmapTexel {
Vector3 albedo;
float alpha;
Vector3 emission;
Vector3 pos;
Vector3 normal;
Vector3 direct_light;
Vector3 output_light;
float area_coverage;
};
// Settings for a bake. Only use_denoiser has a default initializer.
struct BakeParams {
float bias;
int bounces;
int samples;
bool use_denoiser = true;
Ref<Image> environment_panorama;
Basis environment_transform;
};
// A pair of UV-space edges, each given by two end points.
// NOTE(review): presumably the two sides of a lightmap seam — confirm in _compute_seams().
struct UVSeam {
Vector2 edge0[2];
Vector2 edge1[2];
};
// A mesh edge described by the position, normal and UV of its two end points.
struct SeamEdge {
Vector3 pos[2];
Vector3 normal[2];
Vector2 uv[2];
// Orders edges by the x coordinate of their first end point only.
_FORCE_INLINE_ bool operator<(const SeamEdge &p_edge) const {
return pos[0].x < p_edge.pos[0].x;
}
};
// Atlas slice index plus an x/y position.
struct AtlasOffset {
int slice;
int x;
int y;
};
struct ThreadData;
// Pointer to a LightmapperCPU member taking (index, userdata); the signature
// matches _generate_buffer, _compute_direct_light, _compute_indirect_light
// and _post_process below.
typedef void (LightmapperCPU::*BakeThreadFunc)(uint32_t, void *);
// Bundle handed to the threading helpers: target instance, item count,
// member function to invoke and its opaque user data.
struct ThreadData {
LightmapperCPU *instance;
uint32_t count;
BakeThreadFunc thread_func;
void *userdata;
};
BakeParams parameters;
LocalVector<Ref<Image> > bake_textures;
// Image caches keyed by RID.
Map<RID, Ref<Image> > albedo_textures;
Map<RID, Ref<Image> > emission_textures;
LocalVector<MeshInstance> mesh_instances;
LocalVector<Light> lights;
// NOTE(review): presumably one entry per mesh instance — confirm in the .cpp.
LocalVector<LocalVector<LightmapTexel> > scene_lightmaps;
LocalVector<LocalVector<int> > scene_lightmap_indices;
Set<int> no_shadow_meshes;
// Atomic progress counter and cancellation flag.
// NOTE(review): presumably shared between _parallel_run() and its workers — confirm.
std::atomic<uint32_t> thread_progress;
std::atomic<bool> thread_cancelled;
Ref<LightmapRaycaster> raycaster;
Error _layout_atlas(int p_max_size, Vector2i *r_atlas_size, int *r_atlas_slices);
static void _thread_func_callback(void *p_thread_data);
void _thread_func_wrapper(uint32_t p_idx, ThreadData *p_thread_data);
bool _parallel_run(int p_count, const String &p_description, BakeThreadFunc p_thread_func, void *p_userdata, BakeStepFunc p_substep_func = nullptr);
void _generate_buffer(uint32_t p_idx, void *p_unused);
Ref<Image> _init_bake_texture(const MeshData::TextureDef &p_texture_def, const Map<RID, Ref<Image> > &p_tex_cache, Image::Format p_default_format);
Color _bilinear_sample(const Ref<Image> &p_img, const Vector2 &p_uv, bool p_clamp_x = false, bool p_clamp_y = false);
Vector3 _fix_sample_position(const Vector3 &p_position, const Vector3 &p_texel_center, const Vector3 &p_normal, const Vector3 &p_tangent, const Vector3 &p_bitangent, const Vector2 &p_texel_size);
void _plot_triangle(const Vector2 *p_vertices, const Vector3 *p_positions, const Vector3 *p_normals, const Vector2 *p_uvs, const Ref<Image> &p_albedo_texture, const Ref<Image> &p_emission_texture, Vector2i p_size, LocalVector<LightmapTexel> &r_texels, LocalVector<int> &r_lightmap_indices);
void _compute_direct_light(uint32_t p_idx, void *r_lightmap);
void _compute_indirect_light(uint32_t p_idx, void *r_lightmap);
void _post_process(uint32_t p_idx, void *r_output);
void _compute_seams(const MeshInstance &p_mesh, LocalVector<UVSeam> &r_seams);
void _fix_seams(const LocalVector<UVSeam> &p_seams, Vector3 *r_lightmap, Vector2i p_size);
void _fix_seam(const Vector2 &p_pos0, const Vector2 &p_pos1, const Vector2 &p_uv0, const Vector2 &p_uv1, const Vector3 *p_read_buffer, Vector3 *r_write_buffer, const Vector2i &p_size);
// NOTE(review): p_indices is taken by value, so the vector is copied on every
// call; a const reference would avoid that but must be changed together with
// the definition in the .cpp.
void _dilate_lightmap(Vector3 *r_lightmap, const LocalVector<int> p_indices, Vector2i p_size, int margin);
void _blit_lightmap(const Vector<Vector3> &p_src, const Vector2i &p_size, Ref<Image> &p_dst, int p_x, int p_y, bool p_with_padding);
public:
// Scene input.
virtual void add_albedo_texture(Ref<Texture> p_texture);
virtual void add_emission_texture(Ref<Texture> p_texture);
virtual void add_mesh(const MeshData &p_mesh, Vector2i p_size);
virtual void add_directional_light(bool p_bake_direct, const Vector3 &p_direction, const Color &p_color, float p_energy, float p_indirect_multiplier);
virtual void add_omni_light(bool p_bake_direct, const Vector3 &p_position, const Color &p_color, float p_energy, float p_indirect_multiplier, float p_range, float p_attenuation);
virtual void add_spot_light(bool p_bake_direct, const Vector3 &p_position, const Vector3 p_direction, const Color &p_color, float p_energy, float p_indirect_multiplier, float p_range, float p_attenuation, float p_spot_angle, float p_spot_attenuation);
// Runs the bake; the step/substep callbacks are optional (default nullptr).
virtual BakeError bake(BakeQuality p_quality, bool p_use_denoiser, int p_bounces, float p_bias, bool p_generate_atlas, int p_max_texture_size, const Ref<Image> &p_environment_panorama, const Basis &p_environment_transform, BakeStepFunc p_step_function = nullptr, void *p_bake_userdata = nullptr, BakeStepFunc p_substep_function = nullptr);
// Result accessors.
int get_bake_texture_count() const;
Ref<Image> get_bake_texture(int p_index) const;
int get_bake_mesh_count() const;
Variant get_bake_mesh_userdata(int p_index) const;
Rect2 get_bake_mesh_uv_scale(int p_index) const;
int get_bake_mesh_texture_slice(int p_index) const;
LightmapperCPU();
};
#endif // LIGHTMAPPER_CPU_H

View File

@ -0,0 +1,54 @@
/*************************************************************************/
/* register_types.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "register_types.h"
#include "core/project_settings.h"
#include "lightmapper_cpu.h"
#include "scene/3d/lightmapper.h"
#ifndef _3D_DISABLED
// Factory installed as Lightmapper::create_cpu: allocates a new CPU lightmapper
// and hands it back through the base-class pointer.
static Lightmapper *create_lightmapper_cpu() {
	LightmapperCPU *lightmapper = memnew(LightmapperCPU);
	return lightmapper;
}
#endif
// Module registration hook. Declares the four per-quality ray-count project
// settings with their initial values and, when 3D support is compiled in,
// installs create_lightmapper_cpu as the Lightmapper::create_cpu factory.
void register_lightmapper_cpu_types() {
// Ray counts per quality preset (GLOBAL_DEF is defined outside this file).
GLOBAL_DEF("rendering/cpu_lightmapper/quality/low_quality_ray_count", 64);
GLOBAL_DEF("rendering/cpu_lightmapper/quality/medium_quality_ray_count", 256);
GLOBAL_DEF("rendering/cpu_lightmapper/quality/high_quality_ray_count", 512);
GLOBAL_DEF("rendering/cpu_lightmapper/quality/ultra_quality_ray_count", 1024);
#ifndef _3D_DISABLED
// The factory function itself only exists when _3D_DISABLED is not defined.
Lightmapper::create_cpu = create_lightmapper_cpu;
#endif
}
// Module teardown hook; intentionally empty (nothing is undone here).
void unregister_lightmapper_cpu_types() {
}

View File

@ -0,0 +1,37 @@
/*************************************************************************/
/* register_types.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef LIGHTMAPPER_CPU_REGISTER_TYPES_H
#define LIGHTMAPPER_CPU_REGISTER_TYPES_H
// Defines the CPU lightmapper project settings and installs its factory.
void register_lightmapper_cpu_types();
// Counterpart of the registration hook; currently a no-op.
void unregister_lightmapper_cpu_types();
#endif // LIGHTMAPPER_CPU_REGISTER_TYPES_H

93
modules/raycast/SCsub Normal file
View File

@ -0,0 +1,93 @@
#!/usr/bin/env python
Import("env")
Import("env_modules")
# Embree translation units compiled into the engine, given relative to
# embree_dir (each entry is prefixed with it further down).
embree_src = [
"common/sys/sysinfo.cpp",
"common/sys/alloc.cpp",
"common/sys/filename.cpp",
"common/sys/library.cpp",
"common/sys/thread.cpp",
"common/sys/string.cpp",
"common/sys/regression.cpp",
"common/sys/mutex.cpp",
"common/sys/condition.cpp",
"common/sys/barrier.cpp",
"common/math/constants.cpp",
"common/simd/sse.cpp",
"common/lexers/stringstream.cpp",
"common/lexers/tokenstream.cpp",
"common/tasking/taskschedulerinternal.cpp",
"common/algorithms/parallel_for.cpp",
"common/algorithms/parallel_reduce.cpp",
"common/algorithms/parallel_prefix_sum.cpp",
"common/algorithms/parallel_for_for.cpp",
"common/algorithms/parallel_for_for_prefix_sum.cpp",
"common/algorithms/parallel_partition.cpp",
"common/algorithms/parallel_sort.cpp",
"common/algorithms/parallel_set.cpp",
"common/algorithms/parallel_map.cpp",
"common/algorithms/parallel_filter.cpp",
"kernels/common/device.cpp",
"kernels/common/stat.cpp",
"kernels/common/acceln.cpp",
"kernels/common/accelset.cpp",
"kernels/common/state.cpp",
"kernels/common/rtcore.cpp",
"kernels/common/rtcore_builder.cpp",
"kernels/common/scene.cpp",
"kernels/common/alloc.cpp",
"kernels/common/geometry.cpp",
"kernels/common/scene_triangle_mesh.cpp",
"kernels/geometry/primitive4.cpp",
"kernels/builders/primrefgen.cpp",
"kernels/bvh/bvh.cpp",
"kernels/bvh/bvh_statistics.cpp",
"kernels/bvh/bvh4_factory.cpp",
"kernels/bvh/bvh8_factory.cpp",
"kernels/bvh/bvh_collider.cpp",
"kernels/bvh/bvh_rotate.cpp",
"kernels/bvh/bvh_refit.cpp",
"kernels/bvh/bvh_builder.cpp",
"kernels/bvh/bvh_builder_morton.cpp",
"kernels/bvh/bvh_builder_sah.cpp",
"kernels/bvh/bvh_builder_sah_spatial.cpp",
"kernels/bvh/bvh_builder_sah_mb.cpp",
"kernels/bvh/bvh_builder_twolevel.cpp",
"kernels/bvh/bvh_intersector1_bvh4.cpp",
]
# Two cloned environments are configured below: env_embree compiles the
# third-party Embree sources, env_raycast compiles this module's own *.cpp.
embree_dir = "#thirdparty/embree/"
env_embree = env_modules.Clone()
embree_sources = [embree_dir + file for file in embree_src]
env_embree.Prepend(CPPPATH=[embree_dir, embree_dir + "include"])
# Build Embree for SSE2 only, with its internal tasking system.
env_embree.Append(
CPPFLAGS=[
"-DEMBREE_TARGET_SSE2",
"-DEMBREE_LOWEST_ISA",
"-msse2",
"-DTASKING_INTERNAL",
"-DNDEBUG",
"-D__SSE2__",
"-D__SSE__",
]
)
# -mxsave is only added for non-MSVC toolchains.
if not env_embree.msvc:
env_embree.Append(CPPFLAGS=["-mxsave"])
# On Windows psapi is added to the shared `env` (not the clone), using the
# MSVC or the non-MSVC spelling of the library.
if env["platform"] == "windows":
if env.msvc:
env.Append(LINKFLAGS=["psapi.lib"])
else:
env.Append(LIBS=["psapi"])
# Third-party code: compiler warnings are turned off for it.
env_embree.disable_warnings()
env_embree.add_source_files(env.modules_sources, embree_sources)
# The module's own sources additionally get Embree's "common" directory on the
# include path.
env_raycast = env_modules.Clone()
env_raycast.Prepend(CPPPATH=[embree_dir, embree_dir + "include", embree_dir + "common"])
env_raycast.add_source_files(env.modules_sources, "*.cpp")

13
modules/raycast/config.py Normal file
View File

@ -0,0 +1,13 @@
def can_build(env, platform):
    """Return a truthy value when the raycast module can be built.

    Embree needs at least SSE2, so only 64-bit, non-ARM64 builds qualify. The
    module is further limited to tools builds on desktop/server platforms,
    since lightmap generation on Android or HTML5 would be far-fetched.
    """
    is_supported_platform = platform in ["x11", "osx", "windows", "server"]
    is_64_bit = env["bits"] == "64"
    is_supported_arch = env["arch"] != "arm64"
    return env["tools"] and (is_supported_platform and is_64_bit and is_supported_arch)
def configure(env):
    """This module needs no environment changes; env is left as is."""
    return None

View File

@ -0,0 +1,259 @@
import glob, os, shutil, subprocess, re
# Directories inside the Embree checkout whose *.h files are copied into the
# vendored tree (non-recursive glob, see the copy loop below).
include_dirs = [
"common/tasking",
"kernels/bvh",
"kernels/builders",
"common/sys",
"kernels",
"kernels/common",
"common/math",
"common/algorithms",
"common/lexers",
"common/simd",
"include/embree3",
"kernels/subdiv",
"kernels/geometry",
]
# Source files copied from the Embree checkout into the vendored tree, given
# relative to the checkout root.
cpp_files = [
"common/sys/sysinfo.cpp",
"common/sys/alloc.cpp",
"common/sys/filename.cpp",
"common/sys/library.cpp",
"common/sys/thread.cpp",
"common/sys/string.cpp",
"common/sys/regression.cpp",
"common/sys/mutex.cpp",
"common/sys/condition.cpp",
"common/sys/barrier.cpp",
"common/math/constants.cpp",
"common/simd/sse.cpp",
"common/lexers/stringstream.cpp",
"common/lexers/tokenstream.cpp",
"common/tasking/taskschedulerinternal.cpp",
"common/algorithms/parallel_for.cpp",
"common/algorithms/parallel_reduce.cpp",
"common/algorithms/parallel_prefix_sum.cpp",
"common/algorithms/parallel_for_for.cpp",
"common/algorithms/parallel_for_for_prefix_sum.cpp",
"common/algorithms/parallel_partition.cpp",
"common/algorithms/parallel_sort.cpp",
"common/algorithms/parallel_set.cpp",
"common/algorithms/parallel_map.cpp",
"common/algorithms/parallel_filter.cpp",
"kernels/common/device.cpp",
"kernels/common/stat.cpp",
"kernels/common/acceln.cpp",
"kernels/common/accelset.cpp",
"kernels/common/state.cpp",
"kernels/common/rtcore.cpp",
"kernels/common/rtcore_builder.cpp",
"kernels/common/scene.cpp",
"kernels/common/alloc.cpp",
"kernels/common/geometry.cpp",
"kernels/common/scene_triangle_mesh.cpp",
"kernels/geometry/primitive4.cpp",
"kernels/builders/primrefgen.cpp",
"kernels/bvh/bvh.cpp",
"kernels/bvh/bvh_statistics.cpp",
"kernels/bvh/bvh4_factory.cpp",
"kernels/bvh/bvh8_factory.cpp",
"kernels/bvh/bvh_collider.cpp",
"kernels/bvh/bvh_rotate.cpp",
"kernels/bvh/bvh_refit.cpp",
"kernels/bvh/bvh_builder.cpp",
"kernels/bvh/bvh_builder_morton.cpp",
"kernels/bvh/bvh_builder_sah.cpp",
"kernels/bvh/bvh_builder_sah_spatial.cpp",
"kernels/bvh/bvh_builder_sah_mb.cpp",
"kernels/bvh/bvh_builder_twolevel.cpp",
"kernels/bvh/bvh_intersector1.cpp",
"kernels/bvh/bvh_intersector1_bvh4.cpp",
]
# Work inside the thirdparty directory. The relative path assumes the current
# directory is two levels below the directory that contains "thirdparty".
os.chdir("../../thirdparty")

# Remove a checkout left behind by an interrupted run, so the clone below can
# neither fail on an existing directory nor silently reuse stale sources.
if os.path.exists("embree-tmp"):
    shutil.rmtree("embree-tmp")

# check=True aborts the script when the clone fails instead of carrying on
# without sources.
subprocess.run(["git", "clone", "https://github.com/embree/embree.git", "embree-tmp"], check=True)

# Only discard the vendored copy once a fresh checkout is known to exist.
if os.path.exists("embree"):
    shutil.rmtree("embree")

os.chdir("embree-tmp")

# Commit of the checkout; written into kernels/hash.h further down.
commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"], universal_newlines=True).strip()

dest_dir = "../embree"

# Everything to copy: the listed sources plus every header found directly in
# the listed include directories.
all_files = set(cpp_files)
for include_dir in include_dirs:
    headers = glob.iglob(os.path.join(include_dir, "*.h"))
    all_files.update(headers)

# Mirror the relative directory layout under dest_dir.
for f in all_files:
    d = os.path.join(dest_dir, os.path.dirname(f))
    os.makedirs(d, exist_ok=True)
    shutil.copy2(f, d)
# Generate kernels/hash.h, which records the checked-out commit as RTC_HASH.
with open(os.path.join(dest_dir, "kernels/hash.h"), "w") as hash_file:
hash_file.write(
f"""
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define RTC_HASH "{commit_hash}"
"""
)
# Generate kernels/config.h with a fixed feature set: only the filter function
# and triangle geometry are enabled; every other option is left undefined.
# The IF_ENABLED_* helper macros are derived from those defines.
with open(os.path.join(dest_dir, "kernels/config.h"), "w") as config_file:
config_file.write(
"""
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
/* #undef EMBREE_RAY_MASK */
/* #undef EMBREE_STAT_COUNTERS */
/* #undef EMBREE_BACKFACE_CULLING */
/* #undef EMBREE_BACKFACE_CULLING_CURVES */
#define EMBREE_FILTER_FUNCTION
/* #undef EMBREE_IGNORE_INVALID_RAYS */
#define EMBREE_GEOMETRY_TRIANGLE
/* #undef EMBREE_GEOMETRY_QUAD */
/* #undef EMBREE_GEOMETRY_CURVE */
/* #undef EMBREE_GEOMETRY_SUBDIVISION */
/* #undef EMBREE_GEOMETRY_USER */
/* #undef EMBREE_GEOMETRY_INSTANCE */
/* #undef EMBREE_GEOMETRY_GRID */
/* #undef EMBREE_GEOMETRY_POINT */
/* #undef EMBREE_RAY_PACKETS */
/* #undef EMBREE_COMPACT_POLYS */
#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0
#if defined(EMBREE_GEOMETRY_TRIANGLE)
#define IF_ENABLED_TRIS(x) x
#else
#define IF_ENABLED_TRIS(x)
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
#define IF_ENABLED_QUADS(x) x
#else
#define IF_ENABLED_QUADS(x)
#endif
#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
#define IF_ENABLED_CURVES_OR_POINTS(x) x
#else
#define IF_ENABLED_CURVES_OR_POINTS(x)
#endif
#if defined(EMBREE_GEOMETRY_CURVE)
#define IF_ENABLED_CURVES(x) x
#else
#define IF_ENABLED_CURVES(x)
#endif
#if defined(EMBREE_GEOMETRY_POINT)
#define IF_ENABLED_POINTS(x) x
#else
#define IF_ENABLED_POINTS(x)
#endif
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
#define IF_ENABLED_SUBDIV(x) x
#else
#define IF_ENABLED_SUBDIV(x)
#endif
#if defined(EMBREE_GEOMETRY_USER)
#define IF_ENABLED_USER(x) x
#else
#define IF_ENABLED_USER(x)
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
#define IF_ENABLED_INSTANCE(x) x
#else
#define IF_ENABLED_INSTANCE(x)
#endif
#if defined(EMBREE_GEOMETRY_GRID)
#define IF_ENABLED_GRIDS(x) x
#else
#define IF_ENABLED_GRIDS(x)
#endif
"""
)
# Read the Embree version numbers out of the checkout's CMakeLists.txt.
# Each lookup takes the first match; a missing match raises IndexError.
with open("CMakeLists.txt", "r") as cmake_file:
cmake_content = cmake_file.read()
major_version = int(re.compile(r"EMBREE_VERSION_MAJOR\s(\d+)").findall(cmake_content)[0])
minor_version = int(re.compile(r"EMBREE_VERSION_MINOR\s(\d+)").findall(cmake_content)[0])
patch_version = int(re.compile(r"EMBREE_VERSION_PATCH\s(\d+)").findall(cmake_content)[0])
# Generate include/embree3/rtcore_config.h for a static-library build with the
# version numbers filled in. Literal braces in the template are doubled
# because it is an f-string.
with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as config_file:
config_file.write(
f"""
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#define RTC_VERSION_MAJOR {major_version}
#define RTC_VERSION_MINOR {minor_version}
#define RTC_VERSION_PATCH {patch_version}
#define RTC_VERSION {major_version}{minor_version:02d}{patch_version:02d}
#define RTC_VERSION_STRING "{major_version}.{minor_version}.{patch_version}"
#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
#define EMBREE_MIN_WIDTH 0
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
#define EMBREE_STATIC_LIB
/* #undef EMBREE_API_NAMESPACE */
#if defined(EMBREE_API_NAMESPACE)
# define RTC_NAMESPACE
# define RTC_NAMESPACE_BEGIN namespace {{
# define RTC_NAMESPACE_END }}
# define RTC_NAMESPACE_USE using namespace ;
# define RTC_API_EXTERN_C
# undef EMBREE_API_NAMESPACE
#else
# define RTC_NAMESPACE_BEGIN
# define RTC_NAMESPACE_END
# define RTC_NAMESPACE_USE
# if defined(__cplusplus)
# define RTC_API_EXTERN_C extern "C"
# else
# define RTC_API_EXTERN_C
# endif
#endif
#if defined(ISPC)
# define RTC_API_IMPORT extern "C" unmasked
# define RTC_API_EXPORT extern "C" unmasked
#elif defined(EMBREE_STATIC_LIB)
# define RTC_API_IMPORT RTC_API_EXTERN_C
# define RTC_API_EXPORT RTC_API_EXTERN_C
#elif defined(_WIN32)
# define RTC_API_IMPORT RTC_API_EXTERN_C __declspec(dllimport)
# define RTC_API_EXPORT RTC_API_EXTERN_C __declspec(dllexport)
#else
# define RTC_API_IMPORT RTC_API_EXTERN_C
# define RTC_API_EXPORT RTC_API_EXTERN_C __attribute__ ((visibility ("default")))
#endif
#if defined(RTC_EXPORT_API)
# define RTC_API RTC_API_EXPORT
#else
# define RTC_API RTC_API_IMPORT
#endif
"""
)
# Step out of the temporary checkout and delete it.
os.chdir("..")
shutil.rmtree("embree-tmp")

View File

@ -0,0 +1,198 @@
/*************************************************************************/
/* lightmap_raycaster.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "lightmap_raycaster.h"
// From Embree.
#include <math/vec2.h>
#include <math/vec3.h>
using namespace embree;
// Factory: allocates an Embree-backed raycaster and returns it through the
// LightmapRaycaster base pointer.
LightmapRaycaster *LightmapRaycasterEmbree::create_embree_raycaster() {
	LightmapRaycasterEmbree *raycaster = memnew(LightmapRaycasterEmbree);
	return raycaster;
}
// Installs the Embree factory as the raycaster creation function.
void LightmapRaycasterEmbree::make_default_raycaster() {
	create_function = &LightmapRaycasterEmbree::create_embree_raycaster;
}
void LightmapRaycasterEmbree::filter_function(const struct RTCFilterFunctionNArguments *p_args) {
RTCHit *hit = (RTCHit *)p_args->hit;
unsigned int geomID = hit->geomID;
float u = hit->u;
float v = hit->v;
LightmapRaycasterEmbree *scene = (LightmapRaycasterEmbree *)p_args->geometryUserPtr;
RTCGeometry geom = rtcGetGeometry(scene->embree_scene, geomID);
rtcInterpolate0(geom, hit->primID, hit->u, hit->v, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 0, &hit->u, 2);
if (scene->alpha_textures.has(geomID)) {
const AlphaTextureData &alpha_texture = scene->alpha_textures[geomID];
if (alpha_texture.sample(hit->u, hit->v) < 128) {
p_args->valid[0] = 0;
return;
}
}
rtcInterpolate0(geom, hit->primID, u, v, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 1, &hit->Ng_x, 3);
}
// Traces a single ray against the scene. The ray is updated in place; the
// return value is true when a geometry id was recorded for it.
bool LightmapRaycasterEmbree::intersect(Ray &r_ray) {
	RTCIntersectContext intersect_context;
	rtcInitIntersectContext(&intersect_context);

	// The ray is handed to Embree through a pointer cast to RTCRayHit.
	RTCRayHit *ray_hit = (RTCRayHit *)&r_ray;
	rtcIntersect1(embree_scene, &intersect_context, ray_hit);

	const bool has_hit = r_ray.geomID != RTC_INVALID_GEOMETRY_ID;
	return has_hit;
}
// Traces every ray of the vector in order, updating each one in place.
void LightmapRaycasterEmbree::intersect(Vector<Ray> &r_rays) {
	Ray *ray_data = r_rays.ptrw();
	const int ray_count = r_rays.size();
	int index = 0;
	while (index < ray_count) {
		intersect(ray_data[index]);
		++index;
	}
}
// Stores a copy of an alpha texture for mesh p_id, for use by the
// intersection filter. Invalid or zero-sized images are ignored.
// One byte per texel is copied (width * height bytes in total).
// NOTE(review): this presumably expects a single-channel 8-bit image — confirm with the caller.
void LightmapRaycasterEmbree::set_mesh_alpha_texture(Ref<Image> p_alpha_texture, unsigned int p_id) {
	const bool usable = p_alpha_texture.is_valid() && p_alpha_texture->get_size() != Vector2i();
	if (!usable) {
		return;
	}

	AlphaTextureData texture;
	texture.size = p_alpha_texture->get_size();
	texture.data.resize(texture.size.x * texture.size.y);

	{
		// Scoped so the read access is released before the map insertion.
		PoolVector<uint8_t>::Read source = p_alpha_texture->get_data().read();
		uint8_t *destination = texture.data.ptrw();
		for (int texel = 0; texel < texture.size.x * texture.size.y; texel++) {
			destination[texel] = source[texel];
		}
	}

	alpha_textures.insert(p_id, texture);
}
// Bilinear interpolation: blends c00->c10 and c01->c11 by tx, then blends the
// two results by ty.
float blerp(float c00, float c10, float c01, float c11, float tx, float ty) {
	const float first_row = Math::lerp(c00, c10, tx);
	const float second_row = Math::lerp(c01, c11, tx);
	return Math::lerp(first_row, second_row, ty);
}
// Bilinearly samples the texture at (u, v), where u and v are scaled by the
// texture size. The four neighbouring texel coordinates are clamped to the
// texture bounds, and the blended value is rounded.
uint8_t LightmapRaycasterEmbree::AlphaTextureData::sample(float u, float v) const {
	const float texel_x = u * size.x;
	const float texel_y = v * size.y;
	const int base_x = (int)texel_x;
	const int base_y = (int)texel_y;

	// Filled in the order (0,0), (1,0), (0,1), (1,1) relative to the base texel.
	uint8_t neighbours[4];
	int slot = 0;
	for (int row = 0; row < 2; ++row) {
		const int clamped_y = CLAMP(base_y + row, 0, size.y - 1);
		for (int column = 0; column < 2; ++column) {
			const int clamped_x = CLAMP(base_x + column, 0, size.x - 1);
			neighbours[slot++] = data[clamped_y * size.x + clamped_x];
		}
	}

	return Math::round(blerp(neighbours[0], neighbours[1], neighbours[2], neighbours[3], texel_x - base_x, texel_y - base_y));
}
// Uploads a non-indexed triangle list to Embree and attaches it to the scene
// under geometry id p_id.
//
// p_vertices: triangle corners; the count must be a multiple of 3.
// p_normals:  optional per-vertex normals (may be empty); when present they
//             must cover every vertex. Stored in vertex attribute slot 1.
// p_uv2s:     per-vertex UV2 coordinates, one per vertex. Stored in vertex
//             attribute slot 0.
//
// Invalid input is reported through ERR_FAIL_COND and nothing is added.
void LightmapRaycasterEmbree::add_mesh(const Vector<Vector3> &p_vertices, const Vector<Vector3> &p_normals, const Vector<Vector2> &p_uv2s, unsigned int p_id) {
	const int vertex_count = p_vertices.size();

	// Validate before creating the geometry, so that an early return cannot
	// leave an unreleased Embree geometry behind.
	ERR_FAIL_COND(vertex_count % 3 != 0);
	ERR_FAIL_COND(vertex_count != p_uv2s.size());
	// The fill loop below reads one normal per vertex.
	ERR_FAIL_COND(!p_normals.empty() && p_normals.size() < vertex_count);

	RTCGeometry embree_mesh = rtcNewGeometry(embree_device, RTC_GEOMETRY_TYPE_TRIANGLE);

	rtcSetGeometryVertexAttributeCount(embree_mesh, 2);

	Vec3fa *embree_vertices = (Vec3fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, sizeof(Vec3fa), vertex_count);
	Vec2fa *embree_light_uvs = (Vec2fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 0, RTC_FORMAT_FLOAT2, sizeof(Vec2fa), vertex_count);
	uint32_t *embree_triangles = (uint32_t *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(uint32_t) * 3, vertex_count / 3);

	Vec3fa *embree_normals = nullptr;
	if (!p_normals.empty()) {
		embree_normals = (Vec3fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 1, RTC_FORMAT_FLOAT3, sizeof(Vec3fa), vertex_count);
	}

	for (int i = 0; i < vertex_count; i++) {
		embree_vertices[i] = Vec3fa(p_vertices[i].x, p_vertices[i].y, p_vertices[i].z);
		embree_light_uvs[i] = Vec2fa(p_uv2s[i].x, p_uv2s[i].y);
		if (embree_normals != nullptr) {
			embree_normals[i] = Vec3fa(p_normals[i].x, p_normals[i].y, p_normals[i].z);
		}
		// The input is not indexed: vertex i is simply index i.
		embree_triangles[i] = i;
	}

	rtcCommitGeometry(embree_mesh);
	rtcSetGeometryIntersectFilterFunction(embree_mesh, filter_function);
	rtcSetGeometryUserData(embree_mesh, this);
	rtcAttachGeometryByID(embree_scene, embree_mesh, p_id);
	// Drop the local handle now that the geometry is attached to the scene.
	rtcReleaseGeometry(embree_mesh);
}
// Commits the Embree scene.
void LightmapRaycasterEmbree::commit() {
rtcCommitScene(embree_scene);
}
void LightmapRaycasterEmbree::set_mesh_filter(const Set<int> &p_mesh_ids) {
for (Set<int>::Element *E = p_mesh_ids.front(); E; E = E->next()) {
rtcDisableGeometry(rtcGetGeometry(embree_scene, E->get()));
}
rtcCommitScene(embree_scene);
filter_meshes = p_mesh_ids;
}
void LightmapRaycasterEmbree::clear_mesh_filter() {
for (Set<int>::Element *E = filter_meshes.front(); E; E = E->next()) {
rtcEnableGeometry(rtcGetGeometry(embree_scene, E->get()));
}
rtcCommitScene(embree_scene);
filter_meshes.clear();
}
void embree_error_handler(void *p_user_data, RTCError p_code, const char *p_str) {
print_error("Embree error: " + String(p_str));
}
// Creates the Embree device (default configuration), installs the error
// callback on it, then creates the scene that meshes are attached to.
LightmapRaycasterEmbree::LightmapRaycasterEmbree() {
	embree_device = rtcNewDevice(nullptr);
	rtcSetDeviceErrorFunction(embree_device, embree_error_handler, nullptr);

	embree_scene = rtcNewScene(embree_device);
}
// Releases the scene first and the device afterwards; null handles are skipped.
LightmapRaycasterEmbree::~LightmapRaycasterEmbree() {
	if (embree_scene) {
		rtcReleaseScene(embree_scene);
	}

	if (embree_device) {
		rtcReleaseDevice(embree_device);
	}
}

View File

@ -0,0 +1,73 @@
/*************************************************************************/
/* lightmap_raycaster.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef LIGHTMAP_RAYCASTER_EMBREE_H
#define LIGHTMAP_RAYCASTER_EMBREE_H

#include "core/object.h"
#include "scene/3d/lightmapper.h"
#include "scene/resources/mesh.h"

#include <embree3/rtcore.h>

// LightmapRaycaster implementation backed by Embree. Meshes are attached to a
// single Embree scene; an intersection filter handles alpha-tested hits and
// interpolated hit attributes.
class LightmapRaycasterEmbree : public LightmapRaycaster {
	GDCLASS(LightmapRaycasterEmbree, LightmapRaycaster);

private:
	// CPU-side copy of an alpha texture, one byte per texel.
	struct AlphaTextureData {
		Vector<uint8_t> data;
		Vector2i size;

		// Bilinear sample at (u, v); the result is compared against 128 by
		// the intersection filter.
		uint8_t sample(float u, float v) const;
	};

	RTCDevice embree_device;
	RTCScene embree_scene;

	// Callback installed on every geometry added through add_mesh().
	static void filter_function(const struct RTCFilterFunctionNArguments *p_args);

	// Alpha textures keyed by the geometry id passed to set_mesh_alpha_texture().
	Map<unsigned int, AlphaTextureData> alpha_textures;
	// Geometry ids currently disabled by set_mesh_filter().
	Set<int> filter_meshes;

public:
	// Traces one ray, updating it in place; returns true on a hit.
	virtual bool intersect(Ray &r_ray);

	// Traces every ray of the vector, updating each in place.
	virtual void intersect(Vector<Ray> &r_rays);

	virtual void add_mesh(const Vector<Vector3> &p_vertices, const Vector<Vector3> &p_normals, const Vector<Vector2> &p_uv2s, unsigned int p_id);
	virtual void set_mesh_alpha_texture(Ref<Image> p_alpha_texture, unsigned int p_id);
	virtual void commit();

	// Temporarily disable / re-enable a set of meshes.
	virtual void set_mesh_filter(const Set<int> &p_mesh_ids);
	virtual void clear_mesh_filter();

	static LightmapRaycaster *create_embree_raycaster();
	static void make_default_raycaster();

	LightmapRaycasterEmbree();
	~LightmapRaycasterEmbree();
};

#endif // LIGHTMAP_RAYCASTER_EMBREE_H

View File

@ -0,0 +1,40 @@
/*************************************************************************/
/* register_types.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "register_types.h"
#include "lightmap_raycaster.h"
// Registration hook of the raycast module. It only delegates to
// LightmapRaycasterEmbree::make_default_raycaster().
void register_raycast_types() {
	LightmapRaycasterEmbree::make_default_raycaster();
}
// Counterpart of register_raycast_types(); intentionally a no-op.
void unregister_raycast_types() {
}

View File

@ -0,0 +1,32 @@
/*************************************************************************/
/* register_types.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef RAYCAST_REGISTER_TYPES_H
#define RAYCAST_REGISTER_TYPES_H

// Registration hooks of the raycast module. The guard makes this header safe to
// include more than once.
void register_raycast_types();
void unregister_raycast_types();

#endif // RAYCAST_REGISTER_TYPES_H

View File

@ -56,18 +56,19 @@ bool xatlas_mesh_lightmap_unwrap_callback(float p_texel_size, const float *p_ver
input_mesh.vertexUvStride = 0;
xatlas::ChartOptions chart_options;
xatlas::PackOptions pack_options;
chart_options.fixWinding = true;
pack_options.maxChartSize = 4096;
xatlas::PackOptions pack_options;
pack_options.padding = 1;
pack_options.maxChartSize = 4094; // Lightmap atlassing needs 2 for padding between meshes, so 4096-2
pack_options.blockAlign = true;
pack_options.texelsPerUnit = 1.0 / p_texel_size;
xatlas::Atlas *atlas = xatlas::Create();
printf("Adding mesh..\n");
xatlas::AddMeshError err = xatlas::AddMesh(atlas, input_mesh, 1);
ERR_FAIL_COND_V_MSG(err != xatlas::AddMeshError::Success, false, xatlas::StringForEnum(err));
printf("Generate..\n");
xatlas::Generate(atlas, chart_options, pack_options);
*r_size_hint_x = atlas->width;
@ -96,7 +97,6 @@ bool xatlas_mesh_lightmap_unwrap_callback(float p_texel_size, const float *p_ver
max_y = MAX(max_y, output.vertexArray[i].uv[1]);
}
printf("Final texture size: %f,%f - max %f,%f\n", w, h, max_x, max_y);
*r_vertex_count = output.vertexCount;
for (uint32_t i = 0; i < output.indexCount; i++) {
@ -106,7 +106,7 @@ bool xatlas_mesh_lightmap_unwrap_callback(float p_texel_size, const float *p_ver
*r_index_count = output.indexCount;
xatlas::Destroy(atlas);
printf("Done\n");
return true;
}

File diff suppressed because it is too large Load Diff

View File

@ -31,12 +31,15 @@
#ifndef BAKED_INDIRECT_LIGHT_H
#define BAKED_INDIRECT_LIGHT_H
#include "core/local_vector.h"
#include "multimesh_instance.h"
#include "scene/3d/light.h"
#include "scene/3d/lightmapper.h"
#include "scene/3d/visual_instance.h"
class BakedLightmapData : public Resource {
GDCLASS(BakedLightmapData, Resource);
RES_BASE_EXTENSION("lmbake")
RID baked_light;
AABB bounds;
@ -47,7 +50,12 @@ class BakedLightmapData : public Resource {
struct User {
NodePath path;
Ref<Texture> lightmap;
struct {
Ref<Texture> single;
Ref<TextureLayered> layered;
} lightmap;
int lightmap_slice;
Rect2 lightmap_uv_rect;
int instance_index;
};
@ -75,10 +83,12 @@ public:
void set_energy(float p_energy);
float get_energy() const;
void add_user(const NodePath &p_path, const Ref<Texture> &p_lightmap, int p_instance = -1);
void add_user(const NodePath &p_path, const Ref<Resource> &p_lightmap, int p_lightmap_slice, const Rect2 &p_lightmap_uv_rect, int p_instance);
int get_user_count() const;
NodePath get_user_path(int p_user) const;
Ref<Texture> get_user_lightmap(int p_user) const;
Ref<Resource> get_user_lightmap(int p_user) const;
int get_user_lightmap_slice(int p_user) const;
Rect2 get_user_lightmap_uv_rect(int p_user) const;
int get_user_instance(int p_user) const;
void clear_users();
@ -94,12 +104,8 @@ public:
enum BakeQuality {
BAKE_QUALITY_LOW,
BAKE_QUALITY_MEDIUM,
BAKE_QUALITY_HIGH
};
enum BakeMode {
BAKE_MODE_CONE_TRACE,
BAKE_MODE_RAY_TRACE,
BAKE_QUALITY_HIGH,
BAKE_QUALITY_ULTRA
};
enum BakeError {
@ -107,108 +113,154 @@ public:
BAKE_ERROR_NO_SAVE_PATH,
BAKE_ERROR_NO_MESHES,
BAKE_ERROR_CANT_CREATE_IMAGE,
BAKE_ERROR_USER_ABORTED
BAKE_ERROR_LIGHTMAP_SIZE,
BAKE_ERROR_INVALID_MESH,
BAKE_ERROR_USER_ABORTED,
BAKE_ERROR_NO_LIGHTMAPPER
};
typedef void (*BakeBeginFunc)(int);
typedef bool (*BakeStepFunc)(int, const String &);
typedef void (*BakeEndFunc)();
// Selects which environment source is used: none, the scene's, a custom sky or a custom color.
// Enumerator order defines the integer values seen through VARIANT_ENUM_CAST.
// NOTE(review): how each mode is consumed during baking is implemented elsewhere; confirm there.
enum EnvironmentMode {
ENVIRONMENT_MODE_DISABLED,
ENVIRONMENT_MODE_SCENE,
ENVIRONMENT_MODE_CUSTOM_SKY,
ENVIRONMENT_MODE_CUSTOM_COLOR
};
// User-data bundle for a bake step callback: the callback itself, an opaque pointer,
// and a from/to percentage pair.
// NOTE(review): presumably used to remap a sub-step's progress into
// [from_percent, to_percent]; confirm against the .cpp.
struct BakeStepUD {
Lightmapper::BakeStepFunc func;
void *ud;
float from_percent;
float to_percent;
};
// One light collected by _find_meshes_and_lights(): a transform plus a raw Light pointer.
// NOTE(review): the space of xform and the lifetime of the pointer are not visible here.
struct LightsFound {
Transform xform;
Light *light;
};
// One mesh collected by _find_meshes_and_lights(), with the per-instance settings
// needed for baking.
struct MeshesFound {
Transform xform;
NodePath node_path;
int32_t subindex; // NOTE(review): presumably an instance index within the node; confirm.
Ref<Mesh> mesh;
int32_t lightmap_scale; // Stored as a plain integer here, not as GeometryInstance::LightmapScale.
Vector<Ref<Material> > overrides; // NOTE(review): presumably per-surface material overrides; confirm.
bool cast_shadows;
bool generate_lightmap;
};
private:
float bake_cell_size;
float capture_cell_size;
Vector3 extents;
float bake_default_texels_per_unit;
float propagation;
float energy;
float default_texels_per_unit;
float bias;
BakeQuality bake_quality;
BakeMode bake_mode;
bool hdr;
String image_path;
bool generate_atlas;
int max_atlas_size;
bool capture_enabled;
int bounces;
bool use_denoiser;
EnvironmentMode environment_mode;
Ref<Sky> environment_custom_sky;
Vector3 environment_custom_sky_rotation_degrees;
Color environment_custom_color;
float environment_custom_energy;
BakeQuality capture_quality;
float capture_propagation;
float capture_cell_size;
String image_path; // (Deprecated property)
Ref<BakedLightmapData> light_data;
struct PlotMesh {
Ref<Material> override_material;
Vector<Ref<Material> > instance_materials;
Ref<Mesh> mesh;
Transform local_xform;
NodePath path;
int instance_idx;
};
struct PlotLight {
Light *light;
Transform local_xform;
};
void _find_meshes_and_lights(Node *p_at_node, List<PlotMesh> &plot_meshes, List<PlotLight> &plot_lights);
void _debug_bake();
void _assign_lightmaps();
void _clear_lightmaps();
static bool _bake_time(void *ud, float p_secs, float p_progress);
void _get_material_images(const MeshesFound &p_found_mesh, Lightmapper::MeshData &r_mesh_data, Vector<Ref<Texture> > &r_albedo_textures, Vector<Ref<Texture> > &r_emission_textures);
Ref<Image> _get_irradiance_from_sky(Ref<Sky> p_sky, Vector2i p_size);
Ref<Image> _get_irradiance_map(Ref<Environment> p_env, Vector2i p_size);
void _find_meshes_and_lights(Node *p_at_node, Vector<MeshesFound> &meshes, Vector<LightsFound> &lights);
Vector2i _compute_lightmap_size(const MeshesFound &p_mesh);
// Progress bookkeeping: a text label, a pass counter and a 64-bit "last step" value.
// NOTE(review): last_step looks like a timestamp used for throttling; unit not visible here.
struct BakeTimeData {
String text;
int pass;
uint64_t last_step;
};
static bool _lightmap_bake_step_function(float p_completion, const String &p_text, void *ud, bool p_refresh);
protected:
static void _bind_methods();
void _validate_property(PropertyInfo &property) const;
void _notification(int p_what);
public:
static BakeBeginFunc bake_begin_function;
static BakeStepFunc bake_step_function;
static BakeEndFunc bake_end_function;
static Lightmapper::BakeStepFunc bake_step_function;
static Lightmapper::BakeStepFunc bake_substep_function;
void set_light_data(const Ref<BakedLightmapData> &p_data);
Ref<BakedLightmapData> get_light_data() const;
void set_bake_cell_size(float p_cell_size);
float get_bake_cell_size() const;
void set_capture_cell_size(float p_cell_size);
float get_capture_cell_size() const;
void set_extents(const Vector3 &p_extents);
Vector3 get_extents() const;
void set_bake_default_texels_per_unit(const float &p_bake_texels_per_unit);
float get_bake_default_texels_per_unit() const;
void set_default_texels_per_unit(const float &p_extents);
float get_default_texels_per_unit() const;
void set_propagation(float p_propagation);
float get_propagation() const;
void set_capture_propagation(float p_propagation);
float get_capture_propagation() const;
void set_energy(float p_energy);
float get_energy() const;
void set_capture_quality(BakeQuality p_quality);
BakeQuality get_capture_quality() const;
void set_bake_quality(BakeQuality p_quality);
BakeQuality get_bake_quality() const;
void set_bake_mode(BakeMode p_mode);
BakeMode get_bake_mode() const;
void set_generate_atlas(bool p_enabled);
bool is_generate_atlas_enabled() const;
void set_hdr(bool p_enable);
bool is_hdr() const;
void set_max_atlas_size(int p_size);
int get_max_atlas_size() const;
void set_capture_enabled(bool p_enable);
bool get_capture_enabled() const;
void set_image_path(const String &p_path);
String get_image_path() const;
void set_environment_mode(EnvironmentMode p_mode);
EnvironmentMode get_environment_mode() const;
void set_environment_custom_sky(const Ref<Sky> &p_sky);
Ref<Sky> get_environment_custom_sky() const;
void set_environment_custom_sky_rotation_degrees(const Vector3 &p_rotation);
Vector3 get_environment_custom_sky_rotation_degrees() const;
void set_environment_custom_color(const Color &p_color);
Color get_environment_custom_color() const;
void set_environment_custom_energy(float p_energy);
float get_environment_custom_energy() const;
void set_use_denoiser(bool p_enable);
bool is_using_denoiser() const;
void set_bounces(int p_bounces);
int get_bounces() const;
void set_bias(float p_bias);
float get_bias() const;
AABB get_aabb() const;
PoolVector<Face3> get_faces(uint32_t p_usage_flags) const;
BakeError bake(Node *p_from_node, bool p_create_visual_debug = false);
BakeError bake(Node *p_from_node, String p_data_save_path = "");
BakedLightmap();
};
VARIANT_ENUM_CAST(BakedLightmap::BakeQuality);
VARIANT_ENUM_CAST(BakedLightmap::BakeMode);
VARIANT_ENUM_CAST(BakedLightmap::BakeError);
VARIANT_ENUM_CAST(BakedLightmap::EnvironmentMode);
#endif // BAKED_INDIRECT_LIGHT_H

76
scene/3d/lightmapper.cpp Normal file
View File

@ -0,0 +1,76 @@
/*************************************************************************/
/* lightmapper.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "lightmapper.h"
LightmapDenoiser *(*LightmapDenoiser::create_function)() = nullptr;
// Builds a denoiser through the registered factory, or returns a null reference
// when no factory has been installed.
Ref<LightmapDenoiser> LightmapDenoiser::create() {
	if (!create_function) {
		return Ref<LightmapDenoiser>();
	}
	return Ref<LightmapDenoiser>(create_function());
}
LightmapRaycaster *(*LightmapRaycaster::create_function)() = nullptr;
// Builds a raycaster through the registered factory, or returns a null reference
// when no factory has been installed.
Ref<LightmapRaycaster> LightmapRaycaster::create() {
	if (!create_function) {
		return Ref<LightmapRaycaster>();
	}
	return Ref<LightmapRaycaster>(create_function());
}
Lightmapper::CreateFunc Lightmapper::create_custom = nullptr;
Lightmapper::CreateFunc Lightmapper::create_gpu = nullptr;
Lightmapper::CreateFunc Lightmapper::create_cpu = nullptr;
// Instantiates a lightmapper using the first factory that yields an object.
// Factories are tried in priority order: custom, then GPU, then CPU. A later
// factory is only called when every earlier one is unset or returned null.
// Returns a null reference when none produced an instance.
Ref<Lightmapper> Lightmapper::create() {
	Lightmapper *instance = create_custom ? create_custom() : nullptr;

	if (instance == nullptr && create_gpu) {
		instance = create_gpu();
	}

	if (instance == nullptr && create_cpu) {
		instance = create_cpu();
	}

	if (instance != nullptr) {
		return Ref<Lightmapper>(instance);
	}
	return Ref<Lightmapper>();
}
// The base class has no state of its own to initialize.
Lightmapper::Lightmapper() {
}

196
scene/3d/lightmapper.h Normal file
View File

@ -0,0 +1,196 @@
/*************************************************************************/
/* lightmapper.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef LIGHTMAPPER_H
#define LIGHTMAPPER_H
#include "scene/resources/mesh.h"
// Portable alignment attribute, defined only if the toolchain has not already provided
// `__aligned`. Expands to __declspec(align(N)) on Windows (excluding Cygwin) and to
// __attribute__((aligned(N))) everywhere else.
#if !defined(__aligned)
#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)) && !defined(__CYGWIN__)
#define __aligned(...) __declspec(align(__VA_ARGS__))
#else
#define __aligned(...) __attribute__((aligned(__VA_ARGS__)))
#endif
#endif
// Abstract image denoiser interface. Implementations are obtained through create(),
// which uses the `create_function` factory hook.
class LightmapDenoiser : public Reference {
GDCLASS(LightmapDenoiser, Reference)
protected:
// Factory hook consulted by create(); null until an implementation assigns it.
static LightmapDenoiser *(*create_function)();
public:
// Takes an image and returns an image.
// NOTE(review): whether the input may be modified or returned as-is is implementation-defined.
virtual Ref<Image> denoise_image(const Ref<Image> &p_image) = 0;
// Returns a new denoiser, or a null reference when no factory is registered.
static Ref<LightmapDenoiser> create();
};
// Abstract ray/scene intersection interface. Implementations are obtained through
// create(), which uses the `create_function` factory hook.
class LightmapRaycaster : public Reference {
	GDCLASS(LightmapRaycaster, Reference)
protected:
	// Factory hook consulted by create(); null until an implementation assigns it.
	static LightmapRaycaster *(*create_function)();

public:
	// compatible with embree3 rays
	// Member order and the 16-byte alignment are part of that compatibility; do not reorder fields.
	struct __aligned(16) Ray {
		const static unsigned int INVALID_GEOMETRY_ID = ((unsigned int)-1); // from rtcore_common.h

		/*! Default construction only marks the ray as "no hit" (geomID is
		 * invalid). Every other field is left for the caller to fill in. */
		_FORCE_INLINE_ Ray() :
				geomID(INVALID_GEOMETRY_ID) {}

		/*! Constructs a ray from origin, direction, and ray segment. Near
		 * has to be smaller than far. The ray id and flags start at 0 so no
		 * indeterminate value is ever handed to the ray tracing backend. */
		_FORCE_INLINE_ Ray(const Vector3 &org,
				const Vector3 &dir,
				float tnear = 0.0f,
				float tfar = INFINITY) :
				org(org),
				tnear(tnear),
				dir(dir),
				time(0.0f),
				tfar(tfar),
				mask(-1),
				id(0),
				flags(0),
				u(0.0),
				v(0.0),
				primID(INVALID_GEOMETRY_ID),
				geomID(INVALID_GEOMETRY_ID),
				instID(INVALID_GEOMETRY_ID) {}

		/*! Tests if we hit something. */
		_FORCE_INLINE_ explicit operator bool() const { return geomID != INVALID_GEOMETRY_ID; }

	public:
		Vector3 org; //!< Ray origin + tnear
		float tnear; //!< Start of ray segment
		Vector3 dir; //!< Ray direction + tfar
		float time; //!< Time of this ray for motion blur.
		float tfar; //!< End of ray segment
		unsigned int mask; //!< used to mask out objects during traversal
		unsigned int id; //!< ray ID
		unsigned int flags; //!< ray flags

		Vector3 normal; //!< Not normalized geometry normal
		float u; //!< Barycentric u coordinate of hit
		float v; //!< Barycentric v coordinate of hit
		unsigned int primID; //!< primitive ID
		unsigned int geomID; //!< geometry ID
		unsigned int instID; //!< instance ID
	};

	// Single-ray and batched intersection queries; both take the rays by mutable
	// reference, so results are written back into the Ray objects.
	virtual bool intersect(Ray &p_ray) = 0;
	virtual void intersect(Vector<Ray> &r_rays) = 0;

	// Scene construction.
	// NOTE(review): presumably commit() must run after the meshes are added and
	// before intersecting; confirm against the implementation.
	virtual void add_mesh(const Vector<Vector3> &p_vertices, const Vector<Vector3> &p_normals, const Vector<Vector2> &p_uv2s, unsigned int p_id) = 0;
	virtual void set_mesh_alpha_texture(Ref<Image> p_alpha_texture, unsigned int p_id) = 0;
	virtual void commit() = 0;

	// Installs or clears a set of mesh ids used as a filter by the implementation.
	virtual void set_mesh_filter(const Set<int> &p_mesh_ids) = 0;
	virtual void clear_mesh_filter() = 0;

	// Returns a new raycaster, or a null reference when no factory is registered.
	static Ref<LightmapRaycaster> create();
};
// Abstract lightmap baker interface. Callers feed it textures, meshes and lights,
// call bake(), then read back the resulting textures and per-mesh placement data.
// Instances come from create(), which tries the create_custom, create_gpu and
// create_cpu factories in that order.
class Lightmapper : public Reference {
GDCLASS(Lightmapper, Reference)
public:
enum LightType {
LIGHT_TYPE_DIRECTIONAL,
LIGHT_TYPE_OMNI,
LIGHT_TYPE_SPOT
};
// Result codes of bake(). Note that BAKE_OK is the LAST enumerator, so it is not 0.
enum BakeError {
BAKE_ERROR_LIGHTMAP_TOO_SMALL,
BAKE_ERROR_LIGHTMAP_CANT_PRE_BAKE_MESHES,
BAKE_ERROR_NO_MESHES,
BAKE_ERROR_USER_ABORTED,
BAKE_ERROR_NO_RAYCASTER,
BAKE_OK
};
enum BakeQuality {
BAKE_QUALITY_LOW,
BAKE_QUALITY_MEDIUM,
BAKE_QUALITY_HIGH,
BAKE_QUALITY_ULTRA,
};
// Factory signature and the three factory slots consulted by create(); all start as null.
typedef Lightmapper *(*CreateFunc)();
static CreateFunc create_custom;
static CreateFunc create_gpu;
static CreateFunc create_cpu;
protected:
public:
typedef bool (*BakeStepFunc)(float, const String &, void *, bool); //progress, step description, userdata, force refresh
// Geometry and material inputs for one mesh handed to add_mesh().
struct MeshData {
// Texture reference with a multiply and an add color.
// NOTE(review): presumably applied as tex * mul + add; confirm in the implementation.
struct TextureDef {
RID tex_rid;
Color mul;
Color add;
};
//triangle data
Vector<Vector3> points;
Vector<Vector2> uv;
Vector<Vector2> uv2;
Vector<Vector3> normal;
Vector<TextureDef> albedo;
Vector<TextureDef> emission;
Vector<int> surface_facecounts;
// Opaque caller payload; see get_bake_mesh_userdata().
Variant userdata;
};
virtual void add_albedo_texture(Ref<Texture> p_texture) = 0;
virtual void add_emission_texture(Ref<Texture> p_texture) = 0;
virtual void add_mesh(const MeshData &p_mesh, Vector2i p_size) = 0;
virtual void add_directional_light(bool p_bake_direct, const Vector3 &p_direction, const Color &p_color, float p_energy, float p_indirect_multiplier) = 0;
virtual void add_omni_light(bool p_bake_direct, const Vector3 &p_position, const Color &p_color, float p_energy, float p_indirect_multiplier, float p_range, float p_attenuation) = 0;
// NOTE(review): p_direction is taken by value here, unlike the other Vector3 parameters.
virtual void add_spot_light(bool p_bake_direct, const Vector3 &p_position, const Vector3 p_direction, const Color &p_color, float p_energy, float p_indirect_multiplier, float p_range, float p_attenuation, float p_spot_angle, float p_spot_attenuation) = 0;
// Runs the bake. The step/substep callbacks and their userdata are optional (default null).
virtual BakeError bake(BakeQuality p_quality, bool p_use_denoiser, int p_bounces, float p_bias, bool p_generate_atlas, int p_max_texture_size, const Ref<Image> &p_environment_panorama, const Basis &p_environment_transform, BakeStepFunc p_step_function = nullptr, void *p_step_userdata = nullptr, BakeStepFunc p_substep_function = nullptr) = 0;
// Result accessors, indexed by texture or by mesh.
virtual int get_bake_texture_count() const = 0;
virtual Ref<Image> get_bake_texture(int p_index) const = 0;
virtual int get_bake_mesh_count() const = 0;
virtual Variant get_bake_mesh_userdata(int p_index) const = 0;
virtual Rect2 get_bake_mesh_uv_scale(int p_index) const = 0;
virtual int get_bake_mesh_texture_slice(int p_index) const = 0;
// Returns a new lightmapper, or a null reference when no factory produced one.
static Ref<Lightmapper> create();
Lightmapper();
};
#endif // LIGHTMAPPER_H

View File

@ -170,6 +170,23 @@ Ref<Material> GeometryInstance::get_material_override() const {
return material_override;
}
// Stores the generate_lightmap flag. No other state is touched.
void GeometryInstance::set_generate_lightmap(bool p_enabled) {
	generate_lightmap = p_enabled;
}
// Returns the stored generate_lightmap flag.
// NOTE(review): unlike get_lightmap_scale(), this getter is not const-qualified.
bool GeometryInstance::get_generate_lightmap() {
	return generate_lightmap;
}
// Stores the lightmap scale after checking it against LIGHTMAP_SCALE_MAX.
// ERR_FAIL_INDEX is expected to report an error and return early for an
// out-of-range value, leaving the current scale untouched.
void GeometryInstance::set_lightmap_scale(LightmapScale p_scale) {
	ERR_FAIL_INDEX(p_scale, LIGHTMAP_SCALE_MAX);
	lightmap_scale = p_scale;
}
// Returns the stored lightmap scale.
GeometryInstance::LightmapScale GeometryInstance::get_lightmap_scale() const {
	return lightmap_scale;
}
void GeometryInstance::set_lod_min_distance(float p_dist) {
lod_min_distance = p_dist;
@ -274,6 +291,12 @@ void GeometryInstance::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_cast_shadows_setting", "shadow_casting_setting"), &GeometryInstance::set_cast_shadows_setting);
ClassDB::bind_method(D_METHOD("get_cast_shadows_setting"), &GeometryInstance::get_cast_shadows_setting);
ClassDB::bind_method(D_METHOD("set_generate_lightmap", "enabled"), &GeometryInstance::set_generate_lightmap);
ClassDB::bind_method(D_METHOD("get_generate_lightmap"), &GeometryInstance::get_generate_lightmap);
ClassDB::bind_method(D_METHOD("set_lightmap_scale", "scale"), &GeometryInstance::set_lightmap_scale);
ClassDB::bind_method(D_METHOD("get_lightmap_scale"), &GeometryInstance::get_lightmap_scale);
ClassDB::bind_method(D_METHOD("set_lod_max_hysteresis", "mode"), &GeometryInstance::set_lod_max_hysteresis);
ClassDB::bind_method(D_METHOD("get_lod_max_hysteresis"), &GeometryInstance::get_lod_max_hysteresis);
@ -297,7 +320,11 @@ void GeometryInstance::_bind_methods() {
ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "material_override", PROPERTY_HINT_RESOURCE_TYPE, "ShaderMaterial,SpatialMaterial"), "set_material_override", "get_material_override");
ADD_PROPERTY(PropertyInfo(Variant::INT, "cast_shadow", PROPERTY_HINT_ENUM, "Off,On,Double-Sided,Shadows Only"), "set_cast_shadows_setting", "get_cast_shadows_setting");
ADD_PROPERTY(PropertyInfo(Variant::REAL, "extra_cull_margin", PROPERTY_HINT_RANGE, "0,16384,0.01"), "set_extra_cull_margin", "get_extra_cull_margin");
ADD_GROUP("Baked Light", "");
ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "use_in_baked_light"), "set_flag", "get_flag", FLAG_USE_BAKED_LIGHT);
ADD_PROPERTY(PropertyInfo(Variant::BOOL, "generate_lightmap"), "set_generate_lightmap", "get_generate_lightmap");
ADD_PROPERTY(PropertyInfo(Variant::INT, "lightmap_scale", PROPERTY_HINT_ENUM, "1x,2x,4x,8x"), "set_lightmap_scale", "get_lightmap_scale");
ADD_GROUP("LOD", "lod_");
ADD_PROPERTY(PropertyInfo(Variant::INT, "lod_min_distance", PROPERTY_HINT_RANGE, "0,32768,0.01"), "set_lod_min_distance", "get_lod_min_distance");
@ -307,6 +334,12 @@ void GeometryInstance::_bind_methods() {
//ADD_SIGNAL( MethodInfo("visibility_changed"));
BIND_ENUM_CONSTANT(LIGHTMAP_SCALE_1X);
BIND_ENUM_CONSTANT(LIGHTMAP_SCALE_2X);
BIND_ENUM_CONSTANT(LIGHTMAP_SCALE_4X);
BIND_ENUM_CONSTANT(LIGHTMAP_SCALE_8X);
BIND_ENUM_CONSTANT(LIGHTMAP_SCALE_MAX);
BIND_ENUM_CONSTANT(SHADOW_CASTING_SETTING_OFF);
BIND_ENUM_CONSTANT(SHADOW_CASTING_SETTING_ON);
BIND_ENUM_CONSTANT(SHADOW_CASTING_SETTING_DOUBLE_SIDED);
@ -329,5 +362,7 @@ GeometryInstance::GeometryInstance() {
shadow_casting_setting = SHADOW_CASTING_SETTING_ON;
extra_cull_margin = 0;
generate_lightmap = true;
lightmap_scale = LightmapScale::LIGHTMAP_SCALE_1X;
//VS::get_singleton()->instance_geometry_set_baked_light_texture_index(get_instance(),0);
}

View File

@ -91,6 +91,14 @@ public:
FLAG_MAX = VS::INSTANCE_FLAG_MAX,
};
// Per-instance lightmap scale selector, exposed through the "lightmap_scale" property
// with the labels 1x, 2x, 4x, 8x. Values are consecutive starting at 0.
// LIGHTMAP_SCALE_MAX is a count/sentinel: set_lightmap_scale() uses it as the exclusive upper bound.
enum LightmapScale {
LIGHTMAP_SCALE_1X,
LIGHTMAP_SCALE_2X,
LIGHTMAP_SCALE_4X,
LIGHTMAP_SCALE_8X,
LIGHTMAP_SCALE_MAX,
};
enum ShadowCastingSetting {
SHADOW_CASTING_SETTING_OFF = VS::SHADOW_CASTING_SETTING_OFF,
SHADOW_CASTING_SETTING_ON = VS::SHADOW_CASTING_SETTING_ON,
@ -100,6 +108,8 @@ public:
private:
bool flags[FLAG_MAX];
bool generate_lightmap;
LightmapScale lightmap_scale;
ShadowCastingSetting shadow_casting_setting;
Ref<Material> material_override;
float lod_min_distance;
@ -120,6 +130,15 @@ public:
void set_cast_shadows_setting(ShadowCastingSetting p_shadow_casting_setting);
ShadowCastingSetting get_cast_shadows_setting() const;
void set_bake_cast_shadows(bool p_enabled);
bool get_bake_cast_shadows();
void set_generate_lightmap(bool p_enabled);
bool get_generate_lightmap();
void set_lightmap_scale(LightmapScale p_scale);
LightmapScale get_lightmap_scale() const;
void set_lod_min_distance(float p_dist);
float get_lod_min_distance() const;
@ -144,6 +163,7 @@ public:
};
VARIANT_ENUM_CAST(GeometryInstance::Flags);
VARIANT_ENUM_CAST(GeometryInstance::LightmapScale);
VARIANT_ENUM_CAST(GeometryInstance::ShadowCastingSetting);
#endif

View File

@ -718,12 +718,10 @@ void VoxelLightBaker::_init_light_plot(int p_idx, int p_level, int p_x, int p_y,
}
}
void VoxelLightBaker::begin_bake_light(BakeQuality p_quality, BakeMode p_bake_mode, float p_propagation, float p_energy) {
void VoxelLightBaker::begin_bake_light(BakeQuality p_quality, float p_propagation) {
_check_init_light();
propagation = p_propagation;
bake_quality = p_quality;
bake_mode = p_bake_mode;
energy = p_energy;
}
void VoxelLightBaker::_check_init_light() {
@ -733,7 +731,6 @@ void VoxelLightBaker::_check_init_light() {
leaf_voxel_count = 0;
_fixup_plot(0, 0); //pre fixup, so normal, albedo, emission, etc. work for lighting.
bake_light.resize(bake_cells.size());
print_line("bake light size: " + itos(bake_light.size()));
//zeromem(bake_light.ptrw(), bake_light.size() * sizeof(Light));
first_leaf = -1;
_init_light_plot(0, 0, 0, 0, 0, CHILD_EMPTY);
@ -1289,872 +1286,6 @@ void VoxelLightBaker::_fixup_plot(int p_idx, int p_level) {
}
}
//make sure any cell (save for the root) has an empty cell previous to it, so it can be interpolated into
// Scanline-rasterizes one triangle into the `pixels` grid (width x height, row-major).
// `vertices` are 2D coordinates that get multiplied by width/height to obtain texel
// coordinates. For every covered texel the position and normal are interpolated from
// the three corners with barycentric weights and stored in pixels[y * width + x].
// Side effect: `positions` and `normals` are reordered in place while sorting the corners by y.
void VoxelLightBaker::_plot_triangle(Vector2 *vertices, Vector3 *positions, Vector3 *normals, LightMap *pixels, int width, int height) {
int x[3];
int y[3];
// Convert to integer texel coordinates (float -> int truncation).
for (int j = 0; j < 3; j++) {
x[j] = vertices[j].x * width;
y[j] = vertices[j].y * height;
//x[j] = CLAMP(x[j], 0, bt.width - 1);
//y[j] = CLAMP(y[j], 0, bt.height - 1);
}
// sort the points vertically
// (three compare-and-swap steps; attributes are swapped together with the coordinates)
if (y[1] > y[2]) {
SWAP(x[1], x[2]);
SWAP(y[1], y[2]);
SWAP(positions[1], positions[2]);
SWAP(normals[1], normals[2]);
}
if (y[0] > y[1]) {
SWAP(x[0], x[1]);
SWAP(y[0], y[1]);
SWAP(positions[0], positions[1]);
SWAP(normals[0], normals[1]);
}
if (y[1] > y[2]) {
SWAP(x[1], x[2]);
SWAP(y[1], y[2]);
SWAP(positions[1], positions[2]);
SWAP(normals[1], normals[2]);
}
// Per-scanline x increments of the three edges. The +1 keeps the integer divisor
// non-zero for horizontal edges, since y is sorted ascending at this point.
double dx_far = double(x[2] - x[0]) / (y[2] - y[0] + 1);
double dx_upper = double(x[1] - x[0]) / (y[1] - y[0] + 1);
double dx_low = double(x[2] - x[1]) / (y[2] - y[1] + 1);
// xf walks the long edge (corner 0 -> 2); xt walks the two short edges (0 -> 1, then 1 -> 2).
double xf = x[0];
double xt = x[0] + dx_upper; // if y[0] == y[1], special case
// Rows are clamped to the bottom of the grid here; rows above it are skipped by the yi >= 0 test.
for (int yi = y[0]; yi <= (y[2] > height - 1 ? height - 1 : y[2]); yi++) {
if (yi >= 0) {
// Span walked left-to-right; runs zero times when xt lies left of xf.
for (int xi = (xf > 0 ? int(xf) : 0); xi <= (xt < width ? xt : width - 1); xi++) {
//pixels[int(x + y * width)] = color;
// Barycentric weights of texel (xi, yi) relative to the sorted triangle corners.
Vector2 v0 = Vector2(x[1] - x[0], y[1] - y[0]);
Vector2 v1 = Vector2(x[2] - x[0], y[2] - y[0]);
//vertices[2] - vertices[0];
Vector2 v2 = Vector2(xi - x[0], yi - y[0]);
float d00 = v0.dot(v0);
float d01 = v0.dot(v1);
float d11 = v1.dot(v1);
float d20 = v2.dot(v0);
float d21 = v2.dot(v1);
float denom = (d00 * d11 - d01 * d01);
Vector3 pos;
Vector3 normal;
// Degenerate (zero-area) triangle in texel space: fall back to corner 0.
if (denom == 0) {
pos = positions[0];
normal = normals[0];
} else {
float v = (d11 * d20 - d01 * d21) / denom;
float w = (d00 * d21 - d01 * d20) / denom;
float u = 1.0f - v - w;
pos = positions[0] * u + positions[1] * v + positions[2] * w;
normal = normals[0] * u + normals[1] * v + normals[2] * w;
}
int ofs = yi * width + xi;
pixels[ofs].normal = normal;
pixels[ofs].pos = pos;
}
// Same span walked right-to-left, for the case where xt lies left of xf.
// The body duplicates the interpolation above.
for (int xi = (xf < width ? int(xf) : width - 1); xi >= (xt > 0 ? xt : 0); xi--) {
//pixels[int(x + y * width)] = color;
Vector2 v0 = Vector2(x[1] - x[0], y[1] - y[0]);
Vector2 v1 = Vector2(x[2] - x[0], y[2] - y[0]);
//vertices[2] - vertices[0];
Vector2 v2 = Vector2(xi - x[0], yi - y[0]);
float d00 = v0.dot(v0);
float d01 = v0.dot(v1);
float d11 = v1.dot(v1);
float d20 = v2.dot(v0);
float d21 = v2.dot(v1);
float denom = (d00 * d11 - d01 * d01);
Vector3 pos;
Vector3 normal;
if (denom == 0) {
pos = positions[0];
normal = normals[0];
} else {
float v = (d11 * d20 - d01 * d21) / denom;
float w = (d00 * d21 - d01 * d20) / denom;
float u = 1.0f - v - w;
pos = positions[0] * u + positions[1] * v + positions[2] * w;
normal = normals[0] * u + normals[1] * v + normals[2] * w;
}
int ofs = yi * width + xi;
pixels[ofs].normal = normal;
pixels[ofs].pos = pos;
}
}
// Advance both edge walkers to the next scanline; the short edge switches slope at the middle corner.
xf += dx_far;
if (yi < y[1])
xt += dx_upper;
else
xt += dx_low;
}
}
// Samples the baked octree around `p_posf` for light travelling along
// `p_direction`, blending between two adjacent octree levels.
//
// p_posf      - sample position; it is clamped to [0, size - 1] on each axis.
// p_direction - direction used to weight the six anisotropic light lobes.
// p_level     - filter level counted upwards from the deepest octree level
//               (0 selects the deepest level, larger values coarser ones).
// r_color     - receives the interpolated light + emission.
// r_alpha     - receives the interpolated cell alpha.
void VoxelLightBaker::_sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha) {
// Edge length of the grid measured in deepest-level cells.
int size = 1 << (cell_subdiv - 1);
int clamp_v = size - 1;
// Clamp the sample position into the grid before doing anything else.
Vector3 pos;
pos.x = CLAMP(p_posf.x, 0, clamp_v);
pos.y = CLAMP(p_posf.y, 0, clamp_v);
pos.z = CLAMP(p_posf.z, 0, clamp_v);
// Convert the "levels above the deepest" input into an octree depth, where
// depth 0 is the root cell (the descent below starts at cell 0).
float level = (cell_subdiv - 1) - p_level;
int target_level;
float level_filter;
if (level <= 0.0) {
level_filter = 0;
target_level = 0;
} else {
// target_level is the finer of the two depths that get sampled;
// level_filter (in [0, 1)) is the blend weight towards depth target_level - 1.
target_level = Math::ceil(level);
level_filter = target_level - level;
}
const Cell *cells = bake_cells.ptr();
const Light *light = bake_light.ptr();
// [0][*] holds the 8 corner samples at target_level, [1][*] those one level
// coarser. NOTE(review): color[][] relies on Vector3's default constructor
// for its starting value before the += accumulation below - confirm it zeroes.
Vector3 color[2][8];
float alpha[2][8];
zeromem(alpha, sizeof(float) * 2 * 8);
// Gather the 8 corner cells for each of the two levels.
for (int c = 0; c < 2; c++) {
int current_level = MAX(0, target_level - c);
// Cell edge length (in deepest-level cells) at the level being sampled.
int level_cell_size = (1 << (cell_subdiv - 1)) >> current_level;
for (int n = 0; n < 8; n++) {
// Bits 0/1/2 of n select the +x/+y/+z neighbour, one cell away.
int x = int(pos.x);
int y = int(pos.y);
int z = int(pos.z);
if (n & 1)
x += level_cell_size;
if (n & 2)
y += level_cell_size;
if (n & 4)
z += level_cell_size;
int ofs_x = 0;
int ofs_y = 0;
int ofs_z = 0;
x = CLAMP(x, 0, clamp_v);
y = CLAMP(y, 0, clamp_v);
z = CLAMP(z, 0, clamp_v);
int half = size / 2;
uint32_t cell = 0;
// Walk down from the root, picking the child octant that contains
// (x, y, z) at every step, until current_level is reached or an
// empty child is hit.
for (int i = 0; i < current_level; i++) {
const Cell *bc = &cells[cell];
int child = 0;
if (x >= ofs_x + half) {
child |= 1;
ofs_x += half;
}
if (y >= ofs_y + half) {
child |= 2;
ofs_y += half;
}
if (z >= ofs_z + half) {
child |= 4;
ofs_z += half;
}
cell = bc->children[child];
if (cell == CHILD_EMPTY)
break;
half >>= 1;
}
if (cell == CHILD_EMPTY) {
// Nothing stored here: fully transparent, colour left untouched.
alpha[c][n] = 0;
} else {
alpha[c][n] = cells[cell].alpha;
for (int i = 0; i < 6; i++) {
// Anisotropic light read: weight each of the six lobes by how
// much it faces p_direction, ignoring back-facing lobes.
float amount = p_direction.dot(aniso_normal[i]);
if (amount < 0)
amount = 0;
color[c][n].x += light[cell].accum[i][0] * amount;
color[c][n].y += light[cell].accum[i][1] * amount;
color[c][n].z += light[cell].accum[i][2] * amount;
}
// Emission is added unweighted by direction.
color[c][n].x += cells[cell].emission[0];
color[c][n].y += cells[cell].emission[1];
color[c][n].z += cells[cell].emission[2];
}
}
}
// Fractional position inside the cell at each of the two levels; these are
// the trilinear interpolation weights.
float target_level_size = size >> target_level;
Vector3 pos_fract[2];
pos_fract[0].x = Math::fmod(pos.x, target_level_size) / target_level_size;
pos_fract[0].y = Math::fmod(pos.y, target_level_size) / target_level_size;
pos_fract[0].z = Math::fmod(pos.z, target_level_size) / target_level_size;
target_level_size = size >> MAX(0, target_level - 1);
pos_fract[1].x = Math::fmod(pos.x, target_level_size) / target_level_size;
pos_fract[1].y = Math::fmod(pos.y, target_level_size) / target_level_size;
pos_fract[1].z = Math::fmod(pos.z, target_level_size) / target_level_size;
float alpha_interp[2];
Vector3 color_interp[2];
// Trilinear interpolation of the 8 corners, per level: first along x, then
// y, then z. Corner index bits match the n bits used when gathering.
for (int i = 0; i < 2; i++) {
Vector3 color_x00 = color[i][0].linear_interpolate(color[i][1], pos_fract[i].x);
Vector3 color_xy0 = color[i][2].linear_interpolate(color[i][3], pos_fract[i].x);
Vector3 blend_z0 = color_x00.linear_interpolate(color_xy0, pos_fract[i].y);
Vector3 color_x0z = color[i][4].linear_interpolate(color[i][5], pos_fract[i].x);
Vector3 color_xyz = color[i][6].linear_interpolate(color[i][7], pos_fract[i].x);
Vector3 blend_z1 = color_x0z.linear_interpolate(color_xyz, pos_fract[i].y);
color_interp[i] = blend_z0.linear_interpolate(blend_z1, pos_fract[i].z);
float alpha_x00 = Math::lerp(alpha[i][0], alpha[i][1], pos_fract[i].x);
float alpha_xy0 = Math::lerp(alpha[i][2], alpha[i][3], pos_fract[i].x);
float alpha_z0 = Math::lerp(alpha_x00, alpha_xy0, pos_fract[i].y);
float alpha_x0z = Math::lerp(alpha[i][4], alpha[i][5], pos_fract[i].x);
float alpha_xyz = Math::lerp(alpha[i][6], alpha[i][7], pos_fract[i].x);
float alpha_z1 = Math::lerp(alpha_x0z, alpha_xyz, pos_fract[i].y);
alpha_interp[i] = Math::lerp(alpha_z0, alpha_z1, pos_fract[i].z);
}
// Final blend between the finer (index 0) and coarser (index 1) level.
r_color = color_interp[0].linear_interpolate(color_interp[1], level_filter);
r_alpha = Math::lerp(alpha_interp[0], alpha_interp[1], level_filter);
}
// Marches a single cone through the baked octree and returns the light it
// gathers.
//
// p_pos      - cone apex.
// p_normal   - cone axis; also passed on as the sampling direction.
// p_aperture - multiplied by twice the travelled distance to get the cone
//              diameter at each step.
//
// Samples are composited front to back: each one is weighted by the
// remaining transmittance (1 - accumulated alpha). Marching stops once the
// accumulated alpha reaches 0.95 or the travelled distance reaches the
// length of the grid diagonal.
Vector3 VoxelLightBaker::_voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture) {

	// Start a little away from the apex.
	float start_offset = 2.5;
	float trace_limit = (Vector3(1, 1, 1) * (1 << (cell_subdiv - 1))).length();

	Vector3 gathered;
	float occlusion = 0.0;

	for (float travelled = start_offset; travelled < trace_limit && occlusion < 0.95;) {
		// The cone never gets narrower than one cell.
		float diameter = MAX(1.0, 2.0 * p_aperture * travelled);

		Vector3 sample_color;
		float sample_alpha;
		_sample_baked_octree_filtered_and_anisotropic(p_pos + travelled * p_normal, p_normal, log2(diameter), sample_color, sample_alpha);

		float transmittance = (1.0 - occlusion);
		gathered += sample_color * transmittance;
		occlusion += transmittance * sample_alpha;

		// Step by half the current diameter.
		travelled += diameter * 0.5;
	}

	return gathered;
}
// Computes the light arriving at `p_pos` by tracing a fixed set of cones
// spread around `p_normal` and summing their weighted results.
//
// The number of cones, their directions (given around the +Z axis), their
// weights and their aperture are selected by `bake_quality`. For any other
// quality value no cone is traced and a default-constructed Vector3 is
// returned.
Vector3 VoxelLightBaker::_compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {

	// Pick a helper axis that is not (nearly) parallel to the normal, then
	// derive a tangent/bitangent pair and build the matrix used to rotate
	// the +Z based cone tables onto the normal.
	Vector3 helper_axis;
	if (Math::abs(p_normal.z) < 0.999) {
		helper_axis = Vector3(0, 0, 1);
	} else {
		helper_axis = Vector3(0, 1, 0);
	}
	Vector3 tangent = helper_axis.cross(p_normal).normalized();
	Vector3 bitangent = tangent.cross(p_normal).normalized();
	Basis to_surface = Basis(tangent, bitangent, p_normal).transposed();

	const Vector3 *table_dirs = NULL;
	const float *table_weights = NULL;
	int table_size = 0;
	float aperture = 0;

	switch (bake_quality) {
		case BAKE_QUALITY_LOW: {
			// Low quality: 4 wide cones.
			static const Vector3 low_dirs[4] = {
				Vector3(Math_SQRT12, 0, Math_SQRT12),
				Vector3(0, Math_SQRT12, Math_SQRT12),
				Vector3(-Math_SQRT12, 0, Math_SQRT12),
				Vector3(0, -Math_SQRT12, Math_SQRT12)
			};
			static const float low_weights[4] = { 0.25, 0.25, 0.25, 0.25 };

			table_dirs = low_dirs;
			table_weights = low_weights;
			table_size = 4;
			aperture = 1.0; // tan(45 deg): half-angle of a 90 degree cone.
		} break;
		case BAKE_QUALITY_MEDIUM: {
			// Medium quality: 1 cone along the normal plus a ring of 5.
			static const Vector3 medium_dirs[6] = {
				Vector3(0, 0, 1),
				Vector3(0.866025, 0, 0.5),
				Vector3(0.267617, 0.823639, 0.5),
				Vector3(-0.700629, 0.509037, 0.5),
				Vector3(-0.700629, -0.509037, 0.5),
				Vector3(0.267617, -0.823639, 0.5)
			};
			static const float medium_weights[6] = { 0.25f, 0.15f, 0.15f, 0.15f, 0.15f, 0.15f };

			table_dirs = medium_dirs;
			table_weights = medium_weights;
			table_size = 6;
			aperture = 0.577; // ~tan(30 deg): half-angle of a 60 degree cone.
		} break;
		case BAKE_QUALITY_HIGH: {
			// High quality: 10 narrower cones.
			static const Vector3 high_dirs[10] = {
				Vector3(0.8781648411741658, 0.0, 0.478358141694643),
				Vector3(0.5369754325592234, 0.6794204427701518, 0.5000452447267606),
				Vector3(-0.19849436573466497, 0.8429904390140635, 0.49996710542041645),
				Vector3(-0.7856196499811189, 0.3639120321329737, 0.5003696617825604),
				Vector3(-0.7856196499811189, -0.3639120321329737, 0.5003696617825604),
				Vector3(-0.19849436573466497, -0.8429904390140635, 0.49996710542041645),
				Vector3(0.5369754325592234, -0.6794204427701518, 0.5000452447267606),
				Vector3(-0.4451656858129485, 0.0, 0.8954482185892644),
				Vector3(0.19124006749743122, 0.39355745585016605, 0.8991883926788214),
				Vector3(0.19124006749743122, -0.39355745585016605, 0.8991883926788214),
			};
			static const float high_weights[10] = { 0.08571f, 0.08571f, 0.08571f, 0.08571f, 0.08571f, 0.08571f, 0.08571f, 0.133333f, 0.133333f, 0.13333f };

			table_dirs = high_dirs;
			table_weights = high_weights;
			table_size = 10;
			aperture = 0.404; // Roughly the tangent of the half-angle of a 45 degree cone.
		} break;
	}

	Vector3 total;
	int cone = 0;
	while (cone < table_size) {
		// Re-normalize after rotating: the direction may not be exactly unit
		// length once transformed.
		Vector3 cone_axis = to_surface.xform(table_dirs[cone]).normalized();
		total += _voxel_cone_trace(p_pos, cone_axis, aperture) * table_weights[cone];
		cone++;
	}

	return total;
}
// Advances a 32-bit xorshift generator by one step.
//
// state - in/out generator state; it is overwritten with the new value.
// Returns the new state, which is also the generated number.
//
// Zero is a fixed point of the shift/xor sequence (0 maps to 0), so a
// generator seeded with 0 would emit 0 forever. A zero state is therefore
// replaced by an arbitrary non-zero constant before stepping. Every non-zero
// state produces exactly the same sequence as the plain algorithm.
_ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
	/* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */
	uint32_t x = *state;
	if (x == 0) {
		x = 0x9E3779B9u; // Any non-zero value works; this one is arbitrary.
	}
	x ^= x << 13;
	x ^= x >> 17;
	x ^= x << 5;
	*state = x;
	return x;
}
// Estimates the light arriving at `p_pos` by shooting randomly distributed
// rays around `p_normal` through the octree and averaging what they hit.
//
// p_pos    - ray origin.
// p_normal - surface normal; rays are generated around +Z and rotated onto it.
// Returns the sum of all hit contributions divided by the number of rays.
Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
// Ray count per quality setting, indexed directly by bake_quality.
// NOTE(review): assumes the BakeQuality values are 0, 1 and 2 - confirm.
int samples_per_quality[3] = { 48, 128, 512 };
int samples = samples_per_quality[bake_quality];
// Build a basis whose Z axis is the normal.
Vector3 v0 = Math::abs(p_normal.z) < 0.999 ? Vector3(0, 0, 1) : Vector3(0, 1, 0);
Vector3 tangent = v0.cross(p_normal).normalized();
Vector3 bitangent = tangent.cross(p_normal).normalized();
Basis normal_xform = Basis(tangent, bitangent, p_normal).transposed();
// Number of advance steps the ray origin is pushed away from p_pos.
float bias = 1.5;
int max_level = cell_subdiv - 1;
int size = 1 << max_level;
Vector3 accum;
// Maximum tilt of a ray away from the normal.
float spread = Math::deg2rad(80.0);
const Light *light = bake_light.ptr();
const Cell *cells = bake_cells.ptr();
// NOTE(review): rand() can return 0; make sure the generator used below
// tolerates a zero seed. Also confirm rand() is acceptable on whichever
// thread this runs on.
uint32_t local_rng_state = rand(); //needs to be fixed again
for (int i = 0; i < samples; i++) {
// Tilt away from +Z by a random angle in [-spread, spread) ...
float random_angle1 = (((xorshift32(&local_rng_state) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
Vector3 axis(0, sin(random_angle1), cos(random_angle1));
// ... then spin around +Z by a random angle in [0, 2*pi).
float random_angle2 = ((xorshift32(&local_rng_state) % 65535) / 65535.0) * Math_PI * 2.0;
Basis rot(Vector3(0, 0, 1), random_angle2);
axis = rot.xform(axis);
Vector3 direction = normal_xform.xform(axis).normalized();
// Per-step displacement along the ray.
// NOTE(review): _get_normal_advance() is defined elsewhere; presumably it
// scales the step so the ray moves about one cell at a time - confirm.
Vector3 advance = direction * _get_normal_advance(direction);
Vector3 pos = p_pos /*+ Vector3(0.5, 0.5, 0.5)*/ + advance * bias;
uint32_t cell = CHILD_EMPTY;
// March until a stored deepest-level cell is found or the ray leaves the grid.
while (cell == CHILD_EMPTY) {
int x = int(pos.x);
int y = int(pos.y);
int z = int(pos.z);
int ofs_x = 0;
int ofs_y = 0;
int ofs_z = 0;
int half = size / 2;
// Left the grid: this ray contributes nothing.
if (x < 0 || x >= size)
break;
if (y < 0 || y >= size)
break;
if (z < 0 || z >= size)
break;
//int level_limit = max_level;
cell = 0; //start from root
// Descend to the deepest level, choosing the child octant that
// contains (x, y, z) at every step.
for (int j = 0; j < max_level; j++) {
const Cell *bc = &cells[cell];
int child = 0;
if (x >= ofs_x + half) {
child |= 1;
ofs_x += half;
}
if (y >= ofs_y + half) {
child |= 2;
ofs_y += half;
}
if (z >= ofs_z + half) {
child |= 4;
ofs_z += half;
}
cell = bc->children[child];
if (unlikely(cell == CHILD_EMPTY))
break;
half >>= 1;
}
pos += advance;
}
// cell is still CHILD_EMPTY here when the ray left the grid without a hit.
if (unlikely(cell != CHILD_EMPTY)) {
for (int j = 0; j < 6; j++) {
// Anisotropic light read: only lobes facing the ray direction
// contribute, weighted by how much they face it.
float amount = direction.dot(aniso_normal[j]);
if (amount <= 0)
continue;
accum.x += light[cell].accum[j][0] * amount;
accum.y += light[cell].accum[j][1] * amount;
accum.z += light[cell].accum[j][2] * amount;
}
// Emission is added regardless of direction.
accum.x += cells[cell].emission[0];
accum.y += cells[cell].emission[1];
accum.z += cells[cell].emission[2];
}
}
// Average over every ray shot, including those that hit nothing.
return accum / samples;
}
// Bakes the light for one texel of a lightmap row.
//
// p_x    - index of the texel inside the row.
// p_line - pointer to the first texel of the row.
//
// Texels whose position equals a default-constructed Vector3 are treated as
// unused and left alone. Otherwise the texel's light is computed with the
// method selected by `bake_mode` and scaled by `energy`.
void VoxelLightBaker::_lightmap_bake_point(uint32_t p_x, LightMap *p_line) {

	LightMap &texel = p_line[p_x];
	if (texel.pos == Vector3()) {
		return; // Nothing was plotted here.
	}

	if (bake_mode == BAKE_MODE_CONE_TRACE) {
		texel.light = _compute_pixel_light_at_pos(texel.pos, texel.normal) * energy;
	} else if (bake_mode == BAKE_MODE_RAY_TRACE) {
		texel.light = _compute_ray_trace_at_pos(texel.pos, texel.normal) * energy;
	}
}
// Bakes the voxel light data into a lightmap laid out on the UV2 channel of
// `p_mesh`.
//
// p_xform                 - mesh transform; combined with to_cell_space so
//                           that texel positions end up in cell space.
// p_mesh                  - mesh to bake; every surface needs vertices,
//                           normals and UV2.
// default_texels_per_unit - density used to derive the lightmap size when the
//                           mesh has no lightmap size hint.
// r_lightmap              - receives width, height and width*height RGB
//                           float triplets.
// p_bake_time_func        - optional progress callback, called once per row
//                           with (userdata, estimated remaining seconds,
//                           progress in [0, 1)). Returning true cancels.
// p_bake_time_ud          - userdata forwarded to the callback.
//
// Returns OK on success, ERR_INVALID_PARAMETER if the mesh is null or a
// surface lacks vertices/normals/UV2, and ERR_SKIP if the callback cancelled.
Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, float default_texels_per_unit, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {

	//transfer light information to a lightmap
	Ref<Mesh> mesh = p_mesh;
	ERR_FAIL_COND_V(mesh.is_null(), ERR_INVALID_PARAMETER);

	//step 1 - create lightmap
	int width;
	int height;
	Vector<LightMap> lightmap;

	Transform xform = to_cell_space * p_xform;
	if (mesh->get_lightmap_size_hint() == Size2()) {
		// No size hint: derive a square size from the ratio between the
		// surface area (in cell space) and the area covered in UV2 space.
		double area = 0;
		double uv_area = 0;
		for (int i = 0; i < mesh->get_surface_count(); i++) {
			Array arrays = mesh->surface_get_arrays(i);
			PoolVector<Vector3> vertices = arrays[Mesh::ARRAY_VERTEX];
			PoolVector<Vector2> uv2 = arrays[Mesh::ARRAY_TEX_UV2];
			PoolVector<int> indices = arrays[Mesh::ARRAY_INDEX];

			ERR_FAIL_COND_V(vertices.size() == 0, ERR_INVALID_PARAMETER);
			ERR_FAIL_COND_V(uv2.size() == 0, ERR_INVALID_PARAMETER);

			int vc = vertices.size();
			PoolVector<Vector3>::Read vr = vertices.read();
			PoolVector<Vector2>::Read u2r = uv2.read();
			PoolVector<int>::Read ir;
			int ic = 0;

			if (indices.size()) {
				ic = indices.size();
				ir = indices.read();
			}

			int faces = ic ? ic / 3 : vc / 3;
			for (int j = 0; j < faces; j++) {
				Vector3 vertex[3];
				Vector2 uv[3];

				for (int k = 0; k < 3; k++) {
					int idx = ic ? ir[j * 3 + k] : j * 3 + k;
					vertex[k] = xform.xform(vr[idx]);
					uv[k] = u2r[idx];
				}

				// Heron's formula. For degenerate (collinear) triangles
				// rounding can push the product slightly below zero, which
				// would make sqrt() return NaN and poison the whole sum, so
				// the product is clamped at zero.
				Vector3 p1 = vertex[0];
				Vector3 p2 = vertex[1];
				Vector3 p3 = vertex[2];
				double a = p1.distance_to(p2);
				double b = p2.distance_to(p3);
				double c = p3.distance_to(p1);
				double halfPerimeter = (a + b + c) / 2.0;
				area += sqrt(MAX(0.0, halfPerimeter * (halfPerimeter - a) * (halfPerimeter - b) * (halfPerimeter - c)));

				Vector2 uv_p1 = uv[0];
				Vector2 uv_p2 = uv[1];
				Vector2 uv_p3 = uv[2];
				double uv_a = uv_p1.distance_to(uv_p2);
				double uv_b = uv_p2.distance_to(uv_p3);
				double uv_c = uv_p3.distance_to(uv_p1);
				double uv_halfPerimeter = (uv_a + uv_b + uv_c) / 2.0;
				uv_area += sqrt(MAX(0.0, uv_halfPerimeter * (uv_halfPerimeter - uv_a) * (uv_halfPerimeter - uv_b) * (uv_halfPerimeter - uv_c)));
			}
		}

		// Avoid dividing by a (near) zero UV area.
		if (uv_area < 0.0001f) {
			uv_area = 1.0;
		}

		int pixels = (ceil((1.0 / sqrt(uv_area)) * sqrt(area * default_texels_per_unit)));
		width = height = CLAMP(pixels, 2, 4096);
	} else {
		width = mesh->get_lightmap_size_hint().x;
		height = mesh->get_lightmap_size_hint().y;
	}

	lightmap.resize(width * height);

	//step 2 plot faces to lightmap
	for (int i = 0; i < mesh->get_surface_count(); i++) {
		Array arrays = mesh->surface_get_arrays(i);
		PoolVector<Vector3> vertices = arrays[Mesh::ARRAY_VERTEX];
		PoolVector<Vector3> normals = arrays[Mesh::ARRAY_NORMAL];
		PoolVector<Vector2> uv2 = arrays[Mesh::ARRAY_TEX_UV2];
		PoolVector<int> indices = arrays[Mesh::ARRAY_INDEX];

		ERR_FAIL_COND_V(vertices.size() == 0, ERR_INVALID_PARAMETER);
		ERR_FAIL_COND_V(normals.size() == 0, ERR_INVALID_PARAMETER);
		ERR_FAIL_COND_V(uv2.size() == 0, ERR_INVALID_PARAMETER);

		int vc = vertices.size();
		PoolVector<Vector3>::Read vr = vertices.read();
		PoolVector<Vector3>::Read nr = normals.read();
		PoolVector<Vector2>::Read u2r = uv2.read();
		PoolVector<int>::Read ir;
		int ic = 0;

		if (indices.size()) {
			ic = indices.size();
			ir = indices.read();
		}

		int faces = ic ? ic / 3 : vc / 3;
		for (int j = 0; j < faces; j++) {
			Vector3 vertex[3];
			Vector3 normal[3];
			Vector2 uv[3];

			for (int k = 0; k < 3; k++) {
				int idx = ic ? ir[j * 3 + k] : j * 3 + k;
				vertex[k] = xform.xform(vr[idx]);
				normal[k] = xform.basis.xform(nr[idx]).normalized();
				uv[k] = u2r[idx];
			}

			_plot_triangle(uv, vertex, normal, lightmap.ptrw(), width, height);
		}
	}

	//step 3 perform voxel cone trace on lightmap pixels
	{
		LightMap *lightmap_ptr = lightmap.ptrw();
		uint64_t begin_time = OS::get_singleton()->get_ticks_usec();
		volatile int lines = 0;

		// Rows are processed one after another; the texels inside a row are
		// handed to thread_process_array().
		for (int i = 0; i < height; i++) {
			thread_process_array(width, this, &VoxelLightBaker::_lightmap_bake_point, &lightmap_ptr[i * width]);

			lines = MAX(lines, i); //for multithread
			if (p_bake_time_func) {
				uint64_t elapsed = OS::get_singleton()->get_ticks_usec() - begin_time;
				float elapsed_sec = double(elapsed) / 1000000.0;
				float remaining = lines < 1 ? 0 : (elapsed_sec / lines) * (height - lines - 1);
				if (p_bake_time_func(p_bake_time_ud, remaining, lines / float(height))) {
					return ERR_SKIP;
				}
			}
		}

		if (bake_mode == BAKE_MODE_RAY_TRACE) {
			//blur
			//gauss kernel, 7 step sigma 2
			static const float gauss_kernel[4] = { 0.214607f, 0.189879f, 0.131514f, 0.071303f };

			// Scratch buffer holding the result of the horizontal pass.
			// The texel positions must stay intact because the direct light
			// stage below still needs them, so the intermediate result must
			// not be stored in LightMap::pos.
			Vector<Vector3> blur_buffer;
			blur_buffer.resize(width * height);
			Vector3 *blur_ptr = blur_buffer.ptrw();

			//horizontal pass
			for (int i = 0; i < height; i++) {
				for (int j = 0; j < width; j++) {
					if (lightmap_ptr[i * width + j].normal == Vector3()) {
						continue; //empty
					}

					float gauss_sum = gauss_kernel[0];
					Vector3 accum = lightmap_ptr[i * width + j].light * gauss_kernel[0];

					// Walk right, then left, stopping at the border or at the
					// first empty texel so light never bleeds across gaps.
					for (int k = 1; k < 4; k++) {
						int new_x = j + k;
						if (new_x >= width || lightmap_ptr[i * width + new_x].normal == Vector3())
							break;
						gauss_sum += gauss_kernel[k];
						accum += lightmap_ptr[i * width + new_x].light * gauss_kernel[k];
					}
					for (int k = 1; k < 4; k++) {
						int new_x = j - k;
						if (new_x < 0 || lightmap_ptr[i * width + new_x].normal == Vector3())
							break;
						gauss_sum += gauss_kernel[k];
						accum += lightmap_ptr[i * width + new_x].light * gauss_kernel[k];
					}

					// Renormalize by the kernel weight actually used.
					accum /= gauss_sum;
					blur_ptr[i * width + j] = accum;
				}
			}

			//vertical pass
			for (int i = 0; i < height; i++) {
				for (int j = 0; j < width; j++) {
					if (lightmap_ptr[i * width + j].normal == Vector3())
						continue; //empty, don't write over it anyway

					float gauss_sum = gauss_kernel[0];
					Vector3 accum = blur_ptr[i * width + j] * gauss_kernel[0];

					for (int k = 1; k < 4; k++) {
						int new_y = i + k;
						if (new_y >= height || lightmap_ptr[new_y * width + j].normal == Vector3())
							break;
						gauss_sum += gauss_kernel[k];
						accum += blur_ptr[new_y * width + j] * gauss_kernel[k];
					}
					for (int k = 1; k < 4; k++) {
						int new_y = i - k;
						if (new_y < 0 || lightmap_ptr[new_y * width + j].normal == Vector3())
							break;
						gauss_sum += gauss_kernel[k];
						accum += blur_ptr[new_y * width + j] * gauss_kernel[k];
					}

					accum /= gauss_sum;
					lightmap_ptr[i * width + j].light = accum;
				}
			}
		}

		//add directional light (do this after blur)
		{
			const Cell *cells = bake_cells.ptr();
			const Light *light = bake_light.ptr();
			const int size = 1 << (cell_subdiv - 1);

			// Each texel only writes to itself, so rows can be distributed
			// across threads. A single "parallel for" is used on purpose: a
			// bare "parallel" region would make every thread run the whole
			// loop and add the direct light several times.
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1)
#endif
			for (int i = 0; i < height; i++) {
				for (int j = 0; j < width; j++) {
					LightMap *pixel = &lightmap_ptr[i * width + j];
					if (pixel->pos == Vector3())
						continue; //unused, skip

					int x = int(pixel->pos.x) - 1;
					int y = int(pixel->pos.y) - 1;
					int z = int(pixel->pos.z) - 1;
					Color accum;

					int found = 0;

					// Average the direct light of the 2x2x2 cells around the
					// texel position.
					for (int k = 0; k < 8; k++) {
						int ofs_x = x;
						int ofs_y = y;
						int ofs_z = z;

						if (k & 1)
							ofs_x++;
						if (k & 2)
							ofs_y++;
						if (k & 4)
							ofs_z++;

						// Bounds-check the neighbour itself, not the base
						// corner: otherwise a base corner of -1 would discard
						// the valid neighbours at coordinate 0.
						if (ofs_x < 0 || ofs_x >= size)
							continue;
						if (ofs_y < 0 || ofs_y >= size)
							continue;
						if (ofs_z < 0 || ofs_z >= size)
							continue;

						uint32_t cell = _find_cell_at_pos(cells, ofs_x, ofs_y, ofs_z);

						if (cell == CHILD_EMPTY)
							continue;
						for (int l = 0; l < 6; l++) {
							// Weight each lobe by how much it faces the normal.
							float s = pixel->normal.dot(aniso_normal[l]);
							if (s < 0)
								s = 0;
							accum.r += light[cell].direct_accum[l][0] * s;
							accum.g += light[cell].direct_accum[l][1] * s;
							accum.b += light[cell].direct_accum[l][2] * s;
						}
						found++;
					}
					if (found) {
						accum /= found;
						pixel->light.x += accum.r;
						pixel->light.y += accum.g;
						pixel->light.z += accum.b;
					}
				}
			}
		}

		{
			//fill gaps with neighbour vertices to avoid filter fades to black on edges

			for (int i = 0; i < height; i++) {
				for (int j = 0; j < width; j++) {
					if (lightmap_ptr[i * width + j].normal != Vector3()) {
						continue; //filled, skip
					}

					//this can't be made separable..

					int closest_i = -1, closest_j = -1;
					float closest_dist = 1e20;

					const int margin = 3;
					for (int y = i - margin; y <= i + margin; y++) {
						for (int x = j - margin; x <= j + margin; x++) {
							if (x == j && y == i)
								continue;
							if (x < 0 || x >= width)
								continue;
							if (y < 0 || y >= height)
								continue;
							if (lightmap_ptr[y * width + x].normal == Vector3())
								continue; //also ensures that blitted stuff is not reused

							float dist = Vector2(i - y, j - x).length();
							if (dist > closest_dist)
								continue;

							closest_dist = dist;
							closest_i = y;
							closest_j = x;
						}
					}

					if (closest_i != -1) {
						lightmap_ptr[i * width + j].light = lightmap_ptr[closest_i * width + closest_j].light;
					}
				}
			}
		}

		{
			//fill the lightmap data
			r_lightmap.width = width;
			r_lightmap.height = height;
			r_lightmap.light.resize(lightmap.size() * 3);
			PoolVector<float>::Write w = r_lightmap.light.write();
			for (int i = 0; i < lightmap.size(); i++) {
				w[i * 3 + 0] = lightmap[i].light.x;
				w[i * 3 + 1] = lightmap[i].light.y;
				w[i * 3 + 2] = lightmap[i].light.z;
			}
		}

#if 0 // Enable for debugging.
		{
			PoolVector<uint8_t> img;
			int ls = lightmap.size();
			img.resize(ls * 3);
			{
				PoolVector<uint8_t>::Write w = img.write();
				for (int i = 0; i < ls; i++) {
					w[i * 3 + 0] = CLAMP(lightmap_ptr[i].light.x * 255, 0, 255);
					w[i * 3 + 1] = CLAMP(lightmap_ptr[i].light.y * 255, 0, 255);
					w[i * 3 + 2] = CLAMP(lightmap_ptr[i].light.z * 255, 0, 255);
					//w[i * 3 + 0] = CLAMP(lightmap_ptr[i].normal.x * 255, 0, 255);
					//w[i * 3 + 1] = CLAMP(lightmap_ptr[i].normal.y * 255, 0, 255);
					//w[i * 3 + 2] = CLAMP(lightmap_ptr[i].normal.z * 255, 0, 255);
					//w[i * 3 + 0] = CLAMP(lightmap_ptr[i].pos.x / (1 << (cell_subdiv - 1)) * 255, 0, 255);
					//w[i * 3 + 1] = CLAMP(lightmap_ptr[i].pos.y / (1 << (cell_subdiv - 1)) * 255, 0, 255);
					//w[i * 3 + 2] = CLAMP(lightmap_ptr[i].pos.z / (1 << (cell_subdiv - 1)) * 255, 0, 255);
				}
			}

			Ref<Image> image;
			image.instance();
			image->create(width, height, false, Image::FORMAT_RGB8, img);

			String name = p_mesh->get_name();
			if (name == "") {
				name = "Mesh" + itos(p_mesh->get_instance_id());
			}
			image->save_png(name + ".png");
		}
#endif
	}

	return OK;
}
void VoxelLightBaker::begin_bake(int p_subdiv, const AABB &p_bounds) {
original_bounds = p_bounds;
@ -2482,5 +1613,4 @@ VoxelLightBaker::VoxelLightBaker() {
color_scan_cell_width = 4;
bake_texture_size = 128;
propagation = 0.85;
energy = 1.0;
}

View File

@ -128,10 +128,8 @@ private:
int bake_texture_size;
float cell_size;
float propagation;
float energy;
BakeQuality bake_quality;
BakeMode bake_mode;
int max_original_cells;
@ -147,25 +145,10 @@ private:
uint32_t _find_cell_at_pos(const Cell *cells, int x, int y, int z);
// Per-texel working data used while baking a lightmap.
struct LightMap {
// Light computed for this texel; make_lightmap() copies it to its output.
Vector3 light;
// Texel position, produced by transforming mesh vertices with
// to_cell_space * p_xform. A default-constructed value marks a texel that
// is skipped when baking.
Vector3 pos;
// Texel normal. A default-constructed value marks an empty texel for the
// blur and gap-filling passes.
Vector3 normal;
};
void _plot_triangle(Vector2 *vertices, Vector3 *positions, Vector3 *normals, LightMap *pixels, int width, int height);
_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
void _lightmap_bake_point(uint32_t p_x, LightMap *p_line);
public:
void begin_bake(int p_subdiv, const AABB &p_bounds);
void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);
void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, float p_propagation = 0.85);
void plot_light_directional(const Vector3 &p_direction, const Color &p_color, float p_energy, float p_indirect_energy, bool p_direct);
void plot_light_omni(const Vector3 &p_pos, const Color &p_color, float p_energy, float p_indirect_energy, float p_radius, float p_attenutation, bool p_direct);
void plot_light_spot(const Vector3 &p_pos, const Vector3 &p_axis, const Color &p_color, float p_energy, float p_indirect_energy, float p_radius, float p_attenutation, float p_spot_angle, float p_spot_attenuation, bool p_direct);
@ -177,8 +160,6 @@ public:
PoolVector<float> light;
};
Error make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, float default_texels_per_unit, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float) = NULL, void *p_bake_time_ud = NULL);
PoolVector<int> create_gi_probe_data();
Ref<MultiMesh> create_debug_multimesh(DebugMode p_mode = DEBUG_ALBEDO);
PoolVector<uint8_t> create_capture_octree(int p_subdiv);

View File

@ -30,6 +30,8 @@
#include "mesh.h"
#include "core/crypto/crypto_core.h"
#include "core/local_vector.h"
#include "core/pair.h"
#include "scene/resources/concave_polygon_shape.h"
#include "scene/resources/convex_polygon_shape.h"
@ -1108,18 +1110,35 @@ struct ArrayMeshLightmapSurface {
};
// Unwraps the mesh for lightmapping without using an unwrap cache.
//
// Forwards to lightmap_unwrap_cached() with a null cache pointer, a zero
// cache size and caching turned off, and returns its result unchanged.
Error ArrayMesh::lightmap_unwrap(const Transform &p_base_transform, float p_texel_size) {
	bool cache_enabled = false; // Don't use cache
	unsigned int unused_cache_size = 0;
	int *unused_cache = nullptr;

	return lightmap_unwrap_cached(unused_cache, unused_cache_size, cache_enabled, p_base_transform, p_texel_size);
}
Error ArrayMesh::lightmap_unwrap_cached(int *&r_cache_data, unsigned int &r_cache_size, bool &r_used_cache, const Transform &p_base_transform, float p_texel_size) {
ERR_FAIL_COND_V(!array_mesh_lightmap_unwrap_callback, ERR_UNCONFIGURED);
ERR_FAIL_COND_V_MSG(blend_shapes.size() != 0, ERR_UNAVAILABLE, "Can't unwrap mesh with blend shapes.");
Vector<float> vertices;
Vector<float> normals;
Vector<int> indices;
Vector<int> face_materials;
Vector<float> uv;
Vector<Pair<int, int> > uv_index;
LocalVector<float> vertices;
LocalVector<float> normals;
LocalVector<int> indices;
LocalVector<int> face_materials;
LocalVector<float> uv;
LocalVector<Pair<int, int> > uv_indices;
Vector<ArrayMeshLightmapSurface> lightmap_surfaces;
// Keep only the scale
Basis basis = p_base_transform.get_basis();
Vector3 scale = Vector3(basis.get_axis(0).length(), basis.get_axis(1).length(), basis.get_axis(2).length());
Transform transform;
transform.scale(scale);
Basis normal_basis = transform.basis.inverse().transposed();
Vector<ArrayMeshLightmapSurface> surfaces;
for (int i = 0; i < get_surface_count(); i++) {
ArrayMeshLightmapSurface s;
s.primitive = surface_get_primitive_type(i);
@ -1143,30 +1162,36 @@ Error ArrayMesh::lightmap_unwrap(const Transform &p_base_transform, float p_texe
vertices.resize((vertex_ofs + vc) * 3);
normals.resize((vertex_ofs + vc) * 3);
uv_index.resize(vertex_ofs + vc);
uv_indices.resize(vertex_ofs + vc);
for (int j = 0; j < vc; j++) {
Vector3 v = p_base_transform.xform(r[j]);
Vector3 n = p_base_transform.basis.xform(rn[j]).normalized();
Vector3 v = transform.xform(r[j]);
Vector3 n = normal_basis.xform(rn[j]).normalized();
vertices.write[(j + vertex_ofs) * 3 + 0] = v.x;
vertices.write[(j + vertex_ofs) * 3 + 1] = v.y;
vertices.write[(j + vertex_ofs) * 3 + 2] = v.z;
normals.write[(j + vertex_ofs) * 3 + 0] = n.x;
normals.write[(j + vertex_ofs) * 3 + 1] = n.y;
normals.write[(j + vertex_ofs) * 3 + 2] = n.z;
uv_index.write[j + vertex_ofs] = Pair<int, int>(i, j);
vertices[(j + vertex_ofs) * 3 + 0] = v.x;
vertices[(j + vertex_ofs) * 3 + 1] = v.y;
vertices[(j + vertex_ofs) * 3 + 2] = v.z;
normals[(j + vertex_ofs) * 3 + 0] = n.x;
normals[(j + vertex_ofs) * 3 + 1] = n.y;
normals[(j + vertex_ofs) * 3 + 2] = n.z;
uv_indices[j + vertex_ofs] = Pair<int, int>(i, j);
}
PoolVector<int> rindices = arrays[Mesh::ARRAY_INDEX];
int ic = rindices.size();
float eps = 1.19209290e-7F; // Taken from xatlas.h
if (ic == 0) {
for (int j = 0; j < vc / 3; j++) {
if (Face3(r[j * 3 + 0], r[j * 3 + 1], r[j * 3 + 2]).is_degenerate())
Vector3 p0 = transform.xform(r[j * 3 + 0]);
Vector3 p1 = transform.xform(r[j * 3 + 1]);
Vector3 p2 = transform.xform(r[j * 3 + 2]);
if ((p0 - p1).length_squared() < eps || (p1 - p2).length_squared() < eps || (p2 - p0).length_squared() < eps) {
continue;
}
indices.push_back(vertex_ofs + j * 3 + 0);
indices.push_back(vertex_ofs + j * 3 + 1);
@ -1178,8 +1203,14 @@ Error ArrayMesh::lightmap_unwrap(const Transform &p_base_transform, float p_texe
PoolVector<int>::Read ri = rindices.read();
for (int j = 0; j < ic / 3; j++) {
if (Face3(r[ri[j * 3 + 0]], r[ri[j * 3 + 1]], r[ri[j * 3 + 2]]).is_degenerate())
Vector3 p0 = transform.xform(r[ri[j * 3 + 0]]);
Vector3 p1 = transform.xform(r[ri[j * 3 + 1]]);
Vector3 p2 = transform.xform(r[ri[j * 3 + 2]]);
if ((p0 - p1).length_squared() < eps || (p1 - p2).length_squared() < eps || (p2 - p0).length_squared() < eps) {
continue;
}
indices.push_back(vertex_ofs + ri[j * 3 + 0]);
indices.push_back(vertex_ofs + ri[j * 3 + 1]);
indices.push_back(vertex_ofs + ri[j * 3 + 2]);
@ -1187,7 +1218,49 @@ Error ArrayMesh::lightmap_unwrap(const Transform &p_base_transform, float p_texe
}
}
surfaces.push_back(s);
lightmap_surfaces.push_back(s);
}
CryptoCore::MD5Context ctx;
ctx.start();
ctx.update((unsigned char *)&p_texel_size, sizeof(float));
ctx.update((unsigned char *)indices.ptr(), sizeof(int) * indices.size());
ctx.update((unsigned char *)face_materials.ptr(), sizeof(int) * face_materials.size());
ctx.update((unsigned char *)vertices.ptr(), sizeof(float) * vertices.size());
ctx.update((unsigned char *)normals.ptr(), sizeof(float) * normals.size());
unsigned char hash[16];
ctx.finish(hash);
bool cached = false;
unsigned int cache_idx = 0;
if (r_used_cache && r_cache_data) {
//Check if hash is in cache data
int *cache_data = r_cache_data;
int n_entries = cache_data[0];
unsigned int r_idx = 1;
for (int i = 0; i < n_entries; ++i) {
if (memcmp(&cache_data[r_idx], hash, 16) == 0) {
cached = true;
cache_idx = r_idx;
break;
}
r_idx += 4; // hash
r_idx += 2; // size hint
int vertex_count = cache_data[r_idx];
r_idx += 1; // vertex count
r_idx += vertex_count; // vertex
r_idx += vertex_count * 2; // uvs
int index_count = cache_data[r_idx];
r_idx += 1; // index count
r_idx += index_count; // indices
}
}
//unwrap
@ -1200,25 +1273,101 @@ Error ArrayMesh::lightmap_unwrap(const Transform &p_base_transform, float p_texe
int size_x;
int size_y;
if (r_used_cache && cached) {
int *cache_data = r_cache_data;
// Return cache data pointer to the caller
r_cache_data = &cache_data[cache_idx];
cache_idx += 4;
// Load size
size_x = ((int *)cache_data)[cache_idx];
size_y = ((int *)cache_data)[cache_idx + 1];
cache_idx += 2;
// Load vertices
gen_vertex_count = cache_data[cache_idx];
cache_idx++;
gen_vertices = &cache_data[cache_idx];
cache_idx += gen_vertex_count;
// Load UVs
gen_uvs = (float *)&cache_data[cache_idx];
cache_idx += gen_vertex_count * 2;
// Load indices
gen_index_count = cache_data[cache_idx];
cache_idx++;
gen_indices = &cache_data[cache_idx];
// Return cache data size to the caller
r_cache_size = sizeof(int) * (4 + 2 + 1 + gen_vertex_count + (gen_vertex_count * 2) + 1 + gen_index_count); // hash + size hint + vertex_count + vertices + uvs + index_count + indices
r_used_cache = true;
}
if (!cached) {
bool ok = array_mesh_lightmap_unwrap_callback(p_texel_size, vertices.ptr(), normals.ptr(), vertices.size() / 3, indices.ptr(), face_materials.ptr(), indices.size(), &gen_uvs, &gen_vertices, &gen_vertex_count, &gen_indices, &gen_index_count, &size_x, &size_y);
if (!ok) {
return ERR_CANT_CREATE;
}
if (r_used_cache) {
unsigned int new_cache_size = 4 + 2 + 1 + gen_vertex_count + (gen_vertex_count * 2) + 1 + gen_index_count; // hash + size hint + vertex_count + vertices + uvs + index_count + indices
new_cache_size *= sizeof(int);
int *new_cache_data = (int *)memalloc(new_cache_size);
unsigned int new_cache_idx = 0;
// hash
memcpy(&new_cache_data[new_cache_idx], hash, 16);
new_cache_idx += 4;
// size hint
new_cache_data[new_cache_idx] = size_x;
new_cache_data[new_cache_idx + 1] = size_y;
new_cache_idx += 2;
// vertex count
new_cache_data[new_cache_idx] = gen_vertex_count;
new_cache_idx++;
// vertices
memcpy(&new_cache_data[new_cache_idx], gen_vertices, sizeof(int) * gen_vertex_count);
new_cache_idx += gen_vertex_count;
// uvs
memcpy(&new_cache_data[new_cache_idx], gen_uvs, sizeof(float) * gen_vertex_count * 2);
new_cache_idx += gen_vertex_count * 2;
// index count
new_cache_data[new_cache_idx] = gen_index_count;
new_cache_idx++;
// indices
memcpy(&new_cache_data[new_cache_idx], gen_indices, sizeof(int) * gen_index_count);
new_cache_idx += gen_index_count;
// Return cache data to the caller
r_cache_data = new_cache_data;
r_cache_size = new_cache_size;
r_used_cache = false;
}
}
//remove surfaces
while (get_surface_count()) {
surface_remove(0);
}
//create surfacetools for each surface..
Vector<Ref<SurfaceTool> > surfaces_tools;
LocalVector<Ref<SurfaceTool> > surfaces_tools;
for (int i = 0; i < surfaces.size(); i++) {
for (int i = 0; i < lightmap_surfaces.size(); i++) {
Ref<SurfaceTool> st;
st.instance();
st->begin(Mesh::PRIMITIVE_TRIANGLES);
st->set_material(surfaces[i].material);
st->set_material(lightmap_surfaces[i].material);
surfaces_tools.push_back(st); //stay there
}
@ -1226,61 +1375,62 @@ Error ArrayMesh::lightmap_unwrap(const Transform &p_base_transform, float p_texe
//go through all indices
for (int i = 0; i < gen_index_count; i += 3) {
ERR_FAIL_INDEX_V(gen_vertices[gen_indices[i + 0]], uv_index.size(), ERR_BUG);
ERR_FAIL_INDEX_V(gen_vertices[gen_indices[i + 1]], uv_index.size(), ERR_BUG);
ERR_FAIL_INDEX_V(gen_vertices[gen_indices[i + 2]], uv_index.size(), ERR_BUG);
ERR_FAIL_INDEX_V(gen_vertices[gen_indices[i + 0]], (int)uv_indices.size(), ERR_BUG);
ERR_FAIL_INDEX_V(gen_vertices[gen_indices[i + 1]], (int)uv_indices.size(), ERR_BUG);
ERR_FAIL_INDEX_V(gen_vertices[gen_indices[i + 2]], (int)uv_indices.size(), ERR_BUG);
ERR_FAIL_COND_V(uv_index[gen_vertices[gen_indices[i + 0]]].first != uv_index[gen_vertices[gen_indices[i + 1]]].first || uv_index[gen_vertices[gen_indices[i + 0]]].first != uv_index[gen_vertices[gen_indices[i + 2]]].first, ERR_BUG);
ERR_FAIL_COND_V(uv_indices[gen_vertices[gen_indices[i + 0]]].first != uv_indices[gen_vertices[gen_indices[i + 1]]].first || uv_indices[gen_vertices[gen_indices[i + 0]]].first != uv_indices[gen_vertices[gen_indices[i + 2]]].first, ERR_BUG);
int surface = uv_index[gen_vertices[gen_indices[i + 0]]].first;
int surface = uv_indices[gen_vertices[gen_indices[i + 0]]].first;
for (int j = 0; j < 3; j++) {
SurfaceTool::Vertex v = surfaces[surface].vertices[uv_index[gen_vertices[gen_indices[i + j]]].second];
SurfaceTool::Vertex v = lightmap_surfaces[surface].vertices[uv_indices[gen_vertices[gen_indices[i + j]]].second];
if (surfaces[surface].format & ARRAY_FORMAT_COLOR) {
surfaces_tools.write[surface]->add_color(v.color);
if (lightmap_surfaces[surface].format & ARRAY_FORMAT_COLOR) {
surfaces_tools[surface]->add_color(v.color);
}
if (surfaces[surface].format & ARRAY_FORMAT_TEX_UV) {
surfaces_tools.write[surface]->add_uv(v.uv);
if (lightmap_surfaces[surface].format & ARRAY_FORMAT_TEX_UV) {
surfaces_tools[surface]->add_uv(v.uv);
}
if (surfaces[surface].format & ARRAY_FORMAT_NORMAL) {
surfaces_tools.write[surface]->add_normal(v.normal);
if (lightmap_surfaces[surface].format & ARRAY_FORMAT_NORMAL) {
surfaces_tools[surface]->add_normal(v.normal);
}
if (surfaces[surface].format & ARRAY_FORMAT_TANGENT) {
if (lightmap_surfaces[surface].format & ARRAY_FORMAT_TANGENT) {
Plane t;
t.normal = v.tangent;
t.d = v.binormal.dot(v.normal.cross(v.tangent)) < 0 ? -1 : 1;
surfaces_tools.write[surface]->add_tangent(t);
surfaces_tools[surface]->add_tangent(t);
}
if (surfaces[surface].format & ARRAY_FORMAT_BONES) {
surfaces_tools.write[surface]->add_bones(v.bones);
if (lightmap_surfaces[surface].format & ARRAY_FORMAT_BONES) {
surfaces_tools[surface]->add_bones(v.bones);
}
if (surfaces[surface].format & ARRAY_FORMAT_WEIGHTS) {
surfaces_tools.write[surface]->add_weights(v.weights);
if (lightmap_surfaces[surface].format & ARRAY_FORMAT_WEIGHTS) {
surfaces_tools[surface]->add_weights(v.weights);
}
Vector2 uv2(gen_uvs[gen_indices[i + j] * 2 + 0], gen_uvs[gen_indices[i + j] * 2 + 1]);
surfaces_tools.write[surface]->add_uv2(uv2);
surfaces_tools[surface]->add_uv2(uv2);
surfaces_tools.write[surface]->add_vertex(v.vertex);
surfaces_tools[surface]->add_vertex(v.vertex);
}
}
//generate surfaces
for (unsigned int i = 0; i < surfaces_tools.size(); i++) {
surfaces_tools[i]->index();
surfaces_tools[i]->commit(Ref<ArrayMesh>((ArrayMesh *)this), lightmap_surfaces[i].format);
}
set_lightmap_size_hint(Size2(size_x, size_y));
if (!cached) {
//free stuff
::free(gen_vertices);
::free(gen_indices);
::free(gen_uvs);
//generate surfaces
for (int i = 0; i < surfaces_tools.size(); i++) {
surfaces_tools.write[i]->index();
surfaces_tools.write[i]->commit(Ref<ArrayMesh>((ArrayMesh *)this), surfaces[i].format);
}
set_lightmap_size_hint(Size2(size_x, size_y));
return OK;
}

View File

@ -231,6 +231,7 @@ public:
void regen_normalmaps();
Error lightmap_unwrap(const Transform &p_base_transform = Transform(), float p_texel_size = 0.05);
Error lightmap_unwrap_cached(int *&r_cache_data, unsigned int &r_cache_size, bool &r_used_cache, const Transform &p_base_transform = Transform(), float p_texel_size = 0.05);
virtual void reload_from_file();

View File

@ -390,6 +390,10 @@ ProceduralSky::TextureSize ProceduralSky::get_texture_size() const {
return texture_size;
}
// Returns the image currently held in the `panorama` member.
Ref<Image> ProceduralSky::get_panorama() const {
return panorama;
}
// Returns the RID stored in the `sky` member.
RID ProceduralSky::get_rid() const {
return sky;
}
@ -414,9 +418,9 @@ void ProceduralSky::_update_sky() {
}
} else {
Ref<Image> image = _generate_sky();
VS::get_singleton()->texture_allocate(texture, image->get_width(), image->get_height(), 0, Image::FORMAT_RGBE9995, VS::TEXTURE_TYPE_2D, VS::TEXTURE_FLAG_FILTER | VS::TEXTURE_FLAG_REPEAT);
VS::get_singleton()->texture_set_data(texture, image);
panorama = _generate_sky();
VS::get_singleton()->texture_allocate(texture, panorama->get_width(), panorama->get_height(), 0, Image::FORMAT_RGBE9995, VS::TEXTURE_TYPE_2D, VS::TEXTURE_FLAG_FILTER | VS::TEXTURE_FLAG_REPEAT);
VS::get_singleton()->texture_set_data(texture, panorama);
_radiance_changed();
}
}
@ -432,8 +436,9 @@ void ProceduralSky::_queue_update() {
void ProceduralSky::_thread_done(const Ref<Image> &p_image) {
VS::get_singleton()->texture_allocate(texture, p_image->get_width(), p_image->get_height(), 0, Image::FORMAT_RGBE9995, VS::TEXTURE_TYPE_2D, VS::TEXTURE_FLAG_FILTER | VS::TEXTURE_FLAG_REPEAT);
VS::get_singleton()->texture_set_data(texture, p_image);
panorama = p_image;
VS::get_singleton()->texture_allocate(texture, panorama->get_width(), panorama->get_height(), 0, Image::FORMAT_RGBE9995, VS::TEXTURE_TYPE_2D, VS::TEXTURE_FLAG_FILTER | VS::TEXTURE_FLAG_REPEAT);
VS::get_singleton()->texture_set_data(texture, panorama);
_radiance_changed();
Thread::wait_to_finish(sky_thread);
memdelete(sky_thread);

View File

@ -122,6 +122,7 @@ private:
RID sky;
RID texture;
Ref<Image> panorama;
bool update_queued;
bool regen_queued;
@ -189,6 +190,8 @@ public:
void set_texture_size(TextureSize p_size);
TextureSize get_texture_size() const;
Ref<Image> get_panorama() const;
virtual RID get_rid() const;
ProceduralSky(bool p_desaturate = false);

View File

@ -2250,6 +2250,143 @@ Image::Format TextureLayered::get_format() const {
return format;
}
// Loads the layered texture file at p_path into this object.
//
// The file starts with a 4-byte magic, "GD3T" or "GDAT", which must match the
// dynamic type of this object (Texture3D or TextureArray respectively). Six
// 32-bit fields follow: width, height, depth, texture flags, image format and
// compression mode. After the header comes one image per layer, stored either
// as a list of losslessly packed mip levels or as one raw block of image data.
//
// Returns:
//   OK                on success (path_to_file is then set to p_path),
//   the open error    if the file cannot be opened,
//   ERR_INVALID_DATA  if the magic is unknown or does not match this object,
//   ERR_FILE_CORRUPT  if a mip level fails to unpack or has another format
//                     than the header states, if the mip levels do not fit the
//                     size implied by the header, if the assembled image is
//                     empty, or if the raw payload is shorter than expected.
Error TextureLayered::load(const String &p_path) {
	Error error;
	FileAccess *f = FileAccess::open(p_path, FileAccess::READ, &error);
	ERR_FAIL_COND_V(error, error);

	// Only 4 bytes are read; the fifth stays 0 so the magic can be printed as a C string.
	uint8_t header[5] = { 0, 0, 0, 0, 0 };
	f->get_buffer(header, 4);

	if (header[0] == 'G' && header[1] == 'D' && header[2] == '3' && header[3] == 'T') {
		if (!Object::cast_to<Texture3D>(this)) {
			f->close();
			memdelete(f);
			ERR_FAIL_V(ERR_INVALID_DATA);
		}
	} else if (header[0] == 'G' && header[1] == 'D' && header[2] == 'A' && header[3] == 'T') {
		if (!Object::cast_to<TextureArray>(this)) {
			f->close();
			memdelete(f);
			ERR_FAIL_V(ERR_INVALID_DATA);
		}
	} else {
		f->close();
		memdelete(f);
		ERR_FAIL_V_MSG(ERR_INVALID_DATA, "Unrecognized layered texture file format: " + String((const char *)header));
	}

	int tw = f->get_32();
	int th = f->get_32();
	int td = f->get_32();
	int flags = f->get_32(); //texture flags!
	Image::Format format = Image::Format(f->get_32());
	uint32_t compression = f->get_32(); // 0 - lossless (PNG), 1 - vram, 2 - uncompressed

	// The texture is (re)created before any layer is validated, so a failure
	// below leaves this object created but only partially filled.
	create(tw, th, td, format, flags);

	for (int layer = 0; layer < td; layer++) {
		Ref<Image> image;
		image.instance();

		if (compression == COMPRESS_LOSSLESS) {
			//look for a PNG file inside

			int mipmaps = f->get_32();
			Vector<Ref<Image> > mipmap_images;

			for (int i = 0; i < mipmaps; i++) {
				uint32_t size = f->get_32();

				PoolVector<uint8_t> pv;
				pv.resize(size);
				{
					PoolVector<uint8_t>::Write w = pv.write();
					f->get_buffer(w.ptr(), size);
				}

				Ref<Image> img = Image::lossless_unpacker(pv);

				if (img.is_null() || img->empty() || format != img->get_format()) {
					f->close();
					memdelete(f);
					ERR_FAIL_V(ERR_FILE_CORRUPT);
				}

				mipmap_images.push_back(img);
			}

			if (mipmap_images.size() == 1) {
				image = mipmap_images[0];
			} else {
				// Concatenate the mip levels into one buffer sized for a full
				// mip chain of a (tw x th) image in the header's format.
				int total_size = Image::get_image_data_size(tw, th, format, true);
				PoolVector<uint8_t> img_data;
				img_data.resize(total_size);

				{
					PoolVector<uint8_t>::Write w = img_data.write();

					int ofs = 0;
					for (int i = 0; i < mipmap_images.size(); i++) {
						PoolVector<uint8_t> id = mipmap_images[i]->get_data();
						int len = id.size();
						// The level sizes come from the file while the buffer size
						// comes from the header; refuse to write past the buffer.
						if (len > total_size - ofs) {
							f->close();
							memdelete(f);
							ERR_FAIL_V(ERR_FILE_CORRUPT);
						}
						PoolVector<uint8_t>::Read r = id.read();
						copymem(&w[ofs], r.ptr(), len);
						ofs += len;
					}
				}

				image->create(tw, th, true, format, img_data);
				if (image->empty()) {
					f->close();
					memdelete(f);
					ERR_FAIL_V(ERR_FILE_CORRUPT);
				}
			}

		} else {
			//look for regular format
			bool mipmaps = (flags & Texture::FLAG_MIPMAPS);
			int total_size = Image::get_image_data_size(tw, th, format, mipmaps);

			PoolVector<uint8_t> img_data;
			img_data.resize(total_size);

			{
				PoolVector<uint8_t>::Write w = img_data.write();
				int bytes = f->get_buffer(w.ptr(), total_size);
				// A short read means the file ends before the layer does.
				if (bytes != total_size) {
					f->close();
					memdelete(f);
					ERR_FAIL_V(ERR_FILE_CORRUPT);
				}
			}

			image->create(tw, th, mipmaps, format, img_data);
		}

		set_layer_data(image, layer);
	}

	memdelete(f);

	path_to_file = p_path;
	_change_notify();

	return OK;
}
// Returns `path_to_file`, which load() sets to the path it was given once the
// file has been read successfully.
String TextureLayered::get_load_path() const {
return path_to_file;
}
// Returns the value of the `width` member.
uint32_t TextureLayered::get_width() const {
return width;
}
@ -2262,6 +2399,20 @@ uint32_t TextureLayered::get_depth() const {
return depth;
}
void TextureLayered::reload_from_file() {
String path = get_path();
if (!path.is_resource_file())
return;
path = ResourceLoader::path_remap(path); //remap for translation
path = ResourceLoader::import_remap(path); //remap for import
if (!path.is_resource_file())
return;
load(path);
}
void TextureLayered::_set_data(const Dictionary &p_data) {
ERR_FAIL_COND(!p_data.has("width"));
ERR_FAIL_COND(!p_data.has("height"));
@ -2410,139 +2561,11 @@ RES ResourceFormatLoaderTextureLayered::load(const String &p_path, const String
ERR_FAIL_V_MSG(RES(), "Unrecognized layered texture extension.");
}
FileAccess *f = FileAccess::open(p_path, FileAccess::READ);
ERR_FAIL_COND_V_MSG(!f, RES(), "Cannot open file '" + p_path + "'.");
uint8_t header[5] = { 0, 0, 0, 0, 0 };
f->get_buffer(header, 4);
if (header[0] == 'G' && header[1] == 'D' && header[2] == '3' && header[3] == 'T') {
if (tex3d.is_null()) {
f->close();
memdelete(f);
ERR_FAIL_COND_V(tex3d.is_null(), RES())
}
} else if (header[0] == 'G' && header[1] == 'D' && header[2] == 'A' && header[3] == 'T') {
if (texarr.is_null()) {
f->close();
memdelete(f);
ERR_FAIL_COND_V(texarr.is_null(), RES())
}
} else {
f->close();
memdelete(f);
ERR_FAIL_V_MSG(RES(), "Unrecognized layered texture file format '" + String((const char *)header) + "'.");
}
int tw = f->get_32();
int th = f->get_32();
int td = f->get_32();
int flags = f->get_32(); //texture flags!
Image::Format format = Image::Format(f->get_32());
uint32_t compression = f->get_32(); // 0 - lossless (PNG), 1 - vram, 2 - uncompressed
lt->create(tw, th, td, format, flags);
for (int layer = 0; layer < td; layer++) {
Ref<Image> image;
image.instance();
if (compression == COMPRESSION_LOSSLESS) {
//look for a PNG file inside
int mipmaps = f->get_32();
Vector<Ref<Image> > mipmap_images;
for (int i = 0; i < mipmaps; i++) {
uint32_t size = f->get_32();
PoolVector<uint8_t> pv;
pv.resize(size);
{
PoolVector<uint8_t>::Write w = pv.write();
f->get_buffer(w.ptr(), size);
}
Ref<Image> img = Image::lossless_unpacker(pv);
if (img.is_null() || img->empty() || format != img->get_format()) {
if (r_error) {
*r_error = ERR_FILE_CORRUPT;
}
f->close();
memdelete(f);
ERR_FAIL_V(RES());
}
mipmap_images.push_back(img);
}
if (mipmap_images.size() == 1) {
image = mipmap_images[0];
} else {
int total_size = Image::get_image_data_size(tw, th, format, true);
PoolVector<uint8_t> img_data;
img_data.resize(total_size);
{
PoolVector<uint8_t>::Write w = img_data.write();
int ofs = 0;
for (int i = 0; i < mipmap_images.size(); i++) {
PoolVector<uint8_t> id = mipmap_images[i]->get_data();
int len = id.size();
PoolVector<uint8_t>::Read r = id.read();
copymem(&w[ofs], r.ptr(), len);
ofs += len;
}
}
image->create(tw, th, true, format, img_data);
if (image->empty()) {
if (r_error) {
*r_error = ERR_FILE_CORRUPT;
}
f->close();
memdelete(f);
ERR_FAIL_V(RES());
}
}
} else {
//look for regular format
bool mipmaps = (flags & Texture::FLAG_MIPMAPS);
int total_size = Image::get_image_data_size(tw, th, format, mipmaps);
PoolVector<uint8_t> img_data;
img_data.resize(total_size);
{
PoolVector<uint8_t>::Write w = img_data.write();
int bytes = f->get_buffer(w.ptr(), total_size);
if (bytes != total_size) {
if (r_error) {
*r_error = ERR_FILE_CORRUPT;
}
f->close();
memdelete(f);
ERR_FAIL_V(RES());
}
}
image->create(tw, th, mipmaps, format, img_data);
}
lt->set_layer_data(image, layer);
}
Error err = lt->load(p_path);
if (r_error)
*r_error = OK;
if (err != OK)
return RES();
return lt;
}

View File

@ -477,7 +477,14 @@ public:
FLAGS_DEFAULT = FLAG_FILTER,
};
// How the per-layer image data of a layered texture file is stored.
// TextureLayered::load() compares the 32-bit compression field of the file
// header against these values: 0 - lossless (PNG), 1 - VRAM, 2 - uncompressed.
enum CompressMode {
COMPRESS_LOSSLESS,
COMPRESS_VIDEO_RAM,
COMPRESS_UNCOMPRESSED
};
private:
String path_to_file;
bool is_3d;
RID texture;
Image::Format format;
@ -487,6 +494,8 @@ private:
int height;
int depth;
virtual void reload_from_file();
void _set_data(const Dictionary &p_data);
Dictionary _get_data() const;
@ -498,6 +507,9 @@ public:
uint32_t get_flags() const;
Image::Format get_format() const;
Error load(const String &p_path);
String get_load_path() const;
uint32_t get_width() const;
uint32_t get_height() const;
uint32_t get_depth() const;
@ -536,12 +548,6 @@ public:
class ResourceFormatLoaderTextureLayered : public ResourceFormatLoader {
public:
enum Compression {
COMPRESSION_LOSSLESS,
COMPRESSION_VRAM,
COMPRESSION_UNCOMPRESSED
};
virtual RES load(const String &p_path, const String &p_original_path = "", Error *r_error = NULL);
virtual void get_recognized_extensions(List<String> *p_extensions) const;
virtual bool handles_type(const String &p_type) const;

View File

@ -120,6 +120,8 @@ public:
InstanceBase *lightmap_capture;
RID lightmap;
Vector<Color> lightmap_capture_data; //in a array (12 values) to avoid wasting space if unused. Alpha is unused, but needed to send to shader
int lightmap_slice;
Rect2 lightmap_uv_rect;
virtual void base_removed() = 0;
virtual void base_changed(bool p_aabb, bool p_materials) = 0;
@ -135,6 +137,8 @@ public:
baked_light = false;
redraw_if_visible = false;
lightmap_capture = NULL;
lightmap_slice = -1;
lightmap_uv_rect = Rect2(0, 0, 1, 1);
}
};

View File

@ -548,7 +548,7 @@ public:
BIND3(instance_set_blend_shape_weight, RID, int, float)
BIND3(instance_set_surface_material, RID, int, RID)
BIND2(instance_set_visible, RID, bool)
BIND3(instance_set_use_lightmap, RID, RID, RID)
BIND5(instance_set_use_lightmap, RID, RID, RID, int, const Rect2 &)
BIND2(instance_set_custom_aabb, RID, AABB)

View File

@ -484,7 +484,7 @@ void VisualServerScene::instance_set_base(RID p_instance, RID p_base) {
InstanceLightmapCaptureData *lightmap_capture = static_cast<InstanceLightmapCaptureData *>(instance->base_data);
//erase dependencies, since no longer a lightmap
while (lightmap_capture->users.front()) {
instance_set_use_lightmap(lightmap_capture->users.front()->get()->self, RID(), RID());
instance_set_use_lightmap(lightmap_capture->users.front()->get()->self, RID(), RID(), -1, Rect2(0, 0, 1, 1));
}
} break;
case VS::INSTANCE_GI_PROBE: {
@ -805,7 +805,7 @@ inline bool is_geometry_instance(VisualServer::InstanceType p_type) {
return p_type == VS::INSTANCE_MESH || p_type == VS::INSTANCE_MULTIMESH || p_type == VS::INSTANCE_PARTICLES || p_type == VS::INSTANCE_IMMEDIATE;
}
void VisualServerScene::instance_set_use_lightmap(RID p_instance, RID p_lightmap_instance, RID p_lightmap) {
void VisualServerScene::instance_set_use_lightmap(RID p_instance, RID p_lightmap_instance, RID p_lightmap, int p_lightmap_slice, const Rect2 &p_lightmap_uv_rect) {
Instance *instance = instance_owner.get(p_instance);
ERR_FAIL_COND(!instance);
@ -814,6 +814,8 @@ void VisualServerScene::instance_set_use_lightmap(RID p_instance, RID p_lightmap
InstanceLightmapCaptureData *lightmap_capture = static_cast<InstanceLightmapCaptureData *>(((Instance *)instance->lightmap_capture)->base_data);
lightmap_capture->users.erase(instance);
instance->lightmap = RID();
instance->lightmap_slice = -1;
instance->lightmap_uv_rect = Rect2(0, 0, 1, 1);
instance->lightmap_capture = NULL;
}
@ -826,6 +828,8 @@ void VisualServerScene::instance_set_use_lightmap(RID p_instance, RID p_lightmap
InstanceLightmapCaptureData *lightmap_capture = static_cast<InstanceLightmapCaptureData *>(((Instance *)instance->lightmap_capture)->base_data);
lightmap_capture->users.insert(instance);
instance->lightmap = p_lightmap;
instance->lightmap_slice = p_lightmap_slice;
instance->lightmap_uv_rect = p_lightmap_uv_rect;
}
}
@ -3618,7 +3622,7 @@ bool VisualServerScene::free(RID p_rid) {
Instance *instance = instance_owner.get(p_rid);
instance_set_use_lightmap(p_rid, RID(), RID());
instance_set_use_lightmap(p_rid, RID(), RID(), -1, Rect2(0, 0, 1, 1));
instance_set_scenario(p_rid, RID());
instance_set_base(p_rid, RID());
instance_geometry_set_material_override(p_rid, RID());

View File

@ -517,7 +517,7 @@ public:
virtual void instance_set_blend_shape_weight(RID p_instance, int p_shape, float p_weight);
virtual void instance_set_surface_material(RID p_instance, int p_surface, RID p_material);
virtual void instance_set_visible(RID p_instance, bool p_visible);
virtual void instance_set_use_lightmap(RID p_instance, RID p_lightmap_instance, RID p_lightmap);
virtual void instance_set_use_lightmap(RID p_instance, RID p_lightmap_instance, RID p_lightmap, int p_lightmap_slice, const Rect2 &p_lightmap_uv_rect);
virtual void instance_set_custom_aabb(RID p_instance, AABB p_aabb);

View File

@ -470,7 +470,7 @@ public:
FUNC3(instance_set_blend_shape_weight, RID, int, float)
FUNC3(instance_set_surface_material, RID, int, RID)
FUNC2(instance_set_visible, RID, bool)
FUNC3(instance_set_use_lightmap, RID, RID, RID)
FUNC5(instance_set_use_lightmap, RID, RID, RID, int, const Rect2 &)
FUNC2(instance_set_custom_aabb, RID, AABB)

View File

@ -1938,7 +1938,7 @@ void VisualServer::_bind_methods() {
ClassDB::bind_method(D_METHOD("instance_set_blend_shape_weight", "instance", "shape", "weight"), &VisualServer::instance_set_blend_shape_weight);
ClassDB::bind_method(D_METHOD("instance_set_surface_material", "instance", "surface", "material"), &VisualServer::instance_set_surface_material);
ClassDB::bind_method(D_METHOD("instance_set_visible", "instance", "visible"), &VisualServer::instance_set_visible);
ClassDB::bind_method(D_METHOD("instance_set_use_lightmap", "instance", "lightmap_instance", "lightmap"), &VisualServer::instance_set_use_lightmap);
ClassDB::bind_method(D_METHOD("instance_set_use_lightmap", "instance", "lightmap_instance", "lightmap", "lightmap_slice", "lightmap_uv_rect"), &VisualServer::instance_set_use_lightmap, DEFVAL(-1), DEFVAL(Rect2(0, 0, 1, 1)));
ClassDB::bind_method(D_METHOD("instance_set_custom_aabb", "instance", "aabb"), &VisualServer::instance_set_custom_aabb);
ClassDB::bind_method(D_METHOD("instance_attach_skeleton", "instance", "skeleton"), &VisualServer::instance_attach_skeleton);
ClassDB::bind_method(D_METHOD("instance_set_exterior", "instance", "enabled"), &VisualServer::instance_set_exterior);

View File

@ -843,7 +843,7 @@ public:
virtual void instance_set_surface_material(RID p_instance, int p_surface, RID p_material) = 0;
virtual void instance_set_visible(RID p_instance, bool p_visible) = 0;
virtual void instance_set_use_lightmap(RID p_instance, RID p_lightmap_instance, RID p_lightmap) = 0;
virtual void instance_set_use_lightmap(RID p_instance, RID p_lightmap_instance, RID p_lightmap, int p_lightmap_slice, const Rect2 &p_lightmap_uv_rect) = 0;
virtual void instance_set_custom_aabb(RID p_instance, AABB aabb) = 0;

54
thirdparty/README.md vendored
View File

@ -39,6 +39,25 @@ Files extracted from upstream source:
- all .cpp, .h, and .txt files in ConvectionKernels/
## embree
- Upstream: https://github.com/embree/embree
- Version: 3.12.1 (69bd4c272f1ed608494f233ecfff3feec516880b, 2020)
- License: Apache 2.0
Files extracted from upstream:
- All cpp files listed in `modules/raytrace/godot_update_embree.py`
- All header files in the directories listed in `modules/raytrace/godot_update_embree.py`
The `modules/raytrace/godot_update_embree.py` script can be used to pull the
relevant files from the latest Embree release and apply some automatic changes.
Some minor changes have been made in order to fix build errors.
They are marked with `// -- GODOT start --` and `// -- GODOT end --`
comments. Apply the patches in the `patches/` folder when syncing on newer upstream
commits.
## enet
@ -334,6 +353,10 @@ Collection of single-file libraries used in Godot components.
* Version: git (2f625846a775501fb69456567409a8b12f10ea25, 2012)
* License: BSD-3-Clause
* Modifications: use `const char*` instead of `char*` for input string
- `stb_rect_pack.h`
* Upstream: https://github.com/nothings/stb
* Version: 1.00
* License: Public Domain (Unlicense) or MIT
- `stb_vorbis.c`
* Upstream: https://github.com/nothings/stb
* Version: 1.20 (314d0a6f9af5af27e585336eecea333e95c5a2d8, 2020)
@ -360,6 +383,37 @@ Files extracted from the upstream source:
- LICENSE.txt
## oidn
- Upstream: https://github.com/OpenImageDenoise/oidn
- Version: 1.1.0 (c58c5216db05ceef4cde5a096862f2eeffd14c06, 2019)
- License: Apache 2.0
Files extracted from upstream source:
common/* (except tasking.* and CMakeLists.txt)
core/*
include/OpenImageDenoise/* (except version.h.in)
LICENSE.txt
mkl-dnn/include/*
mkl-dnn/src/* (except CMakeLists.txt)
weights/rtlightmap_hdr.tza
scripts/resource_to_cpp.py
Modified files:
Modifications are marked with `// -- GODOT start --` and `// -- GODOT end --`.
Patch files are provided in `oidn/patches/`.
core/autoencoder.cpp
core/autoencoder.h
core/common.h
core/device.cpp
core/device.h
core/transfer_function.cpp
scripts/resource_to_cpp.py (used in modules/denoise/resource_to_cpp.py)
## opus
- Upstream: https://opus-codec.org

View File

@ -0,0 +1,55 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <functional>
#include "parallel_reduce.h"
namespace embree
{
/* Returns true if pred(i) holds for at least one index i in [first,last).
 * The range is evaluated through whichever tasking backend is compiled in:
 * TBB (two code paths depending on TBB_INTERFACE_VERSION) or, otherwise,
 * parallel_reduce with a bitwise-or reduction. */
template<typename Index, class UnaryPredicate>
__forceinline bool parallel_any_of (Index first, Index last, UnaryPredicate pred)
{
bool ret = false;
#if defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
/* the context is not handed to tbb::parallel_for; the lambda only uses it as
   a "found" flag that is polled at the start of every sub-range */
tbb::task_group_context context;
tbb::parallel_for(tbb::blocked_range<size_t>{first, last}, [&ret,pred,&context](const tbb::blocked_range<size_t>& r) {
if (context.is_group_execution_cancelled()) return;
for (size_t i = r.begin(); i != r.end(); ++i) {
if (pred(i)) {
/* NOTE(review): ret is a plain bool written from the worker lambda without
   synchronization; every writer stores the same value (true) */
ret = true;
context.cancel_group_execution();
}
}
});
#else
/* older TBB: cancellation goes through the currently running task */
tbb::parallel_for(tbb::blocked_range<size_t>{first, last}, [&ret,pred](const tbb::blocked_range<size_t>& r) {
if (tbb::task::self().is_cancelled()) return;
for (size_t i = r.begin(); i != r.end(); ++i) {
if (pred(i)) {
ret = true;
tbb::task::self().cancel_group_execution();
}
}
});
#endif
#else
/* no TBB: each sub-range evaluates pred for all of its indices (no early
   exit) and the per-range results are combined with std::bit_or */
ret = parallel_reduce (first, last, false, [pred](const range<size_t>& r)->bool {
bool localret = false;
for (auto i=r.begin(); i<r.end(); ++i) {
localret |= pred(i);
}
return localret;
},
std::bit_or<bool>()
);
#endif
return ret;
}
} // end namespace

View File

@ -0,0 +1,56 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_filter.h"
#include "../sys/regression.h"
#include <map>
namespace embree
{
/* Regression test for parallel_filter. For arrays of growing size it filters
 * the sub-range [N0,N) of random numbers with a "multiple of four" predicate
 * and checks that every retained element satisfies the predicate and occurs
 * exactly as often as it did in the input. */
struct parallel_filter_regression_test : public RegressionTest
{
  parallel_filter_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  bool run ()
  {
    bool passed = true;
    auto keep = [&]( uint32_t v ) { return (v & 0x3) == 0; };

    for (size_t N=10; N<1000000; N=size_t(2.1*N))
    {
      /* random start of the filtered range */
      const size_t N0 = rand() % N;

      /* random input data */
      std::vector<uint32_t> src(N);
      for (size_t i=0; i<N; i++)
        src[i] = rand();

      /* occurrence count of every value in [N0,N) that should survive */
      std::map<uint32_t,int> m;
      for (size_t i=N0; i<N; i++) {
        if (keep(src[i]))
          ++m[src[i]];
      }

      /* filter in place; M is the end of the retained range */
      const size_t M = parallel_filter(src.data(),N0,N,size_t(1024),keep);

      /* every retained element must satisfy the predicate ... */
      for (size_t i=N0; i<M; i++) {
        passed &= keep(src[i]);
        m[src[i]]--;
      }

      /* ... and must have been retained as often as it was counted */
      for (size_t i=N0; i<M; i++) {
        passed &= (m[src[i]] == 0);
      }
    }

    return passed;
  }
};
parallel_filter_regression_test parallel_filter_regression("parallel_filter_regression");
}

View File

@ -0,0 +1,93 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
namespace embree
{
/* Compacts data[first,last) in place: elements for which predicate returns
 * true are moved to the front of the range in their original order. Returns
 * the index one past the last kept element. */
template<typename Ty, typename Index, typename Predicate>
inline Index sequential_filter( Ty* data, const Index first, const Index last, const Predicate& predicate)
{
  Index write = first;
  for (Index read=first; read<last; read++)
  {
    if (!predicate(data[read]))
      continue;

    data[write] = data[read];
    write++;
  }
  return write;
}
/* Filters data[begin,end) in place using up to MAX_TASKS parallel tasks and
 * returns the index one past the last kept element (begin + number kept).
 * Ranges of at most minStepSize elements are handled by sequential_filter.
 *
 * Pass 1 compacts each task's slice independently, which leaves a hole of
 * rejected slots at the end of every slice. Pass 2 fills the holes that lie
 * in front of the final end (begin+sused) with kept elements taken from the
 * slices behind it.
 *
 * NOTE(review): because holes are filled from the back, the kept elements do
 * not appear to retain their original relative order on the parallel path,
 * unlike sequential_filter - confirm before relying on ordering. */
template<typename Ty, typename Index, typename Predicate>
inline Index parallel_filter( Ty* data, const Index begin, const Index end, const Index minStepSize, const Predicate& predicate)
{
/* sequential fallback */
if (end-begin <= minStepSize)
return sequential_filter(data,begin,end,predicate);
/* calculate number of tasks to use */
enum { MAX_TASKS = 64 };
const Index numThreads = TaskScheduler::threadCount();
const Index numBlocks = (end-begin+minStepSize-1)/minStepSize;
const Index taskCount = min(numThreads,numBlocks,(Index)MAX_TASKS);
/* filter blocks */
/* nused[t]: elements kept in slice t, nfree[t]: slots freed at its end */
Index nused[MAX_TASKS];
Index nfree[MAX_TASKS];
parallel_for(taskCount, [&](const Index taskIndex)
{
const Index i0 = begin+(taskIndex+0)*(end-begin)/taskCount;
const Index i1 = begin+(taskIndex+1)*(end-begin)/taskCount;
const Index i2 = sequential_filter(data,i0,i1,predicate);
nused[taskIndex] = i2-i0;
nfree[taskIndex] = i1-i2;
});
/* calculate offsets */
/* sused/sfree: totals over all slices, pfree[t]: free slots in slices before t */
Index sused=0;
Index sfree=0;
Index pfree[MAX_TASKS];
for (Index i=0; i<taskCount; i++)
{
sused+=nused[i];
Index cfree = nfree[i]; pfree[i] = sfree; sfree+=cfree;
}
/* return if we did not filter out any element */
assert(sfree <= end-begin);
assert(sused <= end-begin);
if (sused == end-begin)
return end;
/* otherwise we have to copy misplaced elements around */
parallel_for(taskCount, [&](const Index taskIndex)
{
/* destination to write elements to */
/* the hole of this slice, clipped to the final compacted range */
Index dst = begin+(taskIndex+0)*(end-begin)/taskCount+nused[taskIndex];
Index dst_end = min(dst+nfree[taskIndex],begin+sused);
if (dst_end <= dst) return;
/* range of misplaced elements to copy to destination */
Index r0 = pfree[taskIndex];
Index r1 = r0+dst_end-dst;
/* find range in misplaced elements in back to front order */
/* [k0,k1) numbers the kept elements of slice i, counted from the last slice
   towards the front; slice 0 is never used as a source */
Index k0=0;
for (Index i=taskCount-1; i>0; i--)
{
if (k0 > r1) break;
Index k1 = k0+nused[i];
Index src = begin+(i+0)*(end-begin)/taskCount+nused[i];
/* the inner loop variable shadows the slice index i above; src was computed
   from the slice index before the shadowing starts */
for (Index i=max(r0,k0); i<min(r1,k1); i++) {
Index isrc = src-i+k0-1;
assert(dst >= begin && dst < end);
assert(isrc >= begin && isrc < end);
data[dst++] = data[isrc];
}
k0 = k1;
}
});
return begin+sused;
}
}

View File

@ -0,0 +1,48 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_for.h"
#include "../sys/regression.h"
namespace embree
{
/* Regression test for the ranged parallel_for. For a series of growing N the
 * sum of squares of [0,N) is computed sequentially and then M times through
 * parallel_for with a block size of 1024; every parallel result has to match
 * the sequential one. */
struct parallel_for_regression_test : public RegressionTest
{
  parallel_for_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  bool run ()
  {
    bool passed = true;

    const size_t M = 10;
    for (size_t N=10; N<10000000; N=size_t(2.1*N))
    {
      /* sequentially calculate sum of squares */
      size_t sum0 = 0;
      for (size_t i=0; i<N; i++) {
        sum0 += i*i;
      }

      /* parallel calculation of sum of squares */
      for (size_t m=0; m<M; m++)
      {
        std::atomic<size_t> sum1(0);
        parallel_for( size_t(0), size_t(N), size_t(1024), [&](const range<size_t>& r)
        {
          /* accumulate locally, then publish once per block */
          size_t s = 0;
          for (size_t i=r.begin(); i<r.end(); i++)
            s += i*i;
          sum1 += s;
        });

        /* accumulate the verdict: a plain assignment here would let a later
           matching run hide an earlier mismatch */
        passed &= (sum0 == sum1);
      }
    }
    return passed;
  }
};
parallel_for_regression_test parallel_for_regression("parallel_for_regression_test");
}

View File

@ -0,0 +1,156 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../tasking/taskscheduler.h"
#include "../sys/array.h"
#include "../math/math.h"
#include "../math/range.h"
namespace embree
{
/* parallel_for without range: calls func(i) for every i in [0,N) through the
 * tasking backend selected at compile time. With the internal scheduler and
 * with TBB a std::runtime_error("task cancelled") is thrown when the backend
 * reports cancellation; the PPL path performs no such check. Compilation
 * fails if no tasking backend macro is defined. */
template<typename Index, typename Func>
__forceinline void parallel_for( const Index N, const Func& func)
{
#if defined(TASKING_INTERNAL)
/* nothing is spawned for N == 0 */
if (N) {
/* block size 1, so each spawned range holds exactly one index */
TaskScheduler::spawn(Index(0),N,Index(1),[&] (const range<Index>& r) {
assert(r.size() == 1);
func(r.begin());
});
if (!TaskScheduler::wait())
throw std::runtime_error("task cancelled");
}
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
/* newer TBB: cancellation is observed through an explicit context */
tbb::task_group_context context;
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
#else
/* older TBB: cancellation is queried on the current task */
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
});
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
#endif
#elif defined(TASKING_PPL)
concurrency::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
});
#else
# error "no tasking system enabled"
#endif
}
/* parallel for with range and granularity: calls func with sub-ranges
 * (range<Index>) of [first,last). minStepSize is handed to the internal
 * scheduler and to TBB as the block size / grain size; the PPL path ignores
 * it and calls func with one single-element range per index. The internal
 * scheduler and TBB paths throw std::runtime_error("task cancelled") when
 * cancellation is reported. */
template<typename Index, typename Func>
__forceinline void parallel_for( const Index first, const Index last, const Index minStepSize, const Func& func)
{
assert(first <= last);
#if defined(TASKING_INTERNAL)
TaskScheduler::spawn(first,last,minStepSize,func);
if (!TaskScheduler::wait())
throw std::runtime_error("task cancelled");
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
/* newer TBB: cancellation is observed through an explicit context */
tbb::task_group_context context;
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
func(range<Index>(r.begin(),r.end()));
},context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
#else
/* older TBB: cancellation is queried on the current task */
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
func(range<Index>(r.begin(),r.end()));
});
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
#endif
#elif defined(TASKING_PPL)
/* step 1 instead of minStepSize: func sees ranges of exactly one element */
concurrency::parallel_for(first, last, Index(1) /*minStepSize*/, [&](Index i) {
func(range<Index>(i,i+1));
});
#else
# error "no tasking system enabled"
#endif
}
/* parallel for with range: forwards to the ranged parallel_for overload with
 * a minimal step size of 1 */
template<typename Index, typename Func>
__forceinline void parallel_for( const Index first, const Index last, const Func& func)
{
assert(first <= last);
parallel_for(first,last,(Index)1,func);
}
/* parallel_for_static / parallel_for_affinity: with TBB (interface version
 * above 4001) these run func(i) for i in [0,N) using tbb::simple_partitioner
 * or a caller-supplied tbb::affinity_partitioner, and throw
 * std::runtime_error("task cancelled") when cancellation is reported.
 * Without such a TBB both fall back to the plain parallel_for(N,func) and
 * affinity_partitioner becomes an empty placeholder type. */
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION > 4001)
template<typename Index, typename Func>
__forceinline void parallel_for_static( const Index N, const Func& func)
{
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},tbb::simple_partitioner(),context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},tbb::simple_partitioner());
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
#endif
}
/* expose the TBB partitioner under the name the fallback branch also defines */
typedef tbb::affinity_partitioner affinity_partitioner;
template<typename Index, typename Func>
__forceinline void parallel_for_affinity( const Index N, const Func& func, tbb::affinity_partitioner& ap)
{
#if TBB_INTERFACE_VERSION >= 12002
tbb::task_group_context context;
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},ap,context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},ap);
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
#endif
}
#else
/* fallback: identical to parallel_for(N,func) */
template<typename Index, typename Func>
__forceinline void parallel_for_static( const Index N, const Func& func)
{
parallel_for(N,func);
}
/* empty stand-in so callers can declare a partitioner without TBB */
struct affinity_partitioner {
};
/* fallback: the partitioner argument is accepted but not used */
template<typename Index, typename Func>
__forceinline void parallel_for_affinity( const Index N, const Func& func, affinity_partitioner& ap)
{
parallel_for(N,func);
}
#endif
}

View File

@ -0,0 +1,63 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_for_for.h"
#include "../sys/regression.h"
namespace embree
{
struct parallel_for_for_regression_test : public RegressionTest
{
parallel_for_for_regression_test(const char* name) : RegressionTest(name) {
registerRegressionTest(this);
}
bool run ()
{
bool passed = true;
/* create vector with random numbers */
size_t sum0 = 0;
size_t K = 0;
const size_t M = 1000;
std::vector<std::vector<size_t>* > array2(M);
for (size_t i=0; i<M; i++) {
const size_t N = rand() % 1024;
K+=N;
array2[i] = new std::vector<size_t>(N);
for (size_t j=0; j<N; j++)
sum0 += (*array2[i])[j] = rand();
}
/* array to test global index */
std::vector<atomic<size_t>> verify_k(K);
for (size_t i=0; i<K; i++) verify_k[i].store(0);
/* add all numbers using parallel_for_for */
std::atomic<size_t> sum1(0);
parallel_for_for( array2, size_t(1), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k) -> size_t
{
size_t s = 0;
for (size_t i=r.begin(); i<r.end(); i++) {
s += (*v)[i];
verify_k[k++]++;
}
sum1 += s;
return sum1;
});
passed &= (sum0 == sum1);
/* check global index */
for (size_t i=0; i<K; i++)
passed &= (verify_k[i] == 1);
/* delete vectors again */
for (size_t i=0; i<array2.size(); i++)
delete array2[i];
return passed;
}
};
parallel_for_for_regression_test parallel_for_for_regression("parallel_for_for_regression_test");
}

View File

@ -0,0 +1,149 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
namespace embree
{
/* Sequential counterpart of parallel_for_for: walks the arrays of array2 in
 * order and calls func(array, full range of that array, global index of its
 * first element) for every non-empty array. minStepSize is not used here. */
template<typename ArrayArray, typename Func>
__forceinline void sequential_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
{
  size_t global_index = 0;
  for (size_t i=0; i!=array2.size(); ++i)
  {
    const size_t count = array2[i]->size();
    if (count == 0)
      continue;

    func(array2[i],range<size_t>(0,count),global_index);
    global_index += count;
  }
}
/* Work split used by parallel_for_for and parallel_for_for_reduce. For an
 * array of arrays it stores the total element count N, the number of tasks
 * to run, and for every task the position (array index i0, offset j0 inside
 * that array) at which the task's share of the global index range starts.
 * Null entries of the outer array count as empty arrays. */
class ParallelForForState
{
public:

  enum { MAX_TASKS = 64 };

  /* creates an empty state: no tasks and no elements, so size() is
     well-defined before init() has been called */
  __forceinline ParallelForForState ()
    : taskCount(0), N(0) {}

  /* creates the state and immediately computes the split, see init() */
  template<typename ArrayArray>
    __forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) {
    init(array2,minStepSize);
  }

  /* computes N, taskCount and the per-task start positions for array2;
     minStepSize is the number of elements per block used to bound the
     task count */
  template<typename ArrayArray>
    __forceinline void init ( ArrayArray& array2, const size_t minStepSize )
  {
    /* first calculate total number of elements */
    size_t N = 0;
    for (size_t i=0; i<array2.size(); i++) {
      N += array2[i] ? array2[i]->size() : 0;
    }
    this->N = N;

    /* calculate number of tasks to use: at least one, at most MAX_TASKS */
    const size_t numThreads = TaskScheduler::threadCount();
    const size_t numBlocks  = (N+minStepSize-1)/minStepSize;
    taskCount = max(size_t(1),min(numThreads,numBlocks,size_t(ParallelForForState::MAX_TASKS)));

    /* calculate start (i,j) for each task; task 0 always starts at (0,0) */
    size_t taskIndex = 0;
    i0[taskIndex] = 0;
    j0[taskIndex] = 0;

    /* k0 is the global index at which the next task starts, k the global
       index of element j of array i */
    size_t k0 = (++taskIndex)*N/taskCount;
    for (size_t i=0, k=0; taskIndex < taskCount; i++)
    {
      assert(i<array2.size());
      size_t j=0, M = array2[i] ? array2[i]->size() : 0;

      /* several tasks may start inside the same array */
      while (j<M && k+M-j >= k0 && taskIndex < taskCount) {
        assert(taskIndex<taskCount);
        i0[taskIndex] = i;
        j0[taskIndex] = j += k0-k;
        k=k0;
        k0 = (++taskIndex)*N/taskCount;
      }
      k+=M-j;
    }
  }

  /* total number of elements over all arrays */
  __forceinline size_t size() const {
    return N;
  }

public:
  size_t i0[MAX_TASKS]; /* per task: index of the array it starts in */
  size_t j0[MAX_TASKS]; /* per task: offset inside that array */
  size_t taskCount;     /* number of tasks to run */
  size_t N;             /* total number of elements */
};
/*! Visits all elements of an array of arrays in parallel.
 *
 *  The elements are split into state.taskCount contiguous chunks of the
 *  flattened index space. For every piece of a sub-array that falls
 *  into a chunk, func(subArray, range<size_t>(begin,end), k) is called,
 *  with k being the global index of the first element of that piece.
 *  Null sub-arrays are treated as empty. */
template<typename ArrayArray, typename Func>
__forceinline void parallel_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
{
  ParallelForForState state(array2,minStepSize);

  parallel_for(state.taskCount, [&](const size_t taskIndex)
  {
    /* global element range [first,last) handled by this task */
    const size_t first = (taskIndex+0)*state.size()/state.taskCount;
    const size_t last  = (taskIndex+1)*state.size()/state.taskCount;

    /* only the first visited sub-array starts at a non-zero offset */
    size_t offset = state.j0[taskIndex];
    size_t cursor = first;
    for (size_t arrayID = state.i0[taskIndex]; cursor < last; arrayID++)
    {
      const size_t count = array2[arrayID] ? array2[arrayID]->size() : 0;
      const size_t lo = offset;
      const size_t hi = min(count,lo+last-cursor);
      if (hi > lo) func(array2[arrayID],range<size_t>(lo,hi),cursor);
      cursor += hi-lo;
      offset = 0;
    }
  });
}
/*! Convenience overload of parallel_for_for that uses a minimal step size of 1. */
template<typename ArrayArray, typename Func>
__forceinline void parallel_for_for( ArrayArray& array2, const Func& func )
{
  const size_t defaultMinStepSize = 1;
  parallel_for_for(array2,defaultMinStepSize,func);
}
/*! Parallel reduction over all elements of an array of arrays.
 *
 *  Each task folds the values returned by
 *  func(subArray, range<size_t>(begin,end), k) into its own partial
 *  result, starting from identity. The partial results are then
 *  combined in task order with reduction. */
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const size_t minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
  ParallelForForState state(array2,minStepSize);

  /* one partial result per task */
  Value partial[ParallelForForState::MAX_TASKS];
  for (size_t t=0; t<state.taskCount; t++)
    partial[t] = identity;

  parallel_for(state.taskCount, [&](const size_t taskIndex)
  {
    /* global element range [first,last) handled by this task */
    const size_t first = (taskIndex+0)*state.size()/state.taskCount;
    const size_t last  = (taskIndex+1)*state.size()/state.taskCount;

    /* only the first visited sub-array starts at a non-zero offset */
    size_t offset = state.j0[taskIndex];
    size_t cursor = first;
    for (size_t arrayID = state.i0[taskIndex]; cursor < last; arrayID++)
    {
      const size_t count = array2[arrayID] ? array2[arrayID]->size() : 0;
      const size_t lo = offset;
      const size_t hi = min(count,lo+last-cursor);
      if (hi > lo)
        partial[taskIndex] = reduction(partial[taskIndex],func(array2[arrayID],range<size_t>(lo,hi),cursor));
      cursor += hi-lo;
      offset = 0;
    }
  });

  /* combine the per-task results sequentially */
  Value result = identity;
  for (size_t t=0; t<state.taskCount; t++)
    result = reduction(result,partial[t]);
  return result;
}
/*! Convenience overload of parallel_for_for_reduce that uses a minimal step size of 1. */
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction)
{
  const size_t defaultMinStepSize = 1;
  return parallel_for_for_reduce(array2,defaultMinStepSize,identity,func,reduction);
}
}

View File

@ -0,0 +1,85 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_for_for_prefix_sum.h"
#include "../sys/regression.h"
namespace embree
{
/*! Regression test for parallel_for_for_prefix_sum0/1: flattens an
 *  array of arrays of counts and checks that every input element is
 *  visited once per pass and every output slot is written exactly once. */
struct parallel_for_for_prefix_sum_regression_test : public RegressionTest
{
  parallel_for_for_prefix_sum_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  bool run ()
  {
    bool passed = true;

    /* create vector with random numbers */
    typedef std::vector<std::vector<size_t>* > ArrayArray;
    const size_t numArrays = 10;
    std::vector<atomic<size_t>> flattened;
    ArrayArray array2(numArrays);
    size_t numElements = 0;
    for (size_t a=0; a<numArrays; a++) {
      const size_t len = rand() % 10;
      numElements += len;
      array2[a] = new std::vector<size_t>(len);
      for (size_t e=0; e<len; e++)
        (*array2[a])[e] = rand() % 10;
    }

    /* array to test global index */
    std::vector<atomic<size_t>> verify_k(numElements);
    for (size_t e=0; e<numElements; e++) verify_k[e].store(0);

    ParallelForForPrefixSumState<size_t> state(array2,size_t(1));

    /* dry run only counts */
    const size_t total = parallel_for_for_prefix_sum0( state, array2, size_t(0), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k, size_t i) -> size_t
    {
      size_t count = 0;
      for (size_t e=r.begin(); e<r.end(); e++) {
        count += (*v)[e];
        verify_k[k++]++;
      }
      return count;
    }, [](size_t v0, size_t v1) { return v0+v1; });

    /* create properly sized output array */
    flattened.resize(total);
    for (auto& slot : flattened) slot.store(0);

    /* now we actually fill the flattened array */
    parallel_for_for_prefix_sum1( state, array2, size_t(0), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k, size_t i, const size_t base) -> size_t
    {
      size_t count = 0;
      for (size_t e=r.begin(); e<r.end(); e++) {
        const size_t n = (*v)[e];
        for (size_t j=0; j<n; j++) {
          flattened[base+count+j]++;
        }
        count += n;
        verify_k[k++]++;
      }
      return count;
    }, [](size_t v0, size_t v1) { return v0+v1; });

    /* check global index: every element was seen once in each of the two passes */
    for (size_t e=0; e<numElements; e++)
      passed &= (verify_k[e] == 2);

    /* check if each element was assigned exactly once */
    for (size_t s=0; s<flattened.size(); s++)
      passed &= (flattened[s] == 1);

    /* delete arrays again */
    for (size_t a=0; a<array2.size(); a++)
      delete array2[a];

    return passed;
  }
};

parallel_for_for_prefix_sum_regression_test parallel_for_for_prefix_sum_regression("parallel_for_for_prefix_sum_regression_test");
}

View File

@ -0,0 +1,112 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for_for.h"
#include "parallel_prefix_sum.h"
namespace embree
{
/*! ParallelForForState extended by the per-task counts and sums
 *  that the parallel_for_for_prefix_sum0/1 passes read and write. */
template<typename Value>
struct ParallelForForPrefixSumState : public ParallelForForState
{
  /*! Leaves the work distribution to a later init() call. */
  __forceinline ParallelForForPrefixSumState () {}

  /*! Distributes array2 right away, see ParallelForForState. */
  template<typename ArrayArray>
  __forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize)
    : ParallelForForState(array2,minStepSize) {}

  ParallelPrefixSumState<Value> prefix_state;   //!< per-task counts and prefix sums
};
/*! First pass of the two-pass prefix sum over an array of arrays.
 *
 *  Every task reduces the values returned by
 *  func(subArray, range<Index>(begin,end), k, i) over its share of the
 *  elements and stores the result in state.prefix_state.counts. The
 *  per-task start values are then written to state.prefix_state.sums
 *  and the overall reduction is returned. minStepSize is not used
 *  here; the task layout comes from state. */
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, Index minStepSize,
                                                  const Value& identity, const Func& func, const Reduction& reduction)
{
  /* the number of tasks was fixed when the state was initialized */
  const size_t taskCount = state.taskCount;

  /* every task accumulates its own count */
  parallel_for(taskCount, [&](const size_t taskIndex)
  {
    /* global element range [first,last) handled by this task */
    const size_t first = (taskIndex+0)*state.size()/taskCount;
    const size_t last  = (taskIndex+1)*state.size()/taskCount;

    /* only the first visited sub-array starts at a non-zero offset */
    size_t offset = state.j0[taskIndex];
    size_t cursor = first;
    Value accum = identity;
    for (size_t arrayID = state.i0[taskIndex]; cursor < last; arrayID++)
    {
      const size_t count = array2[arrayID] ? array2[arrayID]->size() : 0;
      const size_t lo = offset;
      const size_t hi = min(count,lo+last-cursor);
      if (hi > lo)
        accum = reduction(accum, func(array2[arrayID],range<Index>(Index(lo),Index(hi)),Index(cursor),Index(arrayID)));
      cursor += hi-lo;
      offset = 0;
    }
    state.prefix_state.counts[taskIndex] = accum;
  });

  /* turn the per-task counts into per-task start values */
  Value running = identity;
  for (size_t t=0; t<taskCount; t++)
  {
    const Value count = state.prefix_state.counts[t];
    state.prefix_state.sums[t] = running;
    running = reduction(running,count);
  }

  return running;
}
/*! Second pass of the two-pass prefix sum over an array of arrays.
 *
 *  Works like parallel_for_for_prefix_sum0, but func receives one more
 *  argument: the start value of the task (state.prefix_state.sums, as
 *  left behind by a previous pass over the same state) combined with
 *  what the task has accumulated so far. The counts and sums in state
 *  are recomputed and the overall reduction is returned. minStepSize is
 *  not used here; the task layout comes from state. */
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, Index minStepSize,
                                                  const Value& identity, const Func& func, const Reduction& reduction)
{
  /* the number of tasks was fixed when the state was initialized */
  const size_t taskCount = state.taskCount;

  /* every task accumulates its own count and hands its running base to func */
  parallel_for(taskCount, [&](const size_t taskIndex)
  {
    /* global element range [first,last) handled by this task */
    const size_t first = (taskIndex+0)*state.size()/taskCount;
    const size_t last  = (taskIndex+1)*state.size()/taskCount;

    /* only the first visited sub-array starts at a non-zero offset */
    size_t offset = state.j0[taskIndex];
    size_t cursor = first;
    Value accum = identity;
    for (size_t arrayID = state.i0[taskIndex]; cursor < last; arrayID++)
    {
      const size_t count = array2[arrayID] ? array2[arrayID]->size() : 0;
      const size_t lo = offset;
      const size_t hi = min(count,lo+last-cursor);
      if (hi > lo)
        accum = reduction(accum, func(array2[arrayID],range<Index>(Index(lo),Index(hi)),Index(cursor),Index(arrayID),reduction(state.prefix_state.sums[taskIndex],accum)));
      cursor += hi-lo;
      offset = 0;
    }
    state.prefix_state.counts[taskIndex] = accum;
  });

  /* turn the per-task counts into per-task start values */
  Value running = identity;
  for (size_t t=0; t<taskCount; t++)
  {
    const Value count = state.prefix_state.counts[t];
    state.prefix_state.sums[t] = running;
    running = reduction(running,count);
  }

  return running;
}
/*! Convenience overload of parallel_for_for_prefix_sum0 that passes a step size of size_t(1). */
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
                                                  const Value& identity, const Func& func, const Reduction& reduction)
{
  const size_t defaultMinStepSize = 1;
  return parallel_for_for_prefix_sum0(state,array2,defaultMinStepSize,identity,func,reduction);
}
/*! Convenience overload of parallel_for_for_prefix_sum1 that passes a step size of size_t(1). */
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
                                                  const Value& identity, const Func& func, const Reduction& reduction)
{
  const size_t defaultMinStepSize = 1;
  return parallel_for_for_prefix_sum1(state,array2,defaultMinStepSize,identity,func,reduction);
}
}

View File

@ -0,0 +1,47 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_map.h"
#include "../sys/regression.h"
namespace embree
{
/*! Regression test for parallel_map: builds a map from shuffled even
 *  keys and checks that every key maps to its value while the odd
 *  neighbours of the keys are not found. */
struct parallel_map_regression_test : public RegressionTest
{
  parallel_map_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  bool run ()
  {
    bool passed = true;

    /* create key/value vectors with random numbers */
    const size_t N = 10000;
    std::vector<uint32_t> keys(N);
    std::vector<uint32_t> vals(N);

    /* all keys are even, which the negative lookups below rely on */
    for (size_t i=0; i<N; i++) keys[i] = 2*unsigned(i)*647382649;
    for (size_t i=0; i<N; i++) std::swap(keys[i],keys[rand()%N]);

    /* double in unsigned arithmetic: 2*rand() is a signed int product
       and overflows (undefined behavior) once rand() exceeds INT_MAX/2 */
    for (size_t i=0; i<N; i++) vals[i] = 2u*uint32_t(rand());

    /* create map */
    parallel_map<uint32_t,uint32_t> map;
    map.init(keys,vals);

    /* check that all keys are properly mapped */
    for (size_t i=0; i<N; i++) {
      const uint32_t* val = map.lookup(keys[i]);
      passed &= val && (*val == vals[i]);
    }

    /* check that these keys are not in the map */
    for (size_t i=0; i<N; i++) {
      passed &= !map.lookup(keys[i]+1);
    }

    return passed;
  }
};

parallel_map_regression_test parallel_map_regression("parallel_map_regression_test");
}

View File

@ -0,0 +1,85 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_sort.h"
namespace embree
{
/*! Key/value map that is built in one go from a pair of arrays
 *  (parallel copy followed by a radix sort) and queried by binary search. */
template<typename Key, typename Val>
class parallel_map
{
  /* element type of the sorted vector; converts implicitly to its key
     so that it can be sorted and compared by key */
  struct KeyValue
  {
    __forceinline KeyValue () {}

    __forceinline KeyValue (const Key key, const Val val)
      : key(key), val(val) {}

    __forceinline operator Key() const {
      return key;
    }

    Key key;
    Val val;
  };

public:

  /*! creates an empty map */
  parallel_map () {}

  /*! creates the map from a vector of keys and a vector of values */
  template<typename KeyVector, typename ValVector>
  parallel_map (const KeyVector& keys, const ValVector& values) { init(keys,values); }

  /*! initializes the map from a vector of keys and a vector of values;
   *  both vectors have to be of the same size */
  template<typename KeyVector, typename ValVector>
  void init(const KeyVector& keys, const ValVector& values)
  {
    /* reserve sufficient space for all data */
    assert(keys.size() == values.size());
    const size_t count = keys.size();
    vec.resize(count);

    /* generate key/value pairs */
    parallel_for( size_t(0), count, size_t(4*4096), [&](const range<size_t>& r) {
      for (size_t i=r.begin(); i<r.end(); i++)
        vec[i] = KeyValue(Key(keys[i]),values[i]);
    });

    /* perform parallel radix sort of the key/value pairs */
    std::vector<KeyValue> scratch(count);
    radix_sort<KeyValue,Key>(vec.data(),scratch.data(),count);
  }

  /*! Returns a pointer to the value associated with the specified key.
   *  The pointer is nullptr if the key is not contained in the map. */
  __forceinline const Val* lookup(const Key& key) const
  {
    const KeyValue* entry = find(key);
    return entry ? &entry->val : nullptr;
  }

  /*! Returns the value associated with the key if the key is in the
   *  map, otherwise returns the default value def. */
  __forceinline Val lookup(const Key& key, const Val& def) const
  {
    const KeyValue* entry = find(key);
    if (!entry) return def;
    return entry->val;
  }

  /*! clears all state */
  void clear() {
    vec.clear();
  }

private:

  /* binary search for key in the sorted vector; nullptr if it is absent */
  __forceinline const KeyValue* find(const Key& key) const
  {
    typename std::vector<KeyValue>::const_iterator it = std::lower_bound(vec.begin(), vec.end(), key);
    if (it == vec.end() || it->key != key) return nullptr;
    return &*it;
  }

private:
  std::vector<KeyValue> vec;   //!< vector containing sorted elements
};
}

View File

@ -0,0 +1,53 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_partition.h"
#include "../sys/regression.h"
namespace embree
{
/*! Regression test for parallel_partitioning: partitions random
 *  permutations of 0..N-1 around a random split value and checks the
 *  returned mid point, both reduction sums and the element placement. */
struct parallel_partition_regression_test : public RegressionTest
{
  parallel_partition_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  bool run ()
  {
    bool passed = true;

    for (size_t iteration=0; iteration<100; iteration++)
    {
      /* create random permutation */
      const size_t N = std::rand() % 1000000;
      std::vector<unsigned> array(N);
      for (unsigned v=0; v<N; v++) array[v] = v;
      for (auto& v : array) std::swap(v,array[std::rand()%array.size()]);
      const size_t split = std::rand() % (N+1);

      /* perform parallel partitioning */
      size_t left_sum = 0, right_sum = 0;
      const size_t mid = parallel_partitioning(array.data(),0,array.size(),0,left_sum,right_sum,
                                               [&] ( size_t i ) { return i < split; },
                                               [] ( size_t& sum, unsigned v) { sum += v; },
                                               [] ( size_t& sum, size_t v) { sum += v; },
                                               128);

      /*serial_partitioning(array.data(),0,array.size(),left_sum,right_sum,
                            [&] ( size_t i ) { return i < split; },
                            [] ( size_t& left_sum, int v) { left_sum += v; });*/

      /* verify result: values below split end up left, all others right */
      passed &= mid == split;
      passed &= left_sum == split*(split-1)/2;
      passed &= right_sum == N*(N-1)/2-left_sum;
      for (size_t pos=0; pos<split; pos++) passed &= array[pos] < split;
      for (size_t pos=split; pos<N; pos++) passed &= array[pos] >= split;
    }

    return passed;
  }
};

parallel_partition_regression_test parallel_partition_regression("parallel_partition_regression_test");
}

View File

@ -0,0 +1,283 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
#include "../math/range.h"
namespace embree
{
/*! Single-threaded in-place partitioning of array[begin,end).
 *
 *  Elements for which is_left returns true are moved in front of all
 *  other elements. reduction_t(leftReduction,x) is called once for
 *  every element x that ends up on the left side and
 *  reduction_t(rightReduction,x) once for every element on the right
 *  side. Returns the index, relative to array, of the first element of
 *  the right side. */
template<typename T, typename V, typename IsLeft, typename Reduction_T>
__forceinline size_t serial_partitioning(T* array,
                                         const size_t begin,
                                         const size_t end,
                                         V& leftReduction,
                                         V& rightReduction,
                                         const IsLeft& is_left,
                                         const Reduction_T& reduction_t)
{
  T* lo = array + begin;
  T* hi = array + end - 1;

  for (;;)
  {
    /* advance over elements that already are on the left side */
    while (likely(lo <= hi && is_left(*lo)))
    {
      //prefetchw(lo+4); // FIXME: enable?
      reduction_t(leftReduction,*lo);
      ++lo;
    }

    /* retreat over elements that already are on the right side */
    while (likely(lo <= hi && !is_left(*hi)))
    {
      //prefetchw(hi-4); FIXME: enable?
      reduction_t(rightReduction,*hi);
      --hi;
    }

    if (hi < lo) break;

    /* *lo belongs to the right and *hi to the left: account for both, then swap */
    reduction_t(leftReduction ,*hi);
    reduction_t(rightReduction,*lo);
    xchg(*lo,*hi);
    ++lo;
    --hi;
  }

  return lo - array;
}
/*! Helper object that partitions array[0,N) in parallel.
 *
 *  The array is cut into numTasks contiguous chunks that are each
 *  partitioned with serial_partitioning. Afterwards the elements that
 *  ended up on the wrong side of the global mid point are exchanged
 *  pairwise in a second parallel pass, see partition(). */
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
class __aligned(64) parallel_partition_task
{
ALIGNED_CLASS_(64);
private:
/* upper bound for numTasks; sizes all per-task arrays below */
static const size_t MAX_TASKS = 64;
T* array;
size_t N;
/* the functors and the identity are held by reference, so the objects
   passed to the constructor have to outlive this task object */
const IsLeft& is_left;
const Reduction_T& reduction_t;
const Reduction_V& reduction_v;
const Vi& identity;
size_t numTasks;
/* per chunk: start index and number of left elements; both arrays have
   one extra slot that partition() fills with N and 0 as terminator */
__aligned(64) size_t counter_start[MAX_TASKS+1];
__aligned(64) size_t counter_left[MAX_TASKS+1];
/* ranges before the global mid point that hold right elements, and
   ranges at or after it that hold left elements */
__aligned(64) range<ssize_t> leftMisplacedRanges[MAX_TASKS];
__aligned(64) range<ssize_t> rightMisplacedRanges[MAX_TASKS];
/* per chunk reduction results, merged with reduction_v in partition() */
__aligned(64) V leftReductions[MAX_TASKS];
__aligned(64) V rightReductions[MAX_TASKS];
public:
/*! Stores the arguments and picks the task count: the number of
 *  BLOCK_SIZE sized blocks in N, limited by the scheduler thread count
 *  and by MAX_TASKS. */
__forceinline parallel_partition_task(T* array,
const size_t N,
const Vi& identity,
const IsLeft& is_left,
const Reduction_T& reduction_t,
const Reduction_V& reduction_v,
const size_t BLOCK_SIZE)
: array(array), N(N), is_left(is_left), reduction_t(reduction_t), reduction_v(reduction_v), identity(identity),
numTasks(min((N+BLOCK_SIZE-1)/BLOCK_SIZE,min(TaskScheduler::threadCount(),MAX_TASKS))) {}
/*! Locates the range that contains the index-th item of the
 *  concatenation of the ranges in r. On return index has been reduced
 *  to the offset inside the returned range. numRanges is only used by
 *  the assertion. */
__forceinline const range<ssize_t>* findStartRange(size_t& index, const range<ssize_t>* const r, const size_t numRanges)
{
size_t i = 0;
while(index >= (size_t)r[i].size())
{
assert(i < numRanges);
index -= (size_t)r[i].size();
i++;
}
return &r[i];
}
/*! Exchanges the misplaced items number startID up to (excluding)
 *  endID: the k-th item of the concatenated left misplaced ranges is
 *  swapped with the k-th item of the concatenated right misplaced
 *  ranges. */
__forceinline void swapItemsInMisplacedRanges(const size_t numLeftMisplacedRanges,
const size_t numRightMisplacedRanges,
const size_t startID,
const size_t endID)
{
size_t leftLocalIndex = startID;
size_t rightLocalIndex = startID;
const range<ssize_t>* l_range = findStartRange(leftLocalIndex,leftMisplacedRanges,numLeftMisplacedRanges);
const range<ssize_t>* r_range = findStartRange(rightLocalIndex,rightMisplacedRanges,numRightMisplacedRanges);
/* items remaining in the current left and right range */
size_t l_left = l_range->size() - leftLocalIndex;
size_t r_left = r_range->size() - rightLocalIndex;
T *__restrict__ l = &array[l_range->begin() + leftLocalIndex];
T *__restrict__ r = &array[r_range->begin() + rightLocalIndex];
size_t size = endID - startID;
/* number of items that can be swapped before one of the ranges runs out */
size_t items = min(size,min(l_left,r_left));
while (size)
{
/* current left range exhausted: continue with the next one */
if (unlikely(l_left == 0))
{
l_range++;
l_left = l_range->size();
l = &array[l_range->begin()];
items = min(size,min(l_left,r_left));
}
/* current right range exhausted: continue with the next one */
if (unlikely(r_left == 0))
{
r_range++;
r_left = r_range->size();
r = &array[r_range->begin()];
items = min(size,min(l_left,r_left));
}
size -= items;
l_left -= items;
r_left -= items;
while(items) {
items--;
xchg(*l++,*r++);
}
}
}
/*! Partitions the array and returns the index of the first element of
 *  the right side. The per-chunk reduction results are merged into
 *  leftReduction and rightReduction using reduction_v. */
__forceinline size_t partition(V& leftReduction, V& rightReduction)
{
/* partition the individual ranges for each task */
parallel_for(numTasks,[&] (const size_t taskID) {
const size_t startID = (taskID+0)*N/numTasks;
const size_t endID = (taskID+1)*N/numTasks;
V local_left(identity);
V local_right(identity);
const size_t mid = serial_partitioning(array,startID,endID,local_left,local_right,is_left,reduction_t);
counter_start[taskID] = startID;
counter_left [taskID] = mid-startID;
leftReductions[taskID] = local_left;
rightReductions[taskID] = local_right;
});
/* terminator entry, so that counter_start[i+1] is valid for the last chunk */
counter_start[numTasks] = N;
counter_left[numTasks] = 0;
/* finalize the reductions */
for (size_t i=0; i<numTasks; i++) {
reduction_v(leftReduction,leftReductions[i]);
reduction_v(rightReduction,rightReductions[i]);
}
/* calculate mid point for partitioning */
size_t mid = counter_left[0];
for (size_t i=1; i<numTasks; i++)
mid += counter_left[i];
const range<ssize_t> globalLeft (0,mid);
const range<ssize_t> globalRight(mid,N);
/* calculate all left and right ranges that are on the wrong global side */
size_t numMisplacedRangesLeft = 0;
size_t numMisplacedRangesRight = 0;
size_t numMisplacedItemsLeft = 0;
size_t numMisplacedItemsRight = 0;
for (size_t i=0; i<numTasks; i++)
{
/* left and right part of chunk i as produced by the first pass */
const range<ssize_t> left_range (counter_start[i], counter_start[i] + counter_left[i]);
const range<ssize_t> right_range(counter_start[i] + counter_left[i], counter_start[i+1]);
const range<ssize_t> left_misplaced = globalLeft. intersect(right_range);
const range<ssize_t> right_misplaced = globalRight.intersect(left_range);
if (!left_misplaced.empty())
{
numMisplacedItemsLeft += left_misplaced.size();
leftMisplacedRanges[numMisplacedRangesLeft++] = left_misplaced;
}
if (!right_misplaced.empty())
{
numMisplacedItemsRight += right_misplaced.size();
rightMisplacedRanges[numMisplacedRangesRight++] = right_misplaced;
}
}
assert( numMisplacedItemsLeft == numMisplacedItemsRight );
/* if no items are misplaced we are done */
if (numMisplacedItemsLeft == 0)
return mid;
/* otherwise we copy the items to the right place in parallel */
parallel_for(numTasks,[&] (const size_t taskID) {
const size_t startID = (taskID+0)*numMisplacedItemsLeft/numTasks;
const size_t endID = (taskID+1)*numMisplacedItemsLeft/numTasks;
swapItemsInMisplacedRanges(numMisplacedRangesLeft,numMisplacedRangesRight,startID,endID);
});
return mid;
}
};
/*! Partitions array[begin,end) in place so that all elements for which
 *  is_left returns true come first, and returns the index of the first
 *  element of the right side.
 *
 *  Ranges with fewer than BLOCK_SIZE elements are handled by
 *  serial_partitioning; larger ranges go through a heap allocated
 *  parallel_partition_task. */
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
__noinline size_t parallel_partitioning(T* array,
                                        const size_t begin,
                                        const size_t end,
                                        const Vi &identity,
                                        V &leftReduction,
                                        V &rightReduction,
                                        const IsLeft& is_left,
                                        const Reduction_T& reduction_t,
                                        const Reduction_V& reduction_v,
                                        size_t BLOCK_SIZE = 128)
{
  /* fall back to single threaded partitioning for small N */
  if (unlikely(end-begin < BLOCK_SIZE))
    return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);

  /* otherwise use parallel code; the task works on indices relative to begin */
  typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
  std::unique_ptr<partition_task> task(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
  const size_t localMid = task->partition(leftReduction,rightReduction);
  return begin+localMid;
}
/*! Variant of parallel_partitioning with a separate threshold: ranges
 *  with fewer than PARALLEL_THRESHOLD elements are partitioned by
 *  serial_partitioning, all others by a parallel_partition_task that
 *  uses BLOCK_SIZE to choose its task count. Returns the index of the
 *  first element of the right side. */
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
__noinline size_t parallel_partitioning(T* array,
                                        const size_t begin,
                                        const size_t end,
                                        const Vi &identity,
                                        V &leftReduction,
                                        V &rightReduction,
                                        const IsLeft& is_left,
                                        const Reduction_T& reduction_t,
                                        const Reduction_V& reduction_v,
                                        size_t BLOCK_SIZE,
                                        size_t PARALLEL_THRESHOLD)
{
  /* fall back to single threaded partitioning for small N */
  if (unlikely(end-begin < PARALLEL_THRESHOLD))
    return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);

  /* otherwise use parallel code; the task works on indices relative to begin */
  typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
  std::unique_ptr<partition_task> task(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
  const size_t localMid = task->partition(leftReduction,rightReduction);
  return begin+localMid;
}
/*! Partitioning without user reductions: forwards to the full
 *  parallel_partitioning with reduction functors that do nothing and
 *  returns the index of the first element of the right side. */
template<typename T, typename IsLeft>
inline size_t parallel_partitioning(T* array,
                                    const size_t begin,
                                    const size_t end,
                                    const IsLeft& is_left,
                                    size_t BLOCK_SIZE = 128)
{
  /* dummy accumulators, never modified by the empty reductions below */
  size_t unusedLeft = 0;
  size_t unusedRight = 0;

  return parallel_partitioning(array,begin,end,0,unusedLeft,unusedRight,is_left,
                               [] (size_t& t,const T& ref) { },
                               [] (size_t& t0,size_t& t1) { },
                               BLOCK_SIZE);
}
}

View File

@ -0,0 +1,48 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_prefix_sum.h"
#include "../sys/regression.h"
namespace embree
{
/*! Regression test for parallel_prefix_sum: compares the returned total
 *  and the produced prefix sums against a sequential computation for a
 *  series of growing array sizes. */
struct parallel_prefix_sum_regression_test : public RegressionTest
{
  parallel_prefix_sum_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  bool run ()
  {
    bool passed = true;
    const size_t repetitions = 10;

    for (size_t N=10; N<10000000; N=size_t(2.1*N))
    {
      /* initialize array with random numbers */
      uint32_t expectedTotal = 0;
      std::vector<uint32_t> src(N);
      for (size_t i=0; i<N; i++) {
        src[i] = rand();
        expectedTotal += src[i];
      }

      /* calculate parallel prefix sum */
      std::vector<uint32_t> dst(N);
      for (auto& v : dst) v = 0;

      for (size_t rep=0; rep<repetitions; rep++) {
        const uint32_t total = parallel_prefix_sum(src,dst,N,0,std::plus<uint32_t>());
        passed &= (expectedTotal == total);
      }

      /* check if prefix sum is correct: dst[i] is the sum of all elements before i */
      size_t running = 0;
      for (size_t i=0; i<N; i++) {
        passed &= (uint32_t(running) == dst[i]);
        running += src[i];
      }
    }

    return passed;
  }
};

parallel_prefix_sum_regression_test parallel_prefix_sum_regression("parallel_prefix_sum_regression");
}

View File

@ -0,0 +1,85 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
namespace embree
{
/*! Scratch storage shared by the passes of a parallel prefix sum:
 *  one count and one start value per task. */
template<typename Value>
struct ParallelPrefixSumState
{
  enum { MAX_TASKS = 64 };

  Value counts[MAX_TASKS];   //!< value returned by each task in the last pass
  Value sums  [MAX_TASKS];   //!< reduction of the counts of all preceding tasks
};
/*! One pass of a parallel prefix sum over [first,last).
 *
 *  The range is split evenly between the tasks. Each task calls
 *  func(range<size_t>(begin,end), state.sums[task]) and its result is
 *  stored in state.counts. Afterwards state.sums is rewritten with the
 *  reduction of all preceding counts and the total is returned. The
 *  task count is the minimum of the scheduler thread count, the number
 *  of minStepSize sized blocks and MAX_TASKS. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)
{
  /* calculate number of tasks to use */
  const size_t threads   = TaskScheduler::threadCount();
  const size_t blocks    = (last-first+minStepSize-1)/minStepSize;
  const size_t taskCount = min(threads,blocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));

  /* every task processes one contiguous slice of the range */
  parallel_for(taskCount, [&](const size_t taskIndex)
  {
    const size_t sliceBegin = first+(taskIndex+0)*(last-first)/taskCount;
    const size_t sliceEnd   = first+(taskIndex+1)*(last-first)/taskCount;
    state.counts[taskIndex] = func(range<size_t>(sliceBegin,sliceEnd),state.sums[taskIndex]);
  });

  /* turn the per-task counts into per-task start values */
  Value running = identity;
  for (size_t t=0; t<taskCount; t++)
  {
    const Value count = state.counts[t];
    state.sums[t] = running;
    running = reduction(running,count);
  }

  return running;
}
/*! Calculates the prefix sums of src[0,N) into dst[0,N): dst[i]
 *  receives the combination (with add, starting from identity) of all
 *  elements before i. Returns the combination of all N elements.
 *
 *  Inputs with fewer than SINGLE_THREAD_THRESHOLD elements are
 *  processed sequentially; larger inputs use two parallel passes. */
template<typename SrcArray, typename DstArray, typename Value, typename Add>
__forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)
{
  /* perform single threaded prefix operation for small N */
  if (N < SINGLE_THREAD_THRESHOLD)
  {
    Value running = identity;
    for (size_t i=0; i<N; i++) {
      dst[i] = running;
      running = add(running,src[i]);
    }
    return running;
  }

  /* perform parallel prefix operation for large N */
  ParallelPrefixSumState<Value> state;

  /* initial run just sets up start values for subtasks */
  parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
    Value partial = identity;
    for (size_t i=r.begin(); i<r.end(); i++)
      partial = add(partial,src[i]);
    return partial;
  }, add);

  /* final run calculates prefix sum, offset by the start value of each subtask */
  return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
    Value partial = identity;
    for (size_t i=r.begin(); i<r.end(); i++) {
      dst[i] = add(sum,partial);
      partial = add(partial,src[i]);
    }
    return partial;
  }, add);
}
}

View File

@ -0,0 +1,49 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_reduce.h"
#include "../sys/regression.h"
namespace embree
{
/*! Regression test for parallel_reduce: compares a parallel sum of
 *  squares against the sequential result for a series of growing range
 *  sizes, repeating each size several times. */
struct parallel_reduce_regression_test : public RegressionTest
{
  parallel_reduce_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  bool run ()
  {
    bool passed = true;

    const size_t M = 10;
    for (size_t N=10; N<10000000; N=size_t(2.1*N))
    {
      /* sequentially calculate sum of squares */
      size_t sum0 = 0;
      for (size_t i=0; i<N; i++) {
        sum0 += i*i;
      }

      /* parallel calculation of sum of squares */
      for (size_t m=0; m<M; m++)
      {
        size_t sum1 = parallel_reduce( size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r) -> size_t
        {
          size_t s = 0;
          for (size_t i=r.begin(); i<r.end(); i++)
            s += i*i;
          return s;
        },
        [](const size_t v0, const size_t v1) {
          return v0+v1;
        });

        /* accumulate instead of overwrite, so that a mismatch in any
           repetition or for any N fails the whole test */
        passed &= (sum0 == sum1);
      }
    }
    return passed;
  }
};

parallel_reduce_regression_test parallel_reduce_regression("parallel_reduce_regression_test");
}

View File

@ -0,0 +1,146 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_for.h"
namespace embree
{
/*! Sequential counterpart of parallel_reduce: evaluates func once on
 *  the whole range [first,last). identity and reduction are not used. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value sequential_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
{
  const range<Index> whole(first,last);
  return func(whole);
}
/*! Sequential counterpart of parallel_reduce with a step size argument:
 *  evaluates func once on the whole range [first,last). minStepSize,
 *  identity and reduction are not used. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value sequential_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
  const range<Index> whole(first,last);
  return func(whole);
}
/*! Reduction over [first,last) on top of parallel_for.
 *
 *  The requested task count is limited by the scheduler thread count
 *  and by 512. The range is split evenly, func is evaluated on each
 *  slice in parallel, and the slice results are combined in slice order
 *  with reduction, starting from identity. minStepSize is not used here. */
template<typename Index, typename Value, typename Func, typename Reduction>
__noinline Value parallel_reduce_internal( Index taskCount, const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
  const Index maxTasks = 512;
  const Index threadCount = (Index) TaskScheduler::threadCount();
  taskCount = min(taskCount,threadCount,maxTasks);

  /* parallel invokation of all tasks */
  dynamic_large_stack_array(Value,values,taskCount,8192); // consumes at most 8192 bytes on the stack
  parallel_for(taskCount, [&](const Index taskIndex) {
    const Index sliceBegin = first+(taskIndex+0)*(last-first)/taskCount;
    const Index sliceEnd   = first+(taskIndex+1)*(last-first)/taskCount;
    values[taskIndex] = func(range<Index>(sliceBegin,sliceEnd));
  });

  /* perform reduction over all tasks */
  Value result = identity;
  for (Index t=0; t<taskCount; t++)
    result = reduction(result,values[t]);
  return result;
}
/*! Reduces func over the range [first,last) in parallel.
 *
 *  func is evaluated on sub-ranges (range<Index>) and the results are
 *  combined with reduction, starting from identity. The implementation
 *  is selected at compile time: TASKING_INTERNAL uses
 *  parallel_reduce_internal, TASKING_TBB uses tbb::parallel_reduce, and
 *  anything else uses concurrency::parallel_reduce. minStepSize is
 *  not used by the last of these. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
#if defined(TASKING_INTERNAL)
/* fast path for small number of iterations */
Index taskCount = (last-first+minStepSize-1)/minStepSize;
if (likely(taskCount == 1)) {
return func(range<Index>(first,last));
}
return parallel_reduce_internal(taskCount,first,last,minStepSize,identity,func,reduction);
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
/* this TBB interface: cancellation is queried through an explicit context */
tbb::task_group_context context;
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
reduction,context);
if (context.is_group_execution_cancelled())
throw std::runtime_error("task cancelled");
return v;
#else
/* older TBB interface: cancellation is queried on the current task */
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
reduction);
if (tbb::task::self().is_cancelled())
throw std::runtime_error("task cancelled");
return v;
#endif
#else // TASKING_PPL
/* wrapper that keeps a Value inside an oversized char buffer at an
   address aligned for Value, computed by getValuePtr() */
struct AlignedValue
{
char storage[__alignof(Value)+sizeof(Value)];
/* adds (-p) % a to p, which rounds p up to a multiple of a when a is a power of two */
static uintptr_t alignUp(uintptr_t p, size_t a) { return p + (~(p - 1) % a); };
Value* getValuePtr() { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
const Value* getValuePtr() const { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
AlignedValue(const Value& v) { new(getValuePtr()) Value(v); }
AlignedValue(const AlignedValue& v) { new(getValuePtr()) Value(*v.getValuePtr()); }
AlignedValue(const AlignedValue&& v) { new(getValuePtr()) Value(*v.getValuePtr()); };
AlignedValue& operator = (const AlignedValue& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
AlignedValue& operator = (const AlignedValue&& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
operator Value() const { return *getValuePtr(); }
};
/* iterator that only carries an index, so that an index range can be
   handed to the iterator based concurrency::parallel_reduce */
struct Iterator_Index
{
Index v;
typedef std::forward_iterator_tag iterator_category;
typedef AlignedValue value_type;
typedef Index difference_type;
typedef Index distance_type;
typedef AlignedValue* pointer;
typedef AlignedValue& reference;
__forceinline Iterator_Index() {}
__forceinline Iterator_Index(Index v) : v(v) {}
__forceinline bool operator== (Iterator_Index other) { return v == other.v; }
__forceinline bool operator!= (Iterator_Index other) { return v != other.v; }
__forceinline Iterator_Index operator++() { return Iterator_Index(++v); }
__forceinline Iterator_Index operator++(int) { return Iterator_Index(v++); }
};
/* reduces one sub-range: func on [begin.v,end.v) combined with the incoming start value */
auto range_reduction = [&](Iterator_Index begin, Iterator_Index end, const AlignedValue& start) {
assert(begin.v < end.v);
return reduction(start, func(range<Index>(begin.v, end.v)));
};
const Value v = concurrency::parallel_reduce(Iterator_Index(first), Iterator_Index(last), AlignedValue(identity), range_reduction, reduction);
return v;
#endif
}
/*! parallel_reduce with a sequential fast path: ranges with fewer than
 *  parallel_threshold elements are handled by a single call to func on
 *  the whole range, larger ranges are reduced in parallel. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
{
  /* small range: evaluate sequentially */
  if (likely(last-first < parallel_threshold))
    return func(range<Index>(first,last));

  /* large range: reduce in parallel */
  return parallel_reduce(first,last,minStepSize,identity,func,reduction);
}
/*! Range based overload of the thresholded parallel_reduce: unpacks
 *  the range into its begin and end and forwards all other arguments. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const range<Index> range, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
{
  return parallel_reduce(range.begin(),
                         range.end(),
                         minStepSize,
                         parallel_threshold,
                         identity,
                         func,
                         reduction);
}
/*! Per-index parallel reduction: func is called with a single index
 *  and the results for all indices in [first,last) are combined with
 *  reduction, starting from identity. Uses a minimal step size of 1. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
{
  /* adapts the per-index functor to the range based interface */
  auto reduceRange = [&] ( const range<Index> r ) {
    Value acc = identity;
    for (Index idx=r.begin(); idx<r.end(); idx++)
      acc = reduction(acc,func(idx));
    return acc;
  };

  return parallel_reduce(first,last,Index(1),identity,reduceRange,reduction);
}
/*! Convenience overload: unpacks a range object and forwards with a minimal
    step size of 1. Here func receives whole sub-ranges, not single indices. */
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const range<Index> range, const Value& identity, const Func& func, const Reduction& reduction )
{
  const auto first = range.begin();
  const auto last  = range.end();
  return parallel_reduce(first,last,Index(1),identity,func,reduction);
}
}

View File

@ -0,0 +1,43 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_set.h"
#include "../sys/regression.h"
namespace embree
{
/*! Regression test for parallel_set: fills a set with random even numbers and
    verifies that every inserted value is found while every odd neighbour
    (value+1, which can never have been inserted) is not. */
struct parallel_set_regression_test : public RegressionTest
{
  parallel_set_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  /*! runs the test, returns true on success */
  bool run ()
  {
    bool passed = true;

    /* create vector with random even numbers; the doubling is done in uint32_t
       because 2*rand() in signed int overflows (undefined behavior) whenever
       rand() exceeds INT_MAX/2 */
    const size_t N = 10000;
    std::vector<uint32_t> unsorted(N);
    for (size_t i=0; i<N; i++) unsorted[i] = 2*uint32_t(rand());

    /* create set from numbers */
    parallel_set<uint32_t> sorted;
    sorted.init(unsorted);

    /* check that all elements are in the set */
    for (size_t i=0; i<N; i++) {
      passed &= sorted.lookup(unsorted[i]);
    }

    /* check that these (odd) elements are not in the set */
    for (size_t i=0; i<N; i++) {
      passed &= !sorted.lookup(unsorted[i]+1);
    }

    return passed;
  }
};

parallel_set_regression_test parallel_set_regression("parallel_set_regression_test");
}

View File

@ -0,0 +1,52 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "parallel_sort.h"
namespace embree
{
/*! Set of values stored as a sorted vector. The set is built in one go from an
    indexable container (parallel copy followed by a radix sort) and afterwards
    queried by binary search. */
template<typename T>
class parallel_set
{
public:

  /*! creates an empty set */
  parallel_set () {}

  /*! creates the set from the elements of an indexable container */
  template<typename Vector>
  parallel_set (const Vector& in) { init(in); }

  /*! (re)builds the set from the elements of an indexable container */
  template<typename Vector>
  void init(const Vector& in)
  {
    /* parallel copy of the input into the internal storage */
    vec.resize(in.size());
    parallel_for( size_t(0), in.size(), size_t(4*4096), [&](const range<size_t>& block) {
      size_t i = block.begin();
      while (i < block.end()) {
        vec[i] = in[i];
        i++;
      }
    });

    /* sort the copy; radix_sort needs a scratch buffer of the same size */
    std::vector<T> scratch(in.size());
    radix_sort<T>(vec.data(),scratch.data(),vec.size());
  }

  /*! returns true if elt is contained in the set */
  __forceinline bool lookup(const T& elt) const {
    typename std::vector<T>::const_iterator pos = std::lower_bound(vec.begin(), vec.end(), elt);
    return pos != vec.end() && !(elt < *pos);
  }

  /*! removes all elements */
  void clear() {
    vec.clear();
  }

private:
  std::vector<T> vec;   //!< elements in sorted order
};
}

View File

@ -0,0 +1,50 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "parallel_sort.h"
#include "../sys/regression.h"
namespace embree
{
/*! Regression test for radix_sort: for a series of growing array sizes it sorts
    random keys several times and checks that the sum of all keys is unchanged
    and that the result is in non-decreasing order. */
template<typename Key>
struct RadixSortRegressionTest : public RegressionTest
{
  RadixSortRegressionTest(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  /*! runs the test, returns true on success */
  bool run ()
  {
    bool passed = true;
    const size_t M = 10;   // number of times each array is sorted

    for (size_t N=10; N<1000000; N=size_t(2.1*N))
    {
      std::vector<Key> src(N); memset(src.data(),0,N*sizeof(Key));
      std::vector<Key> tmp(N); memset(tmp.data(),0,N*sizeof(Key));
      for (size_t i=0; i<N; i++) src[i] = uint64_t(rand())*uint64_t(rand());

      /* wrapping sum of all keys, used to detect lost or duplicated elements */
      auto checksum = [&] () {
        Key sum = 0;
        for (size_t i=0; i<N; i++) sum += src[i];
        return sum;
      };

      const Key sumBefore = checksum();

      /* sort the (after the first pass already sorted) numbers M times */
      for (size_t pass=0; pass<M; pass++) {
        radix_sort<Key>(src.data(),tmp.data(),N);
      }

      const Key sumAfter = checksum();
      if (sumBefore != sumAfter) passed = false;

      /* neighbouring elements must be ordered */
      for (size_t i=1; i<N; i++) {
        if (!(src[i-1] <= src[i])) passed = false;
      }
    }
    return passed;
  }
};

RadixSortRegressionTest<uint32_t> test_u32("RadixSortRegressionTestU32");
RadixSortRegressionTest<uint64_t> test_u64("RadixSortRegressionTestU64");
}

View File

@ -0,0 +1,454 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../simd/simd.h"
#include "parallel_for.h"
#include <algorithm>
namespace embree
{
/*! Insertion sort of array[0..length) into ascending order (uses operator<).
    Elements that compare equal keep their relative order. */
template<class T>
__forceinline void insertionsort_ascending(T *__restrict__ array, const size_t length)
{
  for (size_t next = 1; next < length; ++next)
  {
    /* take the next unsorted element and open a hole at its position */
    const T key = array[next];
    size_t hole = next;

    /* shift larger elements of the sorted prefix one slot to the right */
    for (; hole > 0 && key < array[hole-1]; --hole)
      array[hole] = array[hole-1];

    array[hole] = key;
  }
}
/*! Insertion sort of array[0..length) into descending order (uses operator>).
    Elements that compare equal keep their relative order. */
template<class T>
__forceinline void insertionsort_decending(T *__restrict__ array, const size_t length)
{
  for (size_t next = 1; next < length; ++next)
  {
    /* take the next unsorted element and open a hole at its position */
    const T key = array[next];
    size_t hole = next;

    /* shift smaller elements of the sorted prefix one slot to the right */
    for (; hole > 0 && key > array[hole-1]; --hole)
      array[hole] = array[hole-1];

    array[hole] = key;
  }
}
/*! Recursive quicksort of t[begin..end] (both bounds inclusive) into ascending
    order. The first element of the range is used as pivot value and the range
    is partitioned by two indices that move towards each other. T has to
    provide operator< and operator>. */
template<class T>
void quicksort_ascending(T *__restrict__ t,
                         const ssize_t begin,
                         const ssize_t end)
{
  if (likely(begin < end))
  {
    const T pivotvalue = t[begin];
    ssize_t left = begin - 1;
    ssize_t right = end + 1;

    while(1)
    {
      /* find the next pair of elements that sit on the wrong side */
      while (t[--right] > pivotvalue);
      while (t[++left] < pivotvalue);

      if (left >= right) break;

      const T temp = t[right];
      t[right] = t[left];
      t[left] = temp;
    }

    /* the split index has to stay an ssize_t: storing it in an int (as before)
       truncates it for ranges that extend beyond INT_MAX elements */
    const ssize_t pivot = right;
    quicksort_ascending(t, begin, pivot);
    quicksort_ascending(t, pivot + 1, end);
  }
}
/*! Recursive quicksort of t[begin..end] (both bounds inclusive) into descending
    order. The first element of the range is used as pivot value and the range
    is partitioned by two indices that move towards each other. T has to
    provide operator< and operator>. */
template<class T>
void quicksort_decending(T *__restrict__ t,
                         const ssize_t begin,
                         const ssize_t end)
{
  if (likely(begin < end))
  {
    const T pivotvalue = t[begin];
    ssize_t left = begin - 1;
    ssize_t right = end + 1;

    while(1)
    {
      /* find the next pair of elements that sit on the wrong side */
      while (t[--right] < pivotvalue);
      while (t[++left] > pivotvalue);

      if (left >= right) break;

      const T temp = t[right];
      t[right] = t[left];
      t[left] = temp;
    }

    /* the split index has to stay an ssize_t: storing it in an int (as before)
       truncates it for ranges that extend beyond INT_MAX elements */
    const ssize_t pivot = right;
    quicksort_decending(t, begin, pivot);
    quicksort_decending(t, pivot + 1, end);
  }
}
/*! Ascending quicksort of t[begin..end] (both bounds inclusive) that finishes
    sub-ranges of at most THRESHOLD elements with insertion sort. */
template<class T, ssize_t THRESHOLD>
void quicksort_insertionsort_ascending(T *__restrict__ t,
                                       const ssize_t begin,
                                       const ssize_t end)
{
  /* ranges with fewer than two elements are already sorted */
  if (!likely(begin < end)) return;

  const ssize_t count = end-begin+1;
  if (likely(count <= THRESHOLD))
  {
    insertionsort_ascending<T>(&t[begin],count);
    return;
  }

  /* partition around the first element of the range */
  const T pivotvalue = t[begin];
  ssize_t lo = begin - 1;
  ssize_t hi = end + 1;
  for (;;)
  {
    do { --hi; } while (t[hi] > pivotvalue);
    do { ++lo; } while (t[lo] < pivotvalue);
    if (lo >= hi) break;

    const T swapped = t[hi];
    t[hi] = t[lo];
    t[lo] = swapped;
  }

  /* sort both halves */
  const ssize_t split = hi;
  quicksort_insertionsort_ascending<T,THRESHOLD>(t, begin, split);
  quicksort_insertionsort_ascending<T,THRESHOLD>(t, split + 1, end);
}
/*! Descending quicksort of t[begin..end] (both bounds inclusive) that finishes
    sub-ranges of at most THRESHOLD elements with insertion sort. */
template<class T, ssize_t THRESHOLD>
void quicksort_insertionsort_decending(T *__restrict__ t,
                                       const ssize_t begin,
                                       const ssize_t end)
{
  /* ranges with fewer than two elements are already sorted */
  if (!likely(begin < end)) return;

  const ssize_t count = end-begin+1;
  if (likely(count <= THRESHOLD))
  {
    insertionsort_decending<T>(&t[begin],count);
    return;
  }

  /* partition around the first element of the range */
  const T pivotvalue = t[begin];
  ssize_t lo = begin - 1;
  ssize_t hi = end + 1;
  for (;;)
  {
    do { --hi; } while (t[hi] < pivotvalue);
    do { ++lo; } while (t[lo] > pivotvalue);
    if (lo >= hi) break;

    const T swapped = t[hi];
    t[hi] = t[lo];
    t[lo] = swapped;
  }

  /* sort both halves */
  const ssize_t split = hi;
  quicksort_insertionsort_decending<T,THRESHOLD>(t, begin, split);
  quicksort_insertionsort_decending<T,THRESHOLD>(t, split + 1, end);
}
/*! In-place radix sort on 8-bit digits, most significant digit first. Each
    element is converted with unsigned(x) and the digit at bit position 'shift'
    selects one of 256 buckets. After the elements have been permuted into
    their buckets, every non-empty bucket is sorted recursively on the next
    lower digit (shift-8); buckets with fewer than CMP_SORT_THRESHOLD elements
    are finished with insertion sort instead. Recursion stops at shift == 0.
    \param morton array to sort in place
    \param num    number of elements (counters are unsigned int)
    \param shift  bit position of the digit to sort on, default is the top byte of 32 bits
    NOTE(review): the insertion sort compares whole elements with operator<,
    presumably consistent with the unsigned(x) key - confirm for non-integer T. */
template<typename T>
static void radixsort32(T* const morton, const size_t num, const unsigned int shift = 3*8)
{
static const unsigned int BITS = 8;
static const unsigned int BUCKETS = (1 << BITS);
static const unsigned int CMP_SORT_THRESHOLD = 16;
__aligned(64) unsigned int count[BUCKETS];
/* clear buckets */
for (size_t i=0;i<BUCKETS;i++) count[i] = 0;
/* count buckets */
#if defined(__INTEL_COMPILER)
#pragma nounroll
#endif
for (size_t i=0;i<num;i++)
count[(unsigned(morton[i]) >> shift) & (BUCKETS-1)]++;
/* prefix sums: head[i] is the next free slot of bucket i, tail[i] is one past its end */
__aligned(64) unsigned int head[BUCKETS];
__aligned(64) unsigned int tail[BUCKETS];
head[0] = 0;
for (size_t i=1; i<BUCKETS; i++)
head[i] = head[i-1] + count[i-1];
for (size_t i=0; i<BUCKETS-1; i++)
tail[i] = head[i+1];
tail[BUCKETS-1] = head[BUCKETS-1] + count[BUCKETS-1];
assert(tail[BUCKETS-1] == head[BUCKETS-1] + count[BUCKETS-1]);
assert(tail[BUCKETS-1] == num);
/* in-place swap */
for (size_t i=0;i<BUCKETS;i++)
{
/* process bucket */
while(head[i] < tail[i])
{
/* pick up the element at the front of bucket i and keep swapping it into the
   bucket it belongs to until an element that belongs to bucket i is in hand */
T v = morton[head[i]];
while(1)
{
const size_t b = (unsigned(v) >> shift) & (BUCKETS-1);
if (b == i) break;
std::swap(v,morton[head[b]++]);
}
assert((unsigned(v) >> shift & (BUCKETS-1)) == i);
morton[head[i]++] = v;
}
}
/* the lowest digit has been processed, nothing left to refine */
if (shift == 0) return;
/* sort the content of each non-empty bucket; offset is the start of bucket i */
size_t offset = 0;
for (size_t i=0;i<BUCKETS;i++)
if (count[i])
{
/* debug check: elements of this bucket carry digit i (the last element is not checked) */
for (size_t j=offset;j<offset+count[i]-1;j++)
assert(((unsigned(morton[j]) >> shift) & (BUCKETS-1)) == i);
if (unlikely(count[i] < CMP_SORT_THRESHOLD))
insertionsort_ascending(morton + offset, count[i]);
else
radixsort32(morton + offset, count[i], shift-BITS);
/* debug check: the bucket is sorted now */
for (size_t j=offset;j<offset+count[i]-1;j++)
assert(morton[j] <= morton[j+1]);
offset += count[i];
}
}
/*! Radix sort on 8-bit digits, least significant digit first, that distributes
    each pass over several tasks. Ty is the element type and Key the integer
    type an element is cast to in order to obtain its sort key. Every pass reads
    one of the two buffers (src/tmp) and scatters into the other; small inputs
    are sorted with std::sort instead. */
template<typename Ty, typename Key>
class ParallelRadixSort
{
static const size_t MAX_TASKS = 64;
static const size_t BITS = 8;
static const size_t BUCKETS = (1 << BITS);
/* one histogram (element count per digit value) */
typedef unsigned int TyRadixCount[BUCKETS];
/* ordering used by the std::sort fallback: compares the Key of two elements */
template<typename T>
static bool compare(const T& v0, const T& v1) {
return (Key)v0 < (Key)v1;
}
private:
ParallelRadixSort (const ParallelRadixSort& other) DELETED; // do not implement
ParallelRadixSort& operator= (const ParallelRadixSort& other) DELETED; // do not implement
public:
/*! stores the array to sort (src), a scratch array (tmp) and the element
    count N; no memory is allocated here */
ParallelRadixSort (Ty* const src, Ty* const tmp, const size_t N)
: radixCount(nullptr), src(src), tmp(tmp), N(N) {}
/*! sorts src; inputs of at most blockSize elements are sorted single threaded
    with std::sort, larger inputs use one task per blockSize elements, limited
    by TaskScheduler::threadCount() and MAX_TASKS */
void sort(const size_t blockSize)
{
assert(blockSize > 0);
/* perform single threaded sort for small N */
if (N<=blockSize) // handles also special case of 0!
{
/* do inplace sort inside destination array */
std::sort(src,src+N,compare<Ty>);
}
/* perform parallel sort for large N */
else
{
const size_t numThreads = min((N+blockSize-1)/blockSize,TaskScheduler::threadCount(),size_t(MAX_TASKS));
tbbRadixSort(numThreads);
}
}
/*! releases the histogram storage (radixCount is nullptr if the parallel path never ran) */
~ParallelRadixSort()
{
alignedFree(radixCount);
radixCount = nullptr;
}
private:
/*! first half of a pass: task threadIndex builds the histogram of the digit at
    bit position 'shift' for its slice [startID,endID) of src and stores it in
    radixCount[threadIndex]; dst is not used here */
void tbbRadixIteration0(const Key shift,
const Ty* __restrict const src,
Ty* __restrict const dst,
const size_t threadIndex, const size_t threadCount)
{
const size_t startID = (threadIndex+0)*N/threadCount;
const size_t endID = (threadIndex+1)*N/threadCount;
/* mask to extract some number of bits */
const Key mask = BUCKETS-1;
/* count how many items go into the buckets */
for (size_t i=0; i<BUCKETS; i++)
radixCount[threadIndex][i] = 0;
/* iterate over src array and count buckets */
unsigned int * __restrict const count = radixCount[threadIndex];
#if defined(__INTEL_COMPILER)
#pragma nounroll
#endif
for (size_t i=startID; i<endID; i++) {
#if defined(__X86_64__)
const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
#else
const Key index = ((Key)src[i] >> shift) & mask;
#endif
count[index]++;
}
}
/*! second half of a pass: task threadIndex combines the histograms of all
    tasks into write offsets and copies its slice of src into dst. Within a
    bucket the elements of task 0 come first, then those of task 1, and so on,
    each slice in its original order. */
void tbbRadixIteration1(const Key shift,
const Ty* __restrict const src,
Ty* __restrict const dst,
const size_t threadIndex, const size_t threadCount)
{
const size_t startID = (threadIndex+0)*N/threadCount;
const size_t endID = (threadIndex+1)*N/threadCount;
/* mask to extract some number of bits */
const Key mask = BUCKETS-1;
/* calculate total number of items for each bucket */
__aligned(64) unsigned int total[BUCKETS];
/*
for (size_t i=0; i<BUCKETS; i++)
total[i] = 0;
*/
/* vector form of the scalar loop in the comment above (vintx and VSIZEX are defined elsewhere) */
for (size_t i=0; i<BUCKETS; i+=VSIZEX)
vintx::store(&total[i], zero);
for (size_t i=0; i<threadCount; i++)
{
/*
for (size_t j=0; j<BUCKETS; j++)
total[j] += radixCount[i][j];
*/
for (size_t j=0; j<BUCKETS; j+=VSIZEX)
vintx::store(&total[j], vintx::load(&total[j]) + vintx::load(&radixCount[i][j]));
}
/* calculate start offset of each bucket */
__aligned(64) unsigned int offset[BUCKETS];
offset[0] = 0;
for (size_t i=1; i<BUCKETS; i++)
offset[i] = offset[i-1] + total[i-1];
/* calculate start offset of each bucket for this thread */
for (size_t i=0; i<threadIndex; i++)
{
/*
for (size_t j=0; j<BUCKETS; j++)
offset[j] += radixCount[i][j];
*/
for (size_t j=0; j<BUCKETS; j+=VSIZEX)
vintx::store(&offset[j], vintx::load(&offset[j]) + vintx::load(&radixCount[i][j]));
}
/* copy items into their buckets */
#if defined(__INTEL_COMPILER)
#pragma nounroll
#endif
for (size_t i=startID; i<endID; i++) {
const Ty elt = src[i];
#if defined(__X86_64__)
const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
#else
const size_t index = ((Key)src[i] >> shift) & mask;
#endif
dst[offset[index]++] = elt;
}
}
/*! one complete pass for the digit at bit position 'shift': all tasks count
    first, then all tasks scatter from src into dst. The 'last' flag is not
    used by this implementation. */
void tbbRadixIteration(const Key shift, const bool last,
const Ty* __restrict src, Ty* __restrict dst,
const size_t numTasks)
{
affinity_partitioner ap;
parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration0(shift,src,dst,taskIndex,numTasks); },ap);
parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration1(shift,src,dst,taskIndex,numTasks); },ap);
}
/*! parallel sort: allocates the per-task histograms and runs one pass per key
    byte (4 passes for 4-byte keys, 8 passes for 8-byte keys), alternating the
    direction between src and tmp so that the final pass writes into src.
    NOTE(review): for any other sizeof(Key) no pass is executed and the data is
    left unsorted without a diagnostic.
    NOTE(review): radixCount is allocated on every call while only the
    destructor frees it, so a second parallel sort() on the same object would
    leak the first allocation. */
void tbbRadixSort(const size_t numTasks)
{
radixCount = (TyRadixCount*) alignedMalloc(MAX_TASKS*sizeof(TyRadixCount),64);
if (sizeof(Key) == sizeof(uint32_t)) {
tbbRadixIteration(0*BITS,0,src,tmp,numTasks);
tbbRadixIteration(1*BITS,0,tmp,src,numTasks);
tbbRadixIteration(2*BITS,0,src,tmp,numTasks);
tbbRadixIteration(3*BITS,1,tmp,src,numTasks);
}
else if (sizeof(Key) == sizeof(uint64_t))
{
tbbRadixIteration(0*BITS,0,src,tmp,numTasks);
tbbRadixIteration(1*BITS,0,tmp,src,numTasks);
tbbRadixIteration(2*BITS,0,src,tmp,numTasks);
tbbRadixIteration(3*BITS,0,tmp,src,numTasks);
tbbRadixIteration(4*BITS,0,src,tmp,numTasks);
tbbRadixIteration(5*BITS,0,tmp,src,numTasks);
tbbRadixIteration(6*BITS,0,src,tmp,numTasks);
tbbRadixIteration(7*BITS,1,tmp,src,numTasks);
}
}
private:
TyRadixCount* radixCount; // per-task histograms, MAX_TASKS entries once allocated
Ty* const src; // array to sort, also receives the result
Ty* const tmp; // scratch array used by the parallel passes
const size_t N; // number of elements in src and tmp
};
template<typename Ty>
void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
{
ParallelRadixSort<Ty,Ty>(src,tmp,N).sort(blockSize);
}
template<typename Ty, typename Key>
void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
{
ParallelRadixSort<Ty,Key>(src,tmp,N).sort(blockSize);
}
template<typename Ty>
void radix_sort_u32(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) {
radix_sort<Ty,uint32_t>(src,tmp,N,blockSize);
}
template<typename Ty>
void radix_sort_u64(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) {
radix_sort<Ty,uint64_t>(src,tmp,N,blockSize);
}
}

View File

@ -0,0 +1,101 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "stringstream.h"
#include "../sys/filename.h"
#include "../math/vec2.h"
#include "../math/vec3.h"
#include "../math/col3.h"
#include "../math/color.h"
namespace embree
{
/*! Helper for simple command line style parsing: wraps a stream of string
    tokens and converts consecutive tokens into ints, floats, vectors and
    colors. Conversions use atoi/atof, so malformed tokens are not reported. */
class ParseStream : public Stream<std::string>
{
public:

  /*! reads tokens from an existing string stream */
  ParseStream (const Ref<Stream<std::string> >& cin) : cin(cin) {}

  /*! tokenizes a character stream with a StringStream */
  ParseStream (const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
               const std::string& endl = "", bool multiLine = false)
    : cin(new StringStream(cin,seps,endl,multiLine)) {}

public:
  ParseLocation location() { return cin->loc(); }
  std::string next() { return cin->get(); }

  /*! consumes the next token and fails if it differs from the expected one */
  void force(const std::string& next) {
    const std::string token = getString();
    if (token == next) return;
    THROW_RUNTIME_ERROR("token \""+next+"\" expected but token \""+token+"\" found");
  }

  std::string getString() {
    return get();
  }

  FileName getFileName() {
    return FileName(get());
  }

  int getInt () {
    const std::string token = get();
    return atoi(token.c_str());
  }

  Vec2i getVec2i() {
    const int x = getInt();
    const int y = getInt();
    return Vec2i(x,y);
  }

  Vec3ia getVec3ia() {
    const int x = getInt();
    const int y = getInt();
    const int z = getInt();
    return Vec3ia(x,y,z);
  }

  float getFloat() {
    const std::string token = get();
    return (float)atof(token.c_str());
  }

  Vec2f getVec2f() {
    const float x = getFloat();
    const float y = getFloat();
    return Vec2f(x,y);
  }

  Vec3f getVec3f() {
    const float x = getFloat();
    const float y = getFloat();
    const float z = getFloat();
    return Vec3f(x,y,z);
  }

  Vec3fa getVec3fa() {
    const float x = getFloat();
    const float y = getFloat();
    const float z = getFloat();
    return Vec3fa(x,y,z);
  }

  Col3f getCol3f() {
    const float x = getFloat();
    const float y = getFloat();
    const float z = getFloat();
    return Col3f(x,y,z);
  }

  Color getColor() {
    const float r = getFloat();
    const float g = getFloat();
    const float b = getFloat();
    return Color(r,g,b);
  }

private:
  Ref<Stream<std::string> > cin;   //!< token source
};
}

215
thirdparty/embree/common/lexers/stream.h vendored Normal file
View File

@ -0,0 +1,215 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../sys/ref.h"
#include "../sys/filename.h"
#include "../sys/string.h"
#include <vector>
#include <iostream>
#include <cstdio>
#include <string.h>
namespace embree
{
/*! Position of a stream element in its source: optional file (or stream) name,
    line number and column number. Negative numbers mean "not available". */
class ParseLocation
{
public:
  ParseLocation () : lineNumber(-1), colNumber(-1) {}
  ParseLocation (std::shared_ptr<std::string> fileName, ssize_t lineNumber, ssize_t colNumber, ssize_t /*charNumber*/)
    : fileName(fileName), lineNumber(lineNumber), colNumber(colNumber) {}

  /*! formats the location as "<name> line <l> character <c>"; the name is
      "unknown" without a file name and the column is only printed together
      with a line */
  std::string str() const
  {
    std::string text = fileName ? *fileName : std::string("unknown");
    if (lineNumber >= 0) {
      text += " line " + toString(lineNumber);
      if (colNumber >= 0) text += " character " + toString(colNumber);
    }
    return text;
  }

private:
  std::shared_ptr<std::string> fileName;   /// name of the file (or stream) the token is from
  ssize_t lineNumber;                      /// the line number the token is from
  ssize_t colNumber;                       /// the character number in the current line
};
/*! a stream class templated over the stream elements */
template<typename T> class Stream : public RefCount
{
enum { BUF_SIZE = 1024 };
private:
virtual T next() = 0;
virtual ParseLocation location() = 0;
__forceinline std::pair<T,ParseLocation> nextHelper() {
ParseLocation l = location();
T v = next();
return std::pair<T,ParseLocation>(v,l);
}
__forceinline void push_back(const std::pair<T,ParseLocation>& v) {
if (past+future == BUF_SIZE) pop_front();
size_t end = (start+past+future++)%BUF_SIZE;
buffer[end] = v;
}
__forceinline void pop_front() {
if (past == 0) THROW_RUNTIME_ERROR("stream buffer empty");
start = (start+1)%BUF_SIZE; past--;
}
public:
Stream () : start(0), past(0), future(0), buffer(BUF_SIZE) {}
virtual ~Stream() {}
public:
const ParseLocation& loc() {
if (future == 0) push_back(nextHelper());
return buffer[(start+past)%BUF_SIZE].second;
}
T get() {
if (future == 0) push_back(nextHelper());
T t = buffer[(start+past)%BUF_SIZE].first;
past++; future--;
return t;
}
const T& peek() {
if (future == 0) push_back(nextHelper());
return buffer[(start+past)%BUF_SIZE].first;
}
const T& unget(size_t n = 1) {
if (past < n) THROW_RUNTIME_ERROR ("cannot unget that many items");
past -= n; future += n;
return peek();
}
void drop() {
if (future == 0) push_back(nextHelper());
past++; future--;
}
private:
size_t start,past,future;
std::vector<std::pair<T,ParseLocation> > buffer;
};
/*! wraps a std::istream as a character stream that tracks line, column and
    character numbers; the istream is held by reference */
class StdStream : public Stream<int>
{
public:
  StdStream (std::istream& cin, const std::string& name = "std::stream")
    : cin(cin), lineNumber(1), colNumber(0), charNumber(0), name(new std::string(name)) {}

  ~StdStream() {}

  ParseLocation location() {
    return ParseLocation(name,lineNumber,colNumber,charNumber);
  }

  /*! reads one character (or the end-of-file value of the istream) and updates
      the position counters */
  int next() {
    const int c = cin.get();
    charNumber++;
    switch (c) {
    case '\n': lineNumber++; colNumber = 0; break;   // new line starts at column 0
    case '\r': break;                                // carriage returns do not advance the column
    default  : colNumber++; break;
    }
    return c;
  }

private:
  std::istream& cin;
  ssize_t lineNumber;                  /// the line number the token is from
  ssize_t colNumber;                   /// the character number in the current line
  ssize_t charNumber;                  /// the character in the file
  std::shared_ptr<std::string> name;   /// name of buffer
};
/*! Character stream that reads from a C FILE and tracks line, column and
    character numbers. The file handle is closed by the destructor, also when
    it was passed in by the caller. */
class FileStream : public Stream<int>
{
public:

  /*! reads from an already opened file */
  FileStream (FILE* file, const std::string& name = "file")
    : file(file), lineNumber(1), colNumber(0), charNumber(0), name(new std::string(name)) {}

  /*! opens the named file for reading and fails if that is not possible */
  FileStream (const FileName& fileName)
    : file(fopen(fileName.c_str(),"r")), lineNumber(1), colNumber(0), charNumber(0), name(new std::string(fileName.str()))
  {
    if (file == nullptr) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
  }

  ~FileStream() { if (file) fclose(file); }

public:
  ParseLocation location() {
    return ParseLocation(name,lineNumber,colNumber,charNumber);
  }

  /*! reads one character (or EOF) and updates the position counters */
  int next() {
    const int c = fgetc(file);
    charNumber++;
    switch (c) {
    case '\n': lineNumber++; colNumber = 0; break;   // new line starts at column 0
    case '\r': break;                                // carriage returns do not advance the column
    default  : colNumber++; break;
    }
    return c;
  }

private:
  FILE* file;
  ssize_t lineNumber;                  /// the line number the token is from
  ssize_t colNumber;                   /// the character number in the current line
  ssize_t charNumber;                  /// the character in the file
  std::shared_ptr<std::string> name;   /// name of buffer
};
/*! Character stream over a zero terminated C string. The string is not copied
    and has to stay valid while the stream is in use. Once the terminating zero
    is reached every further call of next() returns EOF. */
class StrStream : public Stream<int>
{
public:
  StrStream (const char* str)
    : str(str), lineNumber(1), colNumber(0), charNumber(0) {}

public:
  ParseLocation location() {
    return ParseLocation(std::shared_ptr<std::string>(),lineNumber,colNumber,charNumber);
  }

  /*! returns the next character as a value in 0..255, or EOF at the end of the string */
  int next() {
    /* convert through unsigned char, as fgetc does: with a plain (signed) char
       the byte 0xFF would turn into -1 and be mistaken for EOF, and all other
       bytes above 0x7F would become negative */
    const int c = static_cast<unsigned char>(str[charNumber]);
    if (c == 0) return EOF;
    if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
    charNumber++;
    return c;
  }

private:
  const char* str;
  ssize_t lineNumber;   /// the line number the token is from
  ssize_t colNumber;    /// the character number in the current line
  ssize_t charNumber;   /// the character in the file
};
/*! Character stream over the arguments of a command line. argv[0] is skipped
    (it only advances the character counter) and every argument is followed by
    a single space character. */
class CommandLineStream : public Stream<int>
{
public:
  CommandLineStream (int argc, char** argv, const std::string& name = "command line")
    : i(0), j(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name)))
  {
    /* account for the program name (at most 1024 characters) and the separator
       behind it; the loop index is named k to not shadow the member i and the
       bound is tested before the character is read */
    if (argc > 0) {
      for (size_t k=0; k<1024 && argv[0][k]; k++) charNumber++;
      charNumber++;
    }
    for (ssize_t k=1; k<argc; k++) args.push_back(argv[k]);
  }

  ~CommandLineStream() {}

public:
  ParseLocation location() {
    return ParseLocation(name,0,charNumber,charNumber);
  }

  /*! returns the next character of the current argument as a value in 0..255,
      a space at the end of each argument, and EOF after the last argument */
  int next() {
    if (i == args.size()) return EOF;
    if (j == args[i].size()) { i++; j=0; charNumber++; return ' '; }
    charNumber++;
    /* convert through unsigned char: with a plain (signed) char the byte 0xFF
       would turn into -1 and be mistaken for EOF, and all other bytes above
       0x7F would become negative */
    return static_cast<unsigned char>(args[i][j++]);
  }

private:
  size_t i,j;                          /// index of the current argument and of the next character in it
  std::vector<std::string> args;       /// arguments without the program name
  ssize_t charNumber;                  /// the character in the file
  std::shared_ptr<std::string> name;   /// name of buffer
};
}

View File

@ -0,0 +1,39 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "stream.h"
namespace embree
{
/*! Filter that removes line comments from a character stream: whenever the
    comment marker starts at the current position, everything up to (but not
    including) the end of the line is skipped. */
class LineCommentFilter : public Stream<int>
{
public:
  LineCommentFilter (const FileName& fileName, const std::string& lineComment)
    : cin(new FileStream(fileName)), lineComment(lineComment) {}

  LineCommentFilter (Ref<Stream<int> > cin, const std::string& lineComment)
    : cin(cin), lineComment(lineComment) {}

  ParseLocation location() { return cin->loc(); }

  int next()
  {
    /* try to consume the comment marker; on a mismatch the characters that
       were consumed so far are put back */
    bool commentStarts = true;
    for (size_t matched=0; matched<lineComment.size(); matched++) {
      if (cin->peek() != lineComment[matched]) {
        cin->unget(matched);
        commentStarts = false;
        break;
      }
      cin->get();
    }

    /* eat all characters until the end of the line (or file) */
    if (commentStarts) {
      while (cin->peek() != '\n' && cin->peek() != EOF) cin->get();
    }

    return cin->get();
  }

private:
  Ref<Stream<int> > cin;     //!< unfiltered character source
  std::string lineComment;   //!< marker that starts a line comment
};
}

View File

@ -0,0 +1,48 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "stringstream.h"
namespace embree
{
/* characters that are accepted inside of a token */
static const std::string stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";

/* builds a lookup table for fast categorization of characters: map[b] is true
   exactly for the byte values b that occur in chrs */
static void createCharMap(bool map[256], const std::string& chrs) {
  size_t code = 0;
  while (code < 256) map[code++] = false;
  for (std::string::const_iterator it = chrs.begin(); it != chrs.end(); ++it)
    map[uint8_t(*it)] = true;
}
/* simple tokenizer: stores the source stream and the end-of-line settings and
   prepares the lookup tables for separators and valid token characters */
StringStream::StringStream(const Ref<Stream<int> >& cin, const std::string& seps, const std::string& endl, bool multiLine)
  : cin(cin), endl(endl), multiLine(multiLine)
{
  createCharMap(isValidCharMap,stringChars);
  createCharMap(isSepMap,seps);
}
/* returns the next token: the end-of-line token (if one is configured) when a
   line ends, otherwise the characters up to the next separator; the result is
   empty at the end of the input */
std::string StringStream::next()
{
  /* skip separators */
  for (;;) {
    if (cin->peek() == EOF) break;

    /* a line end is reported as a token of its own if requested */
    if (!endl.empty() && cin->peek() == '\n') { cin->drop(); return endl; }

    /* a backslash directly in front of a line end joins the two lines */
    if (multiLine && cin->peek() == '\\') {
      cin->drop();
      if (cin->peek() == '\n') { cin->drop(); continue; }
      cin->unget();
    }

    if (!isSeparator(cin->peek())) break;
    cin->drop();
  }

  /* parse everything until the next separator */
  std::string token; token.reserve(64);
  while (cin->peek() != EOF && !isSeparator(cin->peek())) {
    const int c = cin->get();
    if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
    token.push_back((char)c);
  }
  return token;
}
}

View File

@ -0,0 +1,29 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "stream.h"
namespace embree
{
/*! simple tokenizer that splits a character stream into string tokens at
    configurable separator characters */
class StringStream : public Stream<std::string>
{
public:
  StringStream(const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
               const std::string& endl = "", bool multiLine = false);

public:
  ParseLocation location() { return cin->loc(); }
  std::string next();

private:

  /*! true for separator characters; values outside of 0..255 (like EOF) are never separators */
  __forceinline bool isSeparator(unsigned int c) const {
    if (c >= 256) return false;
    return isSepMap[c];
  }

  /*! true for characters allowed in a token; values outside of 0..255 (like EOF) are never valid */
  __forceinline bool isValidChar(unsigned int c) const {
    if (c >= 256) return false;
    return isValidCharMap[c];
  }

private:
  Ref<Stream<int> > cin;      /*! source character stream */
  bool isSepMap[256];         /*! map for fast classification of separators */
  bool isValidCharMap[256];   /*! map for valid characters */
  std::string endl;           /*! the token of the end of line */
  bool multiLine;             /*! whether to parse lines wrapped with \ */
};
}

View File

@ -0,0 +1,181 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "tokenstream.h"
#include "../math/math.h"
namespace embree
{
/* shorthands for common sets of characters (definitions of the static members
   of TokenStream) */
/* lower case letters */
const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
/* upper case letters */
const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/* decimal digits */
const std::string TokenStream::numbers = "0123456789";
/* whitespace characters */
const std::string TokenStream::separators = "\n\t\r ";
/* characters accepted inside of a quoted string (checked by tryString through isStringChar) */
const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
/* builds a lookup table for fast categorization of characters: map[b] is true
   exactly for the byte values b that occur in chrs */
static void createCharMap(bool map[256], const std::string& chrs) {
  size_t code = 0;
  while (code < 256) map[code++] = false;
  for (std::string::const_iterator it = chrs.begin(); it != chrs.end(); ++it)
    map[uint8_t(*it)] = true;
}
/* builds a full tokenizer from the set of identifier characters, the set of
   separator characters and the list of symbols; the lookup tables for the
   three character classes are prepared here */
TokenStream::TokenStream(const Ref<Stream<int> >& cin,            //< stream to read from
                         const std::string& alpha,                //< valid characters for identifiers
                         const std::string& seps,                 //< characters that act as separators
                         const std::vector<std::string>& symbols) //< symbols
  : cin(cin), symbols(symbols)
{
  createCharMap(isStringCharMap,stringChars);
  createCharMap(isSepMap,seps);
  createCharMap(isAlphaMap,alpha);
}
/* reads an optional sign followed by at least one decimal digit and appends
   the text to str_o; without a digit nothing is consumed and false is returned */
bool TokenStream::decDigits(std::string& str_o)
{
  std::string text;
  if (cin->peek() == '+' || cin->peek() == '-') text += (char)cin->get();

  size_t numDigits = 0;
  while (isDigit(cin->peek())) {
    text += (char)cin->get();
    numDigits++;
  }

  /* no digit found: put a consumed sign back */
  if (numDigits == 0) {
    cin->unget(text.size());
    return false;
  }

  str_o += text;
  return true;
}
/* reads at least one decimal digit (no sign) and appends the digits to str_o;
   returns false if the next character is not a digit */
bool TokenStream::decDigits1(std::string& str_o)
{
  std::string digits;
  while (isDigit(cin->peek())) digits += (char)cin->get();

  if (digits.empty()) {
    cin->unget(digits.size());
    return false;
  }

  str_o += digits;
  return true;
}
/* consumes the given symbol if the input continues with it; otherwise the
   input position is restored and false is returned */
bool TokenStream::trySymbol(const std::string& symbol)
{
  for (size_t matched = 0; matched < symbol.size(); matched++) {
    if (symbol[matched] != cin->peek()) {
      cin->unget(matched);
      return false;
    }
    cin->drop();
  }
  return true;
}
/* tries the registered symbols in their stored order and produces a symbol
   token for the first one that matches the input */
bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
{
  for (std::vector<std::string>::const_iterator sym = symbols.begin(); sym != symbols.end(); ++sym) {
    if (trySymbol(*sym)) {
      token = Token(*sym,Token::TY_SYMBOL,loc);
      return true;
    }
  }
  return false;
}
/* Tries to parse a floating point token: "nan", "+inf", "-inf", or a decimal
   number that contains a '.' and/or an exponent (forms 1.[2], 1.[2]E2, 1E2,
   .3 and .3E2). A plain integer is not accepted here so that tryInt can handle
   it. On failure all consumed characters are put back and false is returned.
   NOTE(review): the digits after the '.' are read with decDigits, which also
   accepts a sign, so an input like "1.-2" is consumed as one float - confirm
   whether decDigits1 was intended there. */
bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
{
  bool ok = false;
  std::string str;

  /* special values; loc is passed along so that these tokens report their
     source location like every other token (it was dropped before) */
  if (trySymbol("nan")) {
    token = Token(float(nan),loc);
    return true;
  }
  if (trySymbol("+inf")) {
    token = Token(float(pos_inf),loc);
    return true;
  }
  if (trySymbol("-inf")) {
    token = Token(float(neg_inf),loc);
    return true;
  }

  /* number that starts with (optionally signed) digits */
  if (decDigits(str))
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      decDigits(str);
      if (cin->peek() == 'e' || cin->peek() == 'E') {
        str += (char)cin->get();
        if (decDigits(str)) ok = true; // 1.[2]E2
      }
      else ok = true; // 1.[2]
    }
    else if (cin->peek() == 'e' || cin->peek() == 'E') {
      str += (char)cin->get();
      if (decDigits(str)) ok = true; // 1E2
    }
  }
  /* number that starts with the decimal point */
  else
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      if (decDigits(str)) {
        if (cin->peek() == 'e' || cin->peek() == 'E') {
          str += (char)cin->get();
          if (decDigits(str)) ok = true; // .3E2
        }
        else ok = true; // .3
      }
    }
  }

  if (ok) {
    token = Token((float)atof(str.c_str()),loc);
  }
  else cin->unget(str.size());   // not a float: restore the input
  return ok;
}
/* tries to parse an (optionally signed) decimal integer token */
bool TokenStream::tryInt(Token& token, const ParseLocation& loc) {
  std::string digits;
  if (!decDigits(digits)) return false;

  token = Token(atoi(digits.c_str()),loc);
  return true;
}
/* tries to parse a string token enclosed in double quotes; a character that is
   not a valid string character (this includes the end of the input) raises an
   error */
bool TokenStream::tryString(Token& token, const ParseLocation& loc)
{
  if (cin->peek() != '\"') return false;
  cin->drop();   // opening quote

  std::string text;
  for (;;) {
    if (cin->peek() == '\"') break;
    const int c = cin->get();
    if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());
    text += (char)c;
  }
  cin->drop();   // closing quote

  token = Token(text,Token::TY_STRING,loc);
  return true;
}
/* tries to parse an identifier token: one alpha character followed by any
   number of alphanumeric characters */
bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
{
  if (!isAlpha(cin->peek())) return false;

  std::string ident;
  do {
    ident += (char)cin->get();
  } while (isAlphaNum(cin->peek()));

  token = Token(ident,Token::TY_IDENTIFIER,loc);
  return true;
}
/* consumes separator characters until a different character or the end of the
   input is reached */
void TokenStream::skipSeparators()
{
  for (;;) {
    if (cin->peek() == EOF) return;
    if (!isSeparator(cin->peek())) return;
    cin->drop();
  }
}
/* produces the next token: separators are skipped, then the token kinds are
   tried in a fixed order (symbol, float, integer, string, identifier); at the
   end of the input an EOF token is returned and any other character becomes a
   character token */
Token TokenStream::next()
{
  skipSeparators();
  const ParseLocation loc = cin->loc();

  /* the first parser that succeeds fills in the token */
  Token token;
  const bool parsed = trySymbols   (token,loc)    /**< try to parse a symbol */
                   || tryFloat     (token,loc)    /**< try to parse float */
                   || tryInt       (token,loc)    /**< try to parse integer */
                   || tryString    (token,loc)    /**< try to parse string */
                   || tryIdentifier(token,loc);   /**< try to parse identifier */
  if (parsed) return token;

  if (cin->peek() == EOF) return Token(loc);      /**< return EOF token */
  return Token((char)cin->get(),loc);             /**< return invalid character token */
}
}

View File

@ -0,0 +1,164 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "stream.h"
#include <string>
#include <vector>
namespace embree
{
/*! Token produced by the tokenizer: a type tag together with the matching
    payload (character, integer, float or string) and the source location. The
    typed accessors fail if the token is of a different type. */
class Token
{
public:

  enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL };

  Token (         const ParseLocation& loc = ParseLocation()) : ty(TY_EOF  ),       loc(loc) {}
  Token (char c,  const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {}
  Token (int i,   const ParseLocation& loc = ParseLocation()) : ty(TY_INT  ), i(i), loc(loc) {}
  Token (float f, const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {}
  Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {}

  /*! named constructors for tokens without location */
  static Token Eof()                { return Token(); }
  static Token Sym(std::string str) { return Token(str,TY_SYMBOL); }
  static Token Str(std::string str) { return Token(str,TY_STRING); }
  static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); }

  /*! typed accessors */
  char Char() const {
    if (ty != TY_CHAR) THROW_RUNTIME_ERROR(loc.str()+": character expected");
    return c;
  }

  int Int() const {
    if (ty != TY_INT) THROW_RUNTIME_ERROR(loc.str()+": integer expected");
    return i;
  }

  /*! integer tokens are converted to float unless cast is false */
  float Float(bool cast = true) const {
    if (ty == TY_FLOAT) return f;
    if (cast && ty == TY_INT) return (float)i;
    THROW_RUNTIME_ERROR(loc.str()+": float expected");
  }

  std::string Identifier() const {
    if (ty != TY_IDENTIFIER) THROW_RUNTIME_ERROR(loc.str()+": identifier expected");
    return str;
  }

  std::string String() const {
    if (ty != TY_STRING) THROW_RUNTIME_ERROR(loc.str()+": string expected");
    return str;
  }

  std::string Symbol() const {
    if (ty != TY_SYMBOL) THROW_RUNTIME_ERROR(loc.str()+": symbol expected");
    return str;
  }

  const ParseLocation& Location() const { return loc; }

  /*! tokens are equal if type and payload match; the location is ignored */
  friend bool operator==(const Token& a, const Token& b)
  {
    if (a.ty != b.ty) return false;
    switch (a.ty) {
    case TY_CHAR      : return a.c == b.c;
    case TY_INT       : return a.i == b.i;
    case TY_FLOAT     : return a.f == b.f;
    case TY_IDENTIFIER:
    case TY_STRING    :
    case TY_SYMBOL    : return a.str == b.str;
    default           : return true;
    }
  }

  friend bool operator!=(const Token& a, const Token& b) {
    return !(a == b);
  }

  /*! orders by type first and by payload second; the location is ignored */
  friend bool operator <( const Token& a, const Token& b ) {
    if (a.ty != b.ty) return (int)a.ty < (int)b.ty;
    switch (a.ty) {
    case TY_CHAR      : return a.c < b.c;
    case TY_INT       : return a.i < b.i;
    case TY_FLOAT     : return a.f < b.f;
    case TY_IDENTIFIER:
    case TY_STRING    :
    case TY_SYMBOL    : return a.str < b.str;
    default           : return false;
    }
  }

  friend std::ostream& operator<<(std::ostream& cout, const Token& t)
  {
    switch (t.ty) {
    case TY_EOF       : return cout << "eof";
    case TY_CHAR      : return cout << "Char(" << t.c << ")";
    case TY_INT       : return cout << "Int(" << t.i << ")";
    case TY_FLOAT     : return cout << "Float(" << t.f << ")";
    case TY_IDENTIFIER: return cout << "Id(" << t.str << ")";
    case TY_STRING    : return cout << "String(" << t.str << ")";
    case TY_SYMBOL    : return cout << "Symbol(" << t.str << ")";
    default           : return cout << "unknown";
    }
  }

private:
  Type ty;             //< the type of the token
  union {
    char c;            //< data for char tokens
    int i;             //< data for int tokens
    float f;           //< data for float tokens
  };
  std::string str;     //< data for string and identifier tokens
  ParseLocation loc;   //< the location the token is from
};
/*! Full tokenizer: a Stream<Token> that reads characters from another stream
 *  and is configured with the list of valid identifier characters, the
 *  separator characters and the known symbols. */
class TokenStream : public Stream<Token>
{
public:
/*! shorthands for common sets of characters */
static const std::string alpha;
static const std::string ALPHA;
static const std::string numbers;
static const std::string separators;
static const std::string stringChars;
public:
/*! creates a tokenizer that reads its characters from cin */
TokenStream(const Ref<Stream<int> >& cin,
const std::string& alpha, //!< valid characters for identifiers
const std::string& seps, //!< characters that act as separators
const std::vector<std::string>& symbols = std::vector<std::string>()); //!< symbols
public:
/*! current location as reported by the underlying character stream */
ParseLocation location() { return cin->loc(); }
/*! produces the next token */
Token next();
/*! NOTE(review): presumably consumes the given symbol if it comes next and reports success; the definition is not in this header */
bool trySymbol(const std::string& symbol);
private:
/* parsing helpers, defined outside this header */
void skipSeparators();
bool decDigits(std::string& str);
bool decDigits1(std::string& str);
bool trySymbols(Token& token, const ParseLocation& loc);
bool tryFloat(Token& token, const ParseLocation& loc);
bool tryInt(Token& token, const ParseLocation& loc);
bool tryString(Token& token, const ParseLocation& loc);
bool tryIdentifier(Token& token, const ParseLocation& loc);
Ref<Stream<int> > cin; //!< the character stream the tokens are read from
/* per-character lookup tables, indexed by character code (see the predicates below) */
bool isSepMap[256];
bool isAlphaMap[256];
bool isStringCharMap[256];
std::vector<std::string> symbols;
/* The table-based predicates reject every code >= 256 before indexing, so an
   out-of-range value never reads outside the 256-entry tables. */
/*! checks if a character is a separator */
__forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
/*! checks if a character is a number */
__forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; }
/*! checks if a character is valid inside a string */
__forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; }
/*! checks if a character is legal for an identifier */
__forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; }
/*! checks if a character is legal for an identifier or is a digit */
__forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); }
};
}

View File

@ -0,0 +1,361 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "linearspace2.h"
#include "linearspace3.h"
#include "quaternion.h"
#include "bbox.h"
#include "vec4.h"
namespace embree
{
// Shorthands for the vector and scalar types that belong to the linear space
// type L of the surrounding template. Both are #undef'd at the end of this header.
#define VectorT typename L::Vector
#define ScalarT typename L::Vector::Scalar
////////////////////////////////////////////////////////////////////////////////
// Affine Space
////////////////////////////////////////////////////////////////////////////////
/*! Affine transformation x -> l*x + p, composed of a linear space l of type L
 *  and an offset vector p. */
template<typename L>
struct AffineSpaceT
{
  L l;           /*!< linear part of affine space */
  VectorT p;     /*!< affine part of affine space */

  ////////////////////////////////////////////////////////////////////////////////
  // Constructors, Assignment, Cast, Copy Operations
  ////////////////////////////////////////////////////////////////////////////////

  /*! leaves both parts default-constructed */
  __forceinline AffineSpaceT() {}

  /*! copy construction */
  __forceinline AffineSpaceT(const AffineSpaceT& other)
    : l(other.l), p(other.p) {}

  /*! purely linear transformation: the affine part is zero */
  __forceinline AffineSpaceT(const L& other)
    : l(other), p(zero) {}

  /*! copy assignment */
  __forceinline AffineSpaceT& operator=(const AffineSpaceT& other)
  {
    l = other.l;
    p = other.p;
    return *this;
  }

  /*! construction from the three vectors of the linear part and the affine part */
  __forceinline AffineSpaceT(const VectorT& vx, const VectorT& vy, const VectorT& vz, const VectorT& p)
    : l(vx,vy,vz), p(p) {}

  /*! construction from linear and affine part */
  __forceinline AffineSpaceT(const L& l, const VectorT& p)
    : l(l), p(p) {}

  /*! conversion from an affine space over a different linear space type */
  template<typename L1>
  __forceinline AffineSpaceT(const AffineSpaceT<L1>& s)
    : l(s.l), p(s.p) {}

  ////////////////////////////////////////////////////////////////////////////////
  // Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline AffineSpaceT(ZeroTy) : l(zero), p(zero) {}
  __forceinline AffineSpaceT(OneTy)  : l(one),  p(zero) {}

  /*! return matrix for scaling */
  static __forceinline AffineSpaceT scale(const VectorT& s) {
    return L::scale(s);
  }

  /*! return matrix for translation */
  static __forceinline AffineSpaceT translate(const VectorT& p) {
    return AffineSpaceT(one,p);
  }

  /*! return matrix for rotation, only in 2D */
  static __forceinline AffineSpaceT rotate(const ScalarT& r) {
    return L::rotate(r);
  }

  /*! return matrix for rotation around arbitrary point (2D) or axis (3D) */
  static __forceinline AffineSpaceT rotate(const VectorT& u, const ScalarT& r) {
    return L::rotate(u,r);
  }

  /*! return matrix for rotation around arbitrary axis and point, only in 3D */
  static __forceinline AffineSpaceT rotate(const VectorT& p, const VectorT& u, const ScalarT& r) {
    // move p to the origin, rotate, move back
    return translate(+p) * rotate(u,r) * translate(-p);
  }

  /*! return matrix for looking at given point, only in 3D */
  static __forceinline AffineSpaceT lookat(const VectorT& eye, const VectorT& point, const VectorT& up)
  {
    // orthonormal frame: viewing direction first, the other two axes via cross products
    VectorT viewDir = normalize(point-eye);
    VectorT sideDir = normalize(cross(up,viewDir));
    VectorT upDir   = normalize(cross(viewDir,sideDir));
    return AffineSpaceT(L(sideDir,upDir,viewDir),eye);
  }
};
// Template specialization that gives AffineSpaceT<LinearSpace3ff> (typedef'd
// below as AffineSpace3ff) its own identity: the fourth component of the
// affine part is set to 1 rather than 0. The quaternion helpers in this
// header read p.w as the real part of a quaternion.
template<>
__forceinline AffineSpaceT<LinearSpace3ff>::AffineSpaceT( OneTy ) : l(one), p(0.f, 0.f, 0.f, 1.f) {}
////////////////////////////////////////////////////////////////////////////////
// Unary Operators
////////////////////////////////////////////////////////////////////////////////
/*! negates the linear and the affine part */
template<typename L>
__forceinline AffineSpaceT<L> operator -( const AffineSpaceT<L>& a ) {
  return AffineSpaceT<L>(-a.l,-a.p);
}

/*! unary plus applied to the linear and the affine part */
template<typename L>
__forceinline AffineSpaceT<L> operator +( const AffineSpaceT<L>& a ) {
  return AffineSpaceT<L>(+a.l,+a.p);
}

/*! reciprocal of an affine space: rcp of the linear part, with the affine
 *  part mapped through it and negated */
template<typename L>
__forceinline AffineSpaceT<L> rcp( const AffineSpaceT<L>& a )
{
  L invLinear = rcp(a.l);
  return AffineSpaceT<L>(invLinear,-(invLinear*a.p));
}
////////////////////////////////////////////////////////////////////////////////
// Binary Operators
////////////////////////////////////////////////////////////////////////////////
/*! component-wise sum of linear and affine parts */
template<typename L>
__forceinline const AffineSpaceT<L> operator +( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) {
  return AffineSpaceT<L>(a.l+b.l,a.p+b.p);
}

/*! component-wise difference of linear and affine parts */
template<typename L>
__forceinline const AffineSpaceT<L> operator -( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) {
  return AffineSpaceT<L>(a.l-b.l,a.p-b.p);
}

/*! scales linear and affine part by a scalar */
template<typename L>
__forceinline const AffineSpaceT<L> operator *( const ScalarT & a, const AffineSpaceT<L>& b ) {
  return AffineSpaceT<L>(a*b.l,a*b.p);
}

/*! composition: (a*b)(x) = a(b(x)) */
template<typename L>
__forceinline const AffineSpaceT<L> operator *( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) {
  return AffineSpaceT<L>(a.l*b.l,a.l*b.p+a.p);
}

/*! composition with the reciprocal of b */
template<typename L>
__forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) {
  return a * rcp(b);
}

/*! division by a scalar.
 *  Only the scalar-on-the-left product is declared for affine spaces, so the
 *  reciprocal goes on the left; "a * rcp(b)" finds no matching operator* when
 *  this template is instantiated. */
template<typename L>
__forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const ScalarT & b ) {
  return rcp(b) * a;
}

/*! in-place composition */
template<typename L>
__forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) {
  return a = a * b;
}

/*! in-place scaling; uses the scalar-on-the-left product, the only scalar product declared */
template<typename L>
__forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const ScalarT & b ) {
  return a = b * a;
}

/*! in-place composition with the reciprocal of b */
template<typename L>
__forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) {
  return a = a / b;
}

/*! in-place division by a scalar */
template<typename L>
__forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const ScalarT & b ) {
  return a = a / b;
}

/*! transforms a point: p.x*vx + p.y*vy + p.z*vz + m.p, evaluated with nested madd */
template<typename L>
__forceinline VectorT xfmPoint (const AffineSpaceT<L>& m, const VectorT& p) {
  return madd(VectorT(p.x),m.l.vx,madd(VectorT(p.y),m.l.vy,madd(VectorT(p.z),m.l.vz,m.p)));
}

/*! transforms a vector: only the linear part is applied */
template<typename L>
__forceinline VectorT xfmVector(const AffineSpaceT<L>& m, const VectorT& v) {
  return xfmVector(m.l,v);
}

/*! transforms a normal: only the linear part is applied */
template<typename L>
__forceinline VectorT xfmNormal(const AffineSpaceT<L>& m, const VectorT& n) {
  return xfmNormal(m.l,n);
}
/*! Transforms all eight corners of the box b with m and returns the
 *  axis-aligned box that encloses the transformed corners. */
__forceinline const BBox<Vec3fa> xfmBounds(const AffineSpaceT<LinearSpace3<Vec3fa> >& m, const BBox<Vec3fa>& b)
{
  const Vec3fa& lo = b.lower;
  const Vec3fa& hi = b.upper;

  BBox3fa result = empty;
  result.extend(xfmPoint(m,Vec3fa(lo.x,lo.y,lo.z)));
  result.extend(xfmPoint(m,Vec3fa(lo.x,lo.y,hi.z)));
  result.extend(xfmPoint(m,Vec3fa(lo.x,hi.y,lo.z)));
  result.extend(xfmPoint(m,Vec3fa(lo.x,hi.y,hi.z)));
  result.extend(xfmPoint(m,Vec3fa(hi.x,lo.y,lo.z)));
  result.extend(xfmPoint(m,Vec3fa(hi.x,lo.y,hi.z)));
  result.extend(xfmPoint(m,Vec3fa(hi.x,hi.y,lo.z)));
  result.extend(xfmPoint(m,Vec3fa(hi.x,hi.y,hi.z)));
  return result;
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
/*! equal when both the linear and the affine part are equal */
template<typename L>
__forceinline bool operator ==( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) {
  return a.l == b.l && a.p == b.p;
}

/*! different when the linear or the affine part differs */
template<typename L>
__forceinline bool operator !=( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) {
  return a.l != b.l || a.p != b.p;
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
/*! selects between t and f separately for the linear and the affine part,
 *  using the mask s */
template<typename L>
__forceinline AffineSpaceT<L> select ( const typename L::Vector::Scalar::Bool& s,
                                       const AffineSpaceT<L>& t,
                                       const AffineSpaceT<L>& f )
{
  return AffineSpaceT<L>(select(s,t.l,f.l),
                         select(s,t.p,f.p));
}
////////////////////////////////////////////////////////////////////////////////
// Output Operators
////////////////////////////////////////////////////////////////////////////////
/*! prints an affine space as "{ l = <linear part>, p = <affine part> }" */
template<typename L>
static embree_ostream operator<<(embree_ostream cout, const AffineSpaceT<L>& m)
{
  cout << "{ l = " << m.l;
  cout << ", p = " << m.p;
  cout << " }";
  return cout;
}
////////////////////////////////////////////////////////////////////////////////
// Template Instantiations
////////////////////////////////////////////////////////////////////////////////
/* scalar affine spaces */
using AffineSpace2f      = AffineSpaceT<LinearSpace2f>;
using AffineSpace3f      = AffineSpaceT<LinearSpace3f>;
using AffineSpace3fa     = AffineSpaceT<LinearSpace3fa>;
using AffineSpace3fx     = AffineSpaceT<LinearSpace3fx>;
using AffineSpace3ff     = AffineSpaceT<LinearSpace3ff>;
using OrthonormalSpace3f = AffineSpaceT<Quaternion3f >;

/* affine spaces over Vec3 of N-wide float vectors */
template<int N> using AffineSpace3vf = AffineSpaceT<LinearSpace3<Vec3<vfloat<N>>>>;
using AffineSpace3vf4  = AffineSpaceT<LinearSpace3<Vec3<vfloat<4>>>>;
using AffineSpace3vf8  = AffineSpaceT<LinearSpace3<Vec3<vfloat<8>>>>;
using AffineSpace3vf16 = AffineSpaceT<LinearSpace3<Vec3<vfloat<16>>>>;

/* affine spaces over Vec4 of N-wide float vectors */
template<int N> using AffineSpace3vff = AffineSpaceT<LinearSpace3<Vec4<vfloat<N>>>>;
using AffineSpace3vfa4  = AffineSpaceT<LinearSpace3<Vec4<vfloat<4>>>>;
using AffineSpace3vfa8  = AffineSpaceT<LinearSpace3<Vec4<vfloat<8>>>>;
using AffineSpace3vfa16 = AffineSpaceT<LinearSpace3<Vec4<vfloat<16>>>>;
//////////////////////////////////////////////////////////////////////////////
/// Interpolation
//////////////////////////////////////////////////////////////////////////////
/*! interpolates the linear and the affine part independently with lerp */
template<typename T, typename R>
__forceinline AffineSpaceT<T> lerp(const AffineSpaceT<T>& M0,
                                   const AffineSpaceT<T>& M1,
                                   const R& t)
{
  return AffineSpaceT<T>(lerp(M0.l,M1.l,t),
                         lerp(M0.p,M1.p,t));
}
// slerp interprets the 16 floats of the matrix M = D * R * S as components of
// three matrices (D, R, S) that are interpolated individually:
// the quaternion for R is stored in the w components, the offset of D in the
// vx.y, vx.z and vy.z entries, and the remaining entries form S.
template<typename T> __forceinline AffineSpaceT<LinearSpace3<Vec3<T>>>
slerp(const AffineSpaceT<LinearSpace3<Vec4<T>>>& M0,
      const AffineSpaceT<LinearSpace3<Vec4<T>>>& M1,
      const T& t)
{
  typedef AffineSpaceT<LinearSpace3<Vec3<T>>> Space3;

  // rotation: spherical interpolation of the two stored quaternions
  QuaternionT<T> rot0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
  QuaternionT<T> rot1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
  QuaternionT<T> rot = slerp(rot0, rot1, t);

  // everything else is interpolated linearly
  Space3 S = lerp(M0, M1, t);

  // move the three offset entries out of S into the affine part of D
  Space3 D(one);
  D.p.x = S.l.vx.y;
  D.p.y = S.l.vx.z;
  D.p.z = S.l.vy.z;
  S.l.vx.y = 0;
  S.l.vx.z = 0;
  S.l.vy.z = 0;

  Space3 R = LinearSpace3<Vec3<T>>(rot);
  return D * R * S;
}
// Version of slerp for Vec3fa, which does not play along nicely with the
// other templated Vec3/Vec4 types. Same scheme as the templated slerp:
// quaternion in the w components, offset of D in vx.y, vx.z and vy.z.
__forceinline AffineSpace3fa slerp(const AffineSpace3ff& M0,
                                   const AffineSpace3ff& M1,
                                   const float& t)
{
  // rotation: spherical interpolation of the two stored quaternions
  Quaternion3f rot0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
  Quaternion3f rot1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
  Quaternion3f rot = slerp(rot0, rot1, t);

  // everything else is interpolated linearly
  AffineSpace3fa S = lerp(M0, M1, t);

  // move the three offset entries out of S into the affine part of D
  AffineSpace3fa D(one);
  D.p.x = S.l.vx.y;
  D.p.y = S.l.vx.z;
  D.p.z = S.l.vy.z;
  S.l.vx.y = 0;
  S.l.vx.z = 0;
  S.l.vy.z = 0;

  AffineSpace3fa R = LinearSpace3fa(rot);
  return D * R * S;
}
/*! Computes the affine transformation D * R * M from a quaternion
 *  decomposition packed into an AffineSpace3ff: the quaternion is read from
 *  the w components, the offset of D from the vx.y, vx.z and vy.z entries. */
__forceinline AffineSpace3fa quaternionDecompositionToAffineSpace(const AffineSpace3ff& qd)
{
  Quaternion3f rot(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
  AffineSpace3fa M = qd;

  // move the three offset entries out of M into the affine part of D
  AffineSpace3fa D(one);
  D.p.x = M.l.vx.y;
  D.p.y = M.l.vx.z;
  D.p.z = M.l.vy.z;
  M.l.vx.y = 0;
  M.l.vx.z = 0;
  M.l.vy.z = 0;

  AffineSpace3fa R = LinearSpace3fa(rot);
  return D * R * M;
}
/*! Unpacks a quaternion decomposition stored in an AffineSpace3ff.
 *  \param qd packed decomposition
 *  \param T  receives the vx.y, vx.z and vy.z entries of qd
 *  \param q  receives the quaternion stored in the w components of qd
 *  \param S  receives qd with the three entries written to T set to zero */
__forceinline void quaternionDecomposition(const AffineSpace3ff& qd, Vec3fa& T, Quaternion3f& q, AffineSpace3fa& S)
{
  // the outputs are written in a fixed order: q, then S, then T
  q = Quaternion3f(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
  S = qd;

  T.x = qd.l.vx.y;
  T.y = qd.l.vx.z;
  T.z = qd.l.vy.z;

  // these slots belong to T, so they are cleared in S
  S.l.vx.y = 0;
  S.l.vx.z = 0;
  S.l.vy.z = 0;
}
/*! Packs T, q and S into a single affine space: S provides the base entries,
 *  q goes into the w components (i, j, k into vx, vy, vz and r into p) and T
 *  overwrites the vx.y, vx.z and vy.z entries. */
__forceinline AffineSpace3fx quaternionDecomposition(Vec3fa const& T, Quaternion3f const& q, AffineSpace3fa const& S)
{
  AffineSpace3ff packed = S;

  // quaternion
  packed.l.vx.w = q.i;
  packed.l.vy.w = q.j;
  packed.l.vz.w = q.k;
  packed.p.w    = q.r;

  // entries taken from T
  packed.l.vx.y = T.x;
  packed.l.vx.z = T.y;
  packed.l.vy.z = T.z;

  return packed;
}
/*! Plain record of the 16 floats of a quaternion decomposition, one named
 *  field per value. The defaults are scale 1, quaternion (r=1, i=j=k=0), and
 *  zero skew, shift and translation. The quaternionDecomposition overload
 *  taking an AffineSpace3ff fills these fields from the packed matrix form. */
struct __aligned(16) QuaternionDecomposition
{
/* scale: read from the diagonal entries vx.x, vy.y, vz.z */
float scale_x = 1.f;
float scale_y = 1.f;
float scale_z = 1.f;
/* skew: read from vy.x, vz.x, vz.y */
float skew_xy = 0.f;
float skew_xz = 0.f;
float skew_yz = 0.f;
/* shift: read from p.x, p.y, p.z */
float shift_x = 0.f;
float shift_y = 0.f;
float shift_z = 0.f;
/* quaternion: r is read from p.w; i, j, k from vx.w, vy.w, vz.w */
float quaternion_r = 1.f;
float quaternion_i = 0.f;
float quaternion_j = 0.f;
float quaternion_k = 0.f;
/* translation: read from vx.y, vx.z, vy.z */
float translation_x = 0.f;
float translation_y = 0.f;
float translation_z = 0.f;
};
/*! Unpacks the 16 floats of a packed quaternion decomposition into the named
 *  fields of a QuaternionDecomposition. */
__forceinline QuaternionDecomposition quaternionDecomposition(AffineSpace3ff const& M)
{
  QuaternionDecomposition out;

  // entries of vx
  out.scale_x       = M.l.vx.x;
  out.translation_x = M.l.vx.y;
  out.translation_y = M.l.vx.z;
  out.quaternion_i  = M.l.vx.w;

  // entries of vy
  out.skew_xy       = M.l.vy.x;
  out.scale_y       = M.l.vy.y;
  out.translation_z = M.l.vy.z;
  out.quaternion_j  = M.l.vy.w;

  // entries of vz
  out.skew_xz       = M.l.vz.x;
  out.skew_yz       = M.l.vz.y;
  out.scale_z       = M.l.vz.z;
  out.quaternion_k  = M.l.vz.w;

  // entries of p
  out.shift_x       = M.p.x;
  out.shift_y       = M.p.y;
  out.shift_z       = M.p.z;
  out.quaternion_r  = M.p.w;

  return out;
}
////////////////////////////////////////////////////////////////////////////////
/*
 * ! Template Specialization for 2D: return matrix for rotation around point
 * (rotation around arbitrary vector is not meaningful in 2D)
 */
template<> __forceinline
AffineSpace2f AffineSpace2f::rotate(const Vec2f& p, const float& r)
{
  // move p to the origin, rotate by r, then move back
  const AffineSpace2f toOrigin   = translate(-p);
  const AffineSpace2f rotation   = AffineSpace2f(LinearSpace2f::rotate(r));
  const AffineSpace2f fromOrigin = translate(+p);
  return fromOrigin*rotation*toOrigin;
}
////////////////////////////////////////////////////////////////////////////////
// Similarity Transform
//
// checks, if M is a similarity transformation, i.e if there exists a factor D
// such that for all x,y: distance(Mx, My) = D * distance(x, y)
////////////////////////////////////////////////////////////////////////////////
/*! Tests whether M is a similarity transformation.
 *  \param M transformation to test; only its linear part is inspected
 *  \param D optional output: set to 0 up front and, on success, to the square
 *           root of the squared length of vx; may be null
 *  \return true when vx, vy and vz are pairwise orthogonal and have equal
 *          squared length, each within an absolute tolerance of 1e-5 */
__forceinline bool similarityTransform(const AffineSpace3fa& M, float* D)
{
  if (D) *D = 0.f;

  const auto& vx = M.l.vx;
  const auto& vy = M.l.vy;
  const auto& vz = M.l.vz;

  // the three vectors have to be pairwise orthogonal
  if (abs(dot(vx, vy)) > 1e-5f ||
      abs(dot(vx, vz)) > 1e-5f ||
      abs(dot(vy, vz)) > 1e-5f)
    return false;

  // ... and must share the same squared length
  const float sqrLenX = dot(vx, vx);
  const float sqrLenY = dot(vy, vy);
  const float sqrLenZ = dot(vz, vz);
  if (abs(sqrLenX - sqrLenY) > 1e-5f ||
      abs(sqrLenX - sqrLenZ) > 1e-5f ||
      abs(sqrLenY - sqrLenZ) > 1e-5f)
    return false;

  if (D) *D = sqrtf(sqrLenX);
  return true;
}
/*! Writes the four vectors of source (l.vx, l.vy, l.vz, p) to *ptr, using
 *  Vec3fa::storeu for each of them. */
__forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr)
{
  AffineSpace3fa& target = *ptr;
  Vec3fa::storeu(&target.l.vx, source.l.vx);
  Vec3fa::storeu(&target.l.vy, source.l.vy);
  Vec3fa::storeu(&target.l.vz, source.l.vz);
  Vec3fa::storeu(&target.p,    source.p);
}
/*! Reads the four vectors of *ptr (l.vx, l.vy, l.vz, p) with Vec3fa::loadu
 *  and returns them as an AffineSpace3fa. */
__forceinline AffineSpace3fa AffineSpace3fa_load_unaligned(AffineSpace3fa* ptr)
{
  AffineSpace3fa& stored = *ptr;

  AffineSpace3fa result;
  result.l.vx = Vec3fa::loadu(&stored.l.vx);
  result.l.vy = Vec3fa::loadu(&stored.l.vy);
  result.l.vz = Vec3fa::loadu(&stored.l.vz);
  result.p    = Vec3fa::loadu(&stored.p);
  return result;
}
// VectorT and ScalarT are shorthands local to this header; undefine them so
// they do not leak into files that include it.
#undef VectorT
#undef ScalarT
}

331
thirdparty/embree/common/math/bbox.h vendored Normal file
View File

@ -0,0 +1,331 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "vec2.h"
#include "vec3.h"
namespace embree
{
namespace internal {

  /*! halves a value; the generic version divides by T(2) */
  template <typename T>
  __forceinline T divideByTwo(const T& v) {
    return v / T(2);
  }

  /*! float version: multiplies by one half instead of dividing */
  template <>
  __forceinline float divideByTwo<float>(const float& v) {
    return v * 0.5f;
  }

  /*! double version: multiplies by one half instead of dividing */
  template <>
  __forceinline double divideByTwo<double>(const double& v) {
    return v * 0.5;
  }

} // namespace internal
/*! Axis-aligned bounding box given by its lower and upper corner. */
template<typename T>
struct BBox
{
  T lower, upper;

  ////////////////////////////////////////////////////////////////////////////////
  /// Construction
  ////////////////////////////////////////////////////////////////////////////////

  /*! leaves both corners default-constructed */
  __forceinline BBox() {}

  /*! conversion from a box with a different corner type */
  template<typename T1>
  __forceinline BBox(const BBox<T1>& other)
    : lower(other.lower), upper(other.upper) {}

  /*! copy assignment */
  __forceinline BBox& operator=(const BBox& other)
  {
    lower = other.lower;
    upper = other.upper;
    return *this;
  }

  /*! box that contains exactly the point v */
  __forceinline BBox(const T& v)
    : lower(v), upper(v) {}

  /*! box from explicit corners */
  __forceinline BBox(const T& lower, const T& upper)
    : lower(lower), upper(upper) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Extending Bounds
  ////////////////////////////////////////////////////////////////////////////////

  /*! grows the box so that it also contains another box */
  __forceinline const BBox& extend(const BBox& other)
  {
    lower = min(lower,other.lower);
    upper = max(upper,other.upper);
    return *this;
  }

  /*! grows the box so that it also contains a point */
  __forceinline const BBox& extend(const T& other)
  {
    lower = min(lower,other);
    upper = max(upper,other);
    return *this;
  }

  /*! tests if box is empty */
  __forceinline bool empty() const
  {
    for (int dim=0; dim<T::N; dim++) {
      if (lower[dim] > upper[dim]) return true;
    }
    return false;
  }

  /*! computes the size of the box */
  __forceinline T size() const {
    return upper - lower;
  }

  /*! computes the center of the box */
  __forceinline T center() const {
    return internal::divideByTwo<T>(lower+upper);
  }

  /*! computes twice the center of the box */
  __forceinline T center2() const {
    return lower+upper;
  }

  /*! merges two boxes */
  __forceinline static const BBox merge (const BBox& a, const BBox& b) {
    return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
  }

  /*! enlarge box by some scaling factor: each corner moves outwards by a
   *  times its own absolute value */
  __forceinline BBox enlarge_by(const float a) const {
    return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  /* EmptyTy, FalseTy and NegInfTy give the inverted box (+inf, -inf);
     FullTy, TrueTy and PosInfTy give the all-enclosing box (-inf, +inf) */
  __forceinline BBox( EmptyTy )  : lower(pos_inf), upper(neg_inf) {}
  __forceinline BBox( FullTy )   : lower(neg_inf), upper(pos_inf) {}

  __forceinline BBox( FalseTy )  : lower(pos_inf), upper(neg_inf) {}
  __forceinline BBox( TrueTy )   : lower(neg_inf), upper(pos_inf) {}

  __forceinline BBox( NegInfTy ) : lower(pos_inf), upper(neg_inf) {}
  __forceinline BBox( PosInfTy ) : lower(neg_inf), upper(pos_inf) {}
};
/*! one-dimensional box: empty when the bounds are inverted */
template<>
__forceinline bool BBox<float>::empty() const {
  return lower > upper;
}

#if defined(__SSE__)

/*! Vec3fa box: empty unless the le_mask of lower and upper passes all() */
template<>
__forceinline bool BBox<Vec3fa>::empty() const {
  return !all(le_mask(lower,upper));
}

/*! Vec3fx box: empty unless the le_mask of lower and upper passes all() */
template<>
__forceinline bool BBox<Vec3fx>::empty() const {
  return !all(le_mask(lower,upper));
}

#endif
/*! tests if box is finite: lower above -FLT_LARGE and upper below +FLT_LARGE */
__forceinline bool isvalid( const BBox<Vec3fa>& v )
{
  const auto lowerBounded = gt_mask(v.lower,Vec3fa_t(-FLT_LARGE));
  const auto upperBounded = lt_mask(v.upper,Vec3fa_t(+FLT_LARGE));
  return all(lowerBounded & upperBounded);
}

/*! tests if box is finite and non-empty */
__forceinline bool isvalid_non_empty( const BBox<Vec3fa>& v )
{
  const auto lowerBounded = gt_mask(v.lower,Vec3fa_t(-FLT_LARGE));
  const auto upperBounded = lt_mask(v.upper,Vec3fa_t(+FLT_LARGE));
  const auto ordered      = le_mask(v.lower,v.upper);
  return all(lowerBounded & upperBounded & ordered);
}

/*! tests if box has finite entries */
__forceinline bool is_finite( const BBox<Vec3fa>& b)
{
  return is_finite(b.lower) && is_finite(b.upper);
}

/*! test if point contained in box (bounds inclusive) */
__forceinline bool inside ( const BBox<Vec3fa>& b, const Vec3fa& p )
{
  const auto aboveLower = ge_mask(p,b.lower);
  const auto belowUpper = le_mask(p,b.upper);
  return all(aboveLower & belowUpper);
}
/*! computes twice the center of the box */
template<typename T>
__forceinline const T center2(const BBox<T>& box) {
  return box.lower + box.upper;
}

/*! computes the center of the box */
template<typename T>
__forceinline const T center (const BBox<T>& box) {
  return internal::divideByTwo<T>(center2(box));
}
/*! computes the volume of a bounding box */
__forceinline float volume ( const BBox<Vec3fa>& b ) {
  return reduce_mul(b.size());
}

/*! volume that is 0 for an empty box */
__forceinline float safeVolume( const BBox<Vec3fa>& b )
{
  if (b.empty()) return 0.0f;
  return volume(b);
}

/*! computes the volume of a bounding box */
__forceinline float volume( const BBox<Vec3f>& b ) {
  return reduce_mul(b.size());
}
/*! computes the area of a 2D bounding box */
template<typename T>
__forceinline const T area( const BBox<Vec2<T> >& b )
{
  const Vec2<T> extent = b.size();
  return extent.x*extent.y;
}

/*! computes half the surface area of a 3D bounding box */
template<typename T>
__forceinline const T halfArea( const BBox<Vec3<T> >& b ) {
  return halfArea(b.size());
}

/*! computes the surface area of a 3D bounding box */
template<typename T>
__forceinline const T area( const BBox<Vec3<T> >& b ) {
  return T(2)*halfArea(b);
}

/* Vec3fa versions */
__forceinline float halfArea( const BBox<Vec3fa>& b ) {
  return halfArea(b.size());
}

__forceinline float area( const BBox<Vec3fa>& b ) {
  return 2.0f*halfArea(b);
}

/* Vec3fx versions */
__forceinline float halfArea( const BBox<Vec3fx>& b ) {
  return halfArea(b.size());
}

__forceinline float area( const BBox<Vec3fx>& b ) {
  return 2.0f*halfArea(b);
}

/*! area that is 0 for an empty box */
template<typename Vec>
__forceinline float safeArea( const BBox<Vec>& b )
{
  if (b.empty()) return 0.0f;
  return area(b);
}

/*! forwards to halfArea */
template<typename T>
__forceinline float expectedApproxHalfArea(const BBox<T>& box) {
  return halfArea(box);
}
/*! merges a bounding box and a point */
template<typename T>
__forceinline const BBox<T> merge( const BBox<T>& a, const T& b ) {
  return BBox<T>(min(a.lower, b), max(a.upper, b));
}

/*! merges a point and a bounding box */
template<typename T>
__forceinline const BBox<T> merge( const T& a, const BBox<T>& b ) {
  return BBox<T>(min(a, b.lower), max(a, b.upper));
}

/*! merges two bounding boxes */
template<typename T>
__forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b ) {
  return BBox<T>(min(a.lower, b.lower), max(a.upper, b.upper));
}

/*! Merges three boxes. */
template<typename T>
__forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) {
  return merge(a,merge(b,c));
}

/*! Merges four boxes. */
template<typename T>
__forceinline BBox<T> merge(const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d) {
  return merge(merge(a,b),merge(c,d));
}
/*! Comparison Operators */

/*! equal when both corners are equal */
template<typename T>
__forceinline bool operator==( const BBox<T>& a, const BBox<T>& b ) {
  return a.lower == b.lower && a.upper == b.upper;
}

/*! different when at least one corner differs */
template<typename T>
__forceinline bool operator!=( const BBox<T>& a, const BBox<T>& b ) {
  return a.lower != b.lower || a.upper != b.upper;
}
/*! scaling: both corners are multiplied by a */
template<typename T>
__forceinline BBox<T> operator *( const float& a, const BBox<T>& b ) {
  return BBox<T>(a*b.lower,a*b.upper);
}

template<typename T>
__forceinline BBox<T> operator *( const T& a, const BBox<T>& b ) {
  return BBox<T>(a*b.lower,a*b.upper);
}

/*! translations: corner-wise sum and difference */
template<typename T>
__forceinline BBox<T> operator +( const BBox<T>& a, const BBox<T>& b ) {
  return BBox<T>(a.lower+b.lower,a.upper+b.upper);
}

template<typename T>
__forceinline BBox<T> operator -( const BBox<T>& a, const BBox<T>& b ) {
  return BBox<T>(a.lower-b.lower,a.upper-b.upper);
}

template<typename T>
__forceinline BBox<T> operator +( const BBox<T>& a, const T& b ) {
  return BBox<T>(a.lower+b,a.upper+b);
}

template<typename T>
__forceinline BBox<T> operator -( const BBox<T>& a, const T& b ) {
  return BBox<T>(a.lower-b,a.upper-b);
}

/*! extension: moves lower down and upper up by b */
template<typename T>
__forceinline BBox<T> enlarge(const BBox<T>& a, const T& b) {
  return BBox<T>(a.lower-b, a.upper+b);
}
/*! intersect bounding boxes */
template<typename T>
__forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b ) {
  return BBox<T>(max(a.lower, b.lower), min(a.upper, b.upper));
}

/*! intersects three boxes */
template<typename T>
__forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) {
  return intersect(a,intersect(b,c));
}

/*! intersects four boxes */
template<typename T>
__forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d ) {
  return intersect(intersect(a,b),intersect(c,d));
}

/*! subtract bounds from each other: c spans from a.lower to
 *  min(a.upper,b.lower), d spans from max(a.lower,b.upper) to a.upper */
template<typename T>
__forceinline void subtract(const BBox<T>& a, const BBox<T>& b, BBox<T>& c, BBox<T>& d)
{
  // the four corners are written in this fixed order
  c.lower = a.lower;
  c.upper = min(a.upper,b.lower);

  d.lower = max(a.lower,b.upper);
  d.upper = a.upper;
}
/*! tests if bounding boxes (and points) are disjoint (empty intersection) */
template<typename T>
__inline bool disjoint( const BBox<T>& a, const BBox<T>& b ) {
  return intersect(a,b).empty();
}

template<typename T>
__inline bool disjoint( const BBox<T>& a, const T& b ) {
  return disjoint(a,BBox<T>(b));
}

template<typename T>
__inline bool disjoint( const T& a, const BBox<T>& b ) {
  return disjoint(BBox<T>(a),b);
}

/*! tests if bounding boxes (and points) are conjoint (non-empty intersection) */
template<typename T>
__inline bool conjoint( const BBox<T>& a, const BBox<T>& b ) {
  return !intersect(a,b).empty();
}

template<typename T>
__inline bool conjoint( const BBox<T>& a, const T& b ) {
  return conjoint(a,BBox<T>(b));
}

template<typename T>
__inline bool conjoint( const T& a, const BBox<T>& b ) {
  return conjoint(BBox<T>(a),b);
}
/*! subset relation: true when a lies inside b in every dimension */
template<typename T>
__inline bool subset( const BBox<T>& a, const BBox<T>& b )
{
  for ( size_t dim = 0; dim < T::N; dim++ ) {
    if ( a.lower[dim] < b.lower[dim] ) return false;
  }
  for ( size_t dim = 0; dim < T::N; dim++ ) {
    if ( a.upper[dim] > b.upper[dim] ) return false;
  }
  return true;
}

/*! subset relation for Vec3fa boxes, evaluated with comparison masks */
template<>
__inline bool subset( const BBox<Vec3fa>& a, const BBox<Vec3fa>& b )
{
  const auto lowerInside = all(ge_mask(a.lower,b.lower));
  const auto upperInside = all(le_mask(a.upper,b.upper));
  return lowerInside & upperInside;
}

/*! subset relation for Vec3fx boxes, evaluated with comparison masks */
template<>
__inline bool subset( const BBox<Vec3fx>& a, const BBox<Vec3fx>& b )
{
  const auto lowerInside = all(ge_mask(a.lower,b.lower));
  const auto upperInside = all(le_mask(a.upper,b.upper));
  return lowerInside & upperInside;
}
/*! blending: interpolates the lower and the upper corner independently */
template<typename T>
__forceinline BBox<T> lerp(const BBox<T>& b0, const BBox<T>& b1, const float t)
{
  return BBox<T>(lerp(b0.lower,b1.lower,t),
                 lerp(b0.upper,b1.upper,t));
}
/*! output operator: prints the box as "[lower; upper]" */
template<typename T>
__forceinline embree_ostream operator<<(embree_ostream cout, const BBox<T>& box)
{
  cout << "[" << box.lower;
  cout << "; " << box.upper;
  cout << "]";
  return cout;
}
/*! default template instantiations */
using BBox1f  = BBox<float>;
using BBox2f  = BBox<Vec2f>;
using BBox2fa = BBox<Vec2fa>;
using BBox3f  = BBox<Vec3f>;
using BBox3fa = BBox<Vec3fa>;
using BBox3fx = BBox<Vec3fx>;
using BBox3ff = BBox<Vec3ff>;
}
////////////////////////////////////////////////////////////////////////////////
/// SSE / AVX / MIC specializations
////////////////////////////////////////////////////////////////////////////////
#if defined __SSE__
#include "../simd/sse.h"
#endif
#if defined __AVX__
#include "../simd/avx.h"
#endif
#if defined(__AVX512F__)
#include "../simd/avx512.h"
#endif
namespace embree
{
/*! Gathers N boxes from the array bounds into one box whose x, y and z
 *  coordinates are N-wide float vectors. Only declared here; the supported
 *  widths are provided as specializations. */
template<int N>
__forceinline BBox<Vec3<vfloat<N>>> transpose(const BBox3fa* bounds);
/*! 4-wide version: reads bounds[0..3]. Each Vec3fa corner is reinterpreted as
 *  a vfloat4 by the casts below and handed to transpose, which fills the x, y
 *  and z vectors of the result, first for the lower and then for the upper
 *  corners. */
template<>
__forceinline BBox<Vec3<vfloat4>> transpose<4>(const BBox3fa* bounds)
{
BBox<Vec3<vfloat4>> dest;
transpose((vfloat4&)bounds[0].lower,
(vfloat4&)bounds[1].lower,
(vfloat4&)bounds[2].lower,
(vfloat4&)bounds[3].lower,
dest.lower.x,
dest.lower.y,
dest.lower.z);
transpose((vfloat4&)bounds[0].upper,
(vfloat4&)bounds[1].upper,
(vfloat4&)bounds[2].upper,
(vfloat4&)bounds[3].upper,
dest.upper.x,
dest.upper.y,
dest.upper.z);
return dest;
}
#if defined(__AVX__)
/*! 8-wide version, only available when __AVX__ is defined: reads
 *  bounds[0..7]. Each Vec3fa corner is reinterpreted as a vfloat4 by the casts
 *  below and handed to transpose, which fills the x, y and z vectors of the
 *  result, first for the lower and then for the upper corners. */
template<>
__forceinline BBox<Vec3<vfloat8>> transpose<8>(const BBox3fa* bounds)
{
BBox<Vec3<vfloat8>> dest;
transpose((vfloat4&)bounds[0].lower,
(vfloat4&)bounds[1].lower,
(vfloat4&)bounds[2].lower,
(vfloat4&)bounds[3].lower,
(vfloat4&)bounds[4].lower,
(vfloat4&)bounds[5].lower,
(vfloat4&)bounds[6].lower,
(vfloat4&)bounds[7].lower,
dest.lower.x,
dest.lower.y,
dest.lower.z);
transpose((vfloat4&)bounds[0].upper,
(vfloat4&)bounds[1].upper,
(vfloat4&)bounds[2].upper,
(vfloat4&)bounds[3].upper,
(vfloat4&)bounds[4].upper,
(vfloat4&)bounds[5].upper,
(vfloat4&)bounds[6].upper,
(vfloat4&)bounds[7].upper,
dest.upper.x,
dest.upper.y,
dest.upper.z);
return dest;
}
#endif
/*! Merges the first N boxes of the array bounds into one box. Only declared
 *  here; the supported widths are provided as specializations. */
template<int N>
__forceinline BBox3fa merge(const BBox3fa* bounds);

/*! merges bounds[0..3] with a pairwise min/max reduction */
template<>
__forceinline BBox3fa merge<4>(const BBox3fa* bounds)
{
  const auto lower01 = min(bounds[0].lower,bounds[1].lower);
  const auto lower23 = min(bounds[2].lower,bounds[3].lower);
  const Vec3fa lower = min(lower01,lower23);

  const auto upper01 = max(bounds[0].upper,bounds[1].upper);
  const auto upper23 = max(bounds[2].upper,bounds[3].upper);
  const Vec3fa upper = max(upper01,upper23);

  return BBox3fa(lower,upper);
}
#if defined(__AVX__)
/*! merges bounds[0..7] with a pairwise min/max reduction tree */
template<>
__forceinline BBox3fa merge<8>(const BBox3fa* bounds)
{
  const auto lower0123 = min(min(bounds[0].lower,bounds[1].lower),
                             min(bounds[2].lower,bounds[3].lower));
  const auto lower4567 = min(min(bounds[4].lower,bounds[5].lower),
                             min(bounds[6].lower,bounds[7].lower));
  const Vec3fa lower = min(lower0123,lower4567);

  const auto upper0123 = max(max(bounds[0].upper,bounds[1].upper),
                             max(bounds[2].upper,bounds[3].upper));
  const auto upper4567 = max(max(bounds[4].upper,bounds[5].upper),
                             max(bounds[6].upper,bounds[7].upper));
  const Vec3fa upper = max(upper0123,upper4567);

  return BBox3fa(lower,upper);
}
#endif
}

47
thirdparty/embree/common/math/col3.h vendored Normal file
View File

@ -0,0 +1,47 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "math.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// RGB Color Class
////////////////////////////////////////////////////////////////////////////////
/*! RGB color with three channels of type T. */
template<typename T>
struct Col3
{
  T r, g, b;

  ////////////////////////////////////////////////////////////////////////////////
  /// Construction
  ////////////////////////////////////////////////////////////////////////////////

  /*! leaves the channels default-constructed */
  __forceinline Col3() {}

  /*! copy construction */
  __forceinline Col3(const Col3& other)
    : r(other.r), g(other.g), b(other.b) {}

  /*! copy assignment */
  __forceinline Col3& operator=(const Col3& other)
  {
    r = other.r;
    g = other.g;
    b = other.b;
    return *this;
  }

  /*! sets all three channels to v */
  __forceinline explicit Col3(const T& v)
    : r(v), g(v), b(v) {}

  /*! color from individual channels */
  __forceinline Col3(const T& r, const T& g, const T& b)
    : r(r), g(g), b(b) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Col3(ZeroTy)   : r(zero)   , g(zero)   , b(zero)    {}
  __forceinline Col3(OneTy)    : r(one)    , g(one)    , b(one)     {}
  __forceinline Col3(PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf) {}
  __forceinline Col3(NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf) {}
};
/*! output operator: prints the color as "(r, g, b)" */
template<typename T>
__forceinline embree_ostream operator<<(embree_ostream cout, const Col3<T>& a)
{
  cout << "(" << a.r;
  cout << ", " << a.g;
  cout << ", " << a.b;
  cout << ")";
  return cout;
}
/*! default template instantiations */
using Col3uc = Col3<unsigned char>;
using Col3f  = Col3<float >;
}

Some files were not shown because too many files have changed in this diff Show More