From 0db512354807318646ac9884f3702733a56b3bb1 Mon Sep 17 00:00:00 2001 From: Hein-Pieter van Braam Date: Wed, 20 Dec 2017 14:30:50 +0100 Subject: [PATCH] Prevent false sharing in lightbaker RNG state The previous commit corrected the RNG behavior for the lightbaker but also made it significantly slower on high core count systems. Because the vector of states is physically close together in RAM, we force a cache synchronization across all cores whenever we call for the next random number to be generated. This change creates a temporary local copy of the RNG state before entering the loop and saves it back to the global state when done. This preserves the per-thread RNG state (and random number quality) while significantly improving performance. On my 16-thread box it saves 3 minutes baking the Sponza scene, bringing performance back in line with what it was before the various RNG fixes were introduced, and making it slightly faster than the first implementation. --- scene/3d/voxel_light_baker.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scene/3d/voxel_light_baker.cpp b/scene/3d/voxel_light_baker.cpp index 4c52d0bb1bc..bf0f801e329 100644 --- a/scene/3d/voxel_light_baker.cpp +++ b/scene/3d/voxel_light_baker.cpp @@ -1711,11 +1711,14 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V const Light *light = bake_light.ptr(); const Cell *cells = bake_cells.ptr(); + // Prevent false sharing when running on OpenMP + uint32_t local_rng_state = *rng_state; + for (int i = 0; i < samples; i++) { - float random_angle1 = (((xorshift32(rng_state) % 65535) / 65535.0) * 2.0 - 1.0) * spread; + float random_angle1 = (((xorshift32(&local_rng_state) % 65535) / 65535.0) * 2.0 - 1.0) * spread; Vector3 axis(0, sin(random_angle1), cos(random_angle1)); - float random_angle2 = ((xorshift32(rng_state) % 65535) / 65535.0) * Math_PI * 2.0; + float random_angle2 = ((xorshift32(&local_rng_state) % 65535) / 65535.0) * Math_PI * 2.0; Basis 
rot(Vector3(0, 0, 1), random_angle2); axis = rot.xform(axis); @@ -1792,6 +1795,8 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V } } + // Make sure we don't reset this thread's RNG state + *rng_state = local_rng_state; return accum / samples; }