Merge pull request #86121 from lawnjelly/occlusion_cull_jitter

Jitter raster occlusion camera to reduce false positives.
This commit is contained in:
Rémi Verschelde 2024-04-04 14:30:42 +02:00
commit 406d9426cb
No known key found for this signature in database
GPG Key ID: C3336907360768E1
8 changed files with 129 additions and 9 deletions

View File

@ -1523,6 +1523,8 @@ ProjectSettings::ProjectSettings() {
GLOBAL_DEF("debug/settings/crash_handler/message.editor", GLOBAL_DEF("debug/settings/crash_handler/message.editor",
String("Please include this when reporting the bug on: https://github.com/godotengine/godot/issues")); String("Please include this when reporting the bug on: https://github.com/godotengine/godot/issues"));
GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/occlusion_culling/bvh_build_quality", PROPERTY_HINT_ENUM, "Low,Medium,High"), 2); GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/occlusion_culling/bvh_build_quality", PROPERTY_HINT_ENUM, "Low,Medium,High"), 2);
GLOBAL_DEF_RST("rendering/occlusion_culling/jitter_projection", true);
GLOBAL_DEF_RST("internationalization/rendering/force_right_to_left_layout_direction", false); GLOBAL_DEF_RST("internationalization/rendering/force_right_to_left_layout_direction", false);
GLOBAL_DEF_BASIC(PropertyInfo(Variant::INT, "internationalization/rendering/root_node_layout_direction", PROPERTY_HINT_ENUM, "Based on Application Locale,Left-to-Right,Right-to-Left,Based on System Locale"), 0); GLOBAL_DEF_BASIC(PropertyInfo(Variant::INT, "internationalization/rendering/root_node_layout_direction", PROPERTY_HINT_ENUM, "Based on Application Locale,Left-to-Right,Right-to-Left,Based on System Locale"), 0);

View File

@ -2643,6 +2643,9 @@
The [url=https://en.wikipedia.org/wiki/Bounding_volume_hierarchy]Bounding Volume Hierarchy[/url] quality to use when rendering the occlusion culling buffer. Higher values will result in more accurate occlusion culling, at the cost of higher CPU usage. See also [member rendering/occlusion_culling/occlusion_rays_per_thread]. The [url=https://en.wikipedia.org/wiki/Bounding_volume_hierarchy]Bounding Volume Hierarchy[/url] quality to use when rendering the occlusion culling buffer. Higher values will result in more accurate occlusion culling, at the cost of higher CPU usage. See also [member rendering/occlusion_culling/occlusion_rays_per_thread].
[b]Note:[/b] This property is only read when the project starts. To adjust the BVH build quality at runtime, use [method RenderingServer.viewport_set_occlusion_culling_build_quality]. [b]Note:[/b] This property is only read when the project starts. To adjust the BVH build quality at runtime, use [method RenderingServer.viewport_set_occlusion_culling_build_quality].
</member> </member>
<member name="rendering/occlusion_culling/jitter_projection" type="bool" setter="" getter="" default="true">
If [code]true[/code], the projection used for rendering the occlusion buffer will be jittered. This can help prevent objects being incorrectly culled when visible through small gaps.
</member>
<member name="rendering/occlusion_culling/occlusion_rays_per_thread" type="int" setter="" getter="" default="512"> <member name="rendering/occlusion_culling/occlusion_rays_per_thread" type="int" setter="" getter="" default="512">
The number of occlusion rays traced per CPU thread. Higher values will result in more accurate occlusion culling, at the cost of higher CPU usage. The occlusion culling buffer's pixel count is roughly equal to [code]occlusion_rays_per_thread * number_of_logical_cpu_cores[/code], so it will depend on the system's CPU. Therefore, CPUs with fewer cores will use a lower resolution to attempt keeping performance costs even across devices. See also [member rendering/occlusion_culling/bvh_build_quality]. The number of occlusion rays traced per CPU thread. Higher values will result in more accurate occlusion culling, at the cost of higher CPU usage. The occlusion culling buffer's pixel count is roughly equal to [code]occlusion_rays_per_thread * number_of_logical_cpu_cores[/code], so it will depend on the system's CPU. Therefore, CPUs with fewer cores will use a lower resolution to attempt keeping performance costs even across devices. See also [member rendering/occlusion_culling/bvh_build_quality].
[b]Note:[/b] This property is only read when the project starts. To adjust the number of occlusion rays traced per thread at runtime, use [method RenderingServer.viewport_set_occlusion_rays_per_thread]. [b]Note:[/b] This property is only read when the project starts. To adjust the number of occlusion rays traced per thread at runtime, use [method RenderingServer.viewport_set_occlusion_rays_per_thread].

View File

@ -536,6 +536,64 @@ void RaycastOcclusionCull::buffer_set_size(RID p_buffer, const Vector2i &p_size)
buffers[p_buffer].resize(p_size); buffers[p_buffer].resize(p_size);
} }
// Returns a copy of p_cam_projection with a small, frame-dependent jitter offset applied.
//
// The offset cycles through a fixed 9-entry pattern (no offset, four full-step diagonals,
// four half-step diagonals), selected by the number of frames drawn so far.
//
// p_cam_projection: camera projection to jitter.
// p_viewport_size: size used to scale the jitter; callers pass the occlusion buffer size.
//
// The input projection is returned unchanged when jittering is disabled, or when either
// dimension of p_viewport_size is not positive.
Projection RaycastOcclusionCull::_jitter_projection(const Projection &p_cam_projection, const Size2i &p_viewport_size) {
	if (!_jitter_enabled) {
		return p_cam_projection;
	}

	// Prevent divide by zero when using NULL viewport.
	if ((p_viewport_size.x <= 0) || (p_viewport_size.y <= 0)) {
		return p_cam_projection;
	}

	// Per-frame jitter directions. Entry 0 intentionally applies no offset.
	static const float jitter_pattern[9][2] = {
		{ 0.0f, 0.0f },
		{ -1.0f, -1.0f },
		{ 1.0f, -1.0f },
		{ -1.0f, 1.0f },
		{ 1.0f, 1.0f },
		{ -0.5f, -0.5f },
		{ 0.5f, -0.5f },
		{ -0.5f, 0.5f },
		{ 0.5f, 0.5f },
	};

	// Take the modulo at the full width of the frame counter. Narrowing the counter
	// to a signed 32-bit integer first would produce negative remainders once the
	// counter exceeds INT32_MAX, which would silently disable the jitter.
	const uint64_t frames_drawn = Engine::get_singleton()->get_frames_drawn();
	const uint64_t pattern_index = frames_drawn % 9;

	Vector2 jitter(jitter_pattern[pattern_index][0], jitter_pattern[pattern_index][1]);

	// The multiplier here determines the divergence from center,
	// and is to some extent a balancing act.
	// Higher divergence gives fewer false hidden, but more false shown.
	// False hidden is obvious to viewer, false shown is not.
	// False shown can lower percentage that are occluded, and therefore performance.
	jitter *= Vector2(1 / static_cast<float>(p_viewport_size.x), 1 / static_cast<float>(p_viewport_size.y)) * 0.05f;

	Projection jittered = p_cam_projection;
	jittered.add_jitter_offset(jitter);
	return jittered;
}
void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) { void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {
if (!buffers.has(p_buffer)) { if (!buffers.has(p_buffer)) {
return; return;
@ -550,7 +608,9 @@ void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_
Scenario &scenario = scenarios[buffer.scenario_rid]; Scenario &scenario = scenarios[buffer.scenario_rid];
scenario.update(); scenario.update();
buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal); Projection jittered_proj = _jitter_projection(p_cam_projection, buffer.get_occlusion_buffer_size());
buffer.update_camera_rays(p_cam_transform, jittered_proj, p_cam_orthogonal);
scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count); scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count);
buffer.sort_rays(-p_cam_transform.basis.get_column(2), p_cam_orthogonal); buffer.sort_rays(-p_cam_transform.basis.get_column(2), p_cam_orthogonal);
@ -596,6 +656,7 @@ void RaycastOcclusionCull::_init_embree() {
RaycastOcclusionCull::RaycastOcclusionCull() { RaycastOcclusionCull::RaycastOcclusionCull() {
raycast_singleton = this; raycast_singleton = this;
int default_quality = GLOBAL_GET("rendering/occlusion_culling/bvh_build_quality"); int default_quality = GLOBAL_GET("rendering/occlusion_culling/bvh_build_quality");
_jitter_enabled = GLOBAL_GET("rendering/occlusion_culling/jitter_projection");
build_quality = RS::ViewportOcclusionCullingBuildQuality(default_quality); build_quality = RS::ViewportOcclusionCullingBuildQuality(default_quality);
} }

View File

@ -163,8 +163,10 @@ private:
HashMap<RID, Scenario> scenarios; HashMap<RID, Scenario> scenarios;
HashMap<RID, RaycastHZBuffer> buffers; HashMap<RID, RaycastHZBuffer> buffers;
RS::ViewportOcclusionCullingBuildQuality build_quality; RS::ViewportOcclusionCullingBuildQuality build_quality;
bool _jitter_enabled = false;
void _init_embree(); void _init_embree();
Projection _jitter_projection(const Projection &p_cam_projection, const Size2i &p_viewport_size);
public: public:
virtual bool is_occluder(RID p_rid) override; virtual bool is_occluder(RID p_rid) override;

View File

@ -1720,6 +1720,7 @@ void RendererSceneCull::_update_instance(Instance *p_instance) {
idata.base_rid = p_instance->base; idata.base_rid = p_instance->base;
idata.parent_array_index = p_instance->visibility_parent ? p_instance->visibility_parent->array_index : -1; idata.parent_array_index = p_instance->visibility_parent ? p_instance->visibility_parent->array_index : -1;
idata.visibility_index = p_instance->visibility_index; idata.visibility_index = p_instance->visibility_index;
idata.occlusion_timeout = 0;
for (Instance *E : p_instance->visibility_dependencies) { for (Instance *E : p_instance->visibility_dependencies) {
Instance *dep_instance = E; Instance *dep_instance = E;
@ -2775,7 +2776,7 @@ void RendererSceneCull::_scene_cull(CullData &cull_data, InstanceCullResult &cul
#define VIS_RANGE_CHECK ((idata.visibility_index == -1) || _visibility_range_check<false>(cull_data.scenario->instance_visibility[idata.visibility_index], cull_data.cam_transform.origin, cull_data.visibility_viewport_mask) == 0) #define VIS_RANGE_CHECK ((idata.visibility_index == -1) || _visibility_range_check<false>(cull_data.scenario->instance_visibility[idata.visibility_index], cull_data.cam_transform.origin, cull_data.visibility_viewport_mask) == 0)
#define VIS_PARENT_CHECK (_visibility_parent_check(cull_data, idata)) #define VIS_PARENT_CHECK (_visibility_parent_check(cull_data, idata))
#define VIS_CHECK (visibility_check < 0 ? (visibility_check = (visibility_flags != InstanceData::FLAG_VISIBILITY_DEPENDENCY_NEEDS_CHECK || (VIS_RANGE_CHECK && VIS_PARENT_CHECK))) : visibility_check) #define VIS_CHECK (visibility_check < 0 ? (visibility_check = (visibility_flags != InstanceData::FLAG_VISIBILITY_DEPENDENCY_NEEDS_CHECK || (VIS_RANGE_CHECK && VIS_PARENT_CHECK))) : visibility_check)
#define OCCLUSION_CULLED (cull_data.occlusion_buffer != nullptr && (cull_data.scenario->instance_data[i].flags & InstanceData::FLAG_IGNORE_OCCLUSION_CULLING) == 0 && cull_data.occlusion_buffer->is_occluded(cull_data.scenario->instance_aabbs[i].bounds, cull_data.cam_transform.origin, inv_cam_transform, *cull_data.camera_matrix, z_near)) #define OCCLUSION_CULLED (cull_data.occlusion_buffer != nullptr && (cull_data.scenario->instance_data[i].flags & InstanceData::FLAG_IGNORE_OCCLUSION_CULLING) == 0 && cull_data.occlusion_buffer->is_occluded(cull_data.scenario->instance_aabbs[i].bounds, cull_data.cam_transform.origin, inv_cam_transform, *cull_data.camera_matrix, z_near, cull_data.scenario->instance_data[i].occlusion_timeout))
if (!HIDDEN_BY_VISIBILITY_CHECKS) { if (!HIDDEN_BY_VISIBILITY_CHECKS) {
if ((LAYER_CHECK && IN_FRUSTUM(cull_data.cull->frustum) && VIS_CHECK && !OCCLUSION_CULLED) || (cull_data.scenario->instance_data[i].flags & InstanceData::FLAG_IGNORE_ALL_CULLING)) { if ((LAYER_CHECK && IN_FRUSTUM(cull_data.cull->frustum) && VIS_CHECK && !OCCLUSION_CULLED) || (cull_data.scenario->instance_data[i].flags & InstanceData::FLAG_IGNORE_ALL_CULLING)) {
@ -4252,6 +4253,7 @@ RendererSceneCull::RendererSceneCull() {
indexer_update_iterations = GLOBAL_GET("rendering/limits/spatial_indexer/update_iterations_per_frame"); indexer_update_iterations = GLOBAL_GET("rendering/limits/spatial_indexer/update_iterations_per_frame");
thread_cull_threshold = GLOBAL_GET("rendering/limits/spatial_indexer/threaded_cull_minimum_instances"); thread_cull_threshold = GLOBAL_GET("rendering/limits/spatial_indexer/threaded_cull_minimum_instances");
thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)WorkerThreadPool::get_singleton()->get_thread_count()); //make sure there is at least one thread per CPU thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)WorkerThreadPool::get_singleton()->get_thread_count()); //make sure there is at least one thread per CPU
RendererSceneOcclusionCull::HZBuffer::occlusion_jitter_enabled = GLOBAL_GET("rendering/occlusion_culling/jitter_projection");
dummy_occlusion_culling = memnew(RendererSceneOcclusionCull); dummy_occlusion_culling = memnew(RendererSceneOcclusionCull);

View File

@ -286,6 +286,13 @@ public:
Instance *instance = nullptr; Instance *instance = nullptr;
int32_t parent_array_index = -1; int32_t parent_array_index = -1;
int32_t visibility_index = -1; int32_t visibility_index = -1;
// Each time occlusion culling determines an instance is visible,
// set this to occlusion_frame plus some delay.
// Once the timeout is reached, allow the instance to be occlusion culled.
// This creates a delay for occlusion culling, which prevents flickering
// when jittering the raster occlusion projection.
uint64_t occlusion_timeout = 0;
}; };
struct InstanceVisibilityData { struct InstanceVisibilityData {

View File

@ -43,6 +43,8 @@ const Vector3 RendererSceneOcclusionCull::HZBuffer::corners[8] = {
Vector3(1, 1, 1) Vector3(1, 1, 1)
}; };
bool RendererSceneOcclusionCull::HZBuffer::occlusion_jitter_enabled = false;
bool RendererSceneOcclusionCull::HZBuffer::is_empty() const { bool RendererSceneOcclusionCull::HZBuffer::is_empty() const {
return sizes.is_empty(); return sizes.is_empty();
} }
@ -66,6 +68,8 @@ void RendererSceneOcclusionCull::HZBuffer::clear() {
} }
void RendererSceneOcclusionCull::HZBuffer::resize(const Size2i &p_size) { void RendererSceneOcclusionCull::HZBuffer::resize(const Size2i &p_size) {
occlusion_buffer_size = p_size;
if (p_size == Size2i()) { if (p_size == Size2i()) {
clear(); clear();
return; return;
@ -124,6 +128,9 @@ void RendererSceneOcclusionCull::HZBuffer::resize(const Size2i &p_size) {
} }
void RendererSceneOcclusionCull::HZBuffer::update_mips() { void RendererSceneOcclusionCull::HZBuffer::update_mips() {
// Keep this up to date as a local to be used for occlusion timers.
occlusion_frame = Engine::get_singleton()->get_frames_drawn();
if (sizes.is_empty()) { if (sizes.is_empty()) {
return; return;
} }

View File

@ -53,14 +53,10 @@ public:
PackedByteArray debug_data; PackedByteArray debug_data;
float debug_tex_range = 0.0f; float debug_tex_range = 0.0f;
public: uint64_t occlusion_frame = 0;
bool is_empty() const; Size2i occlusion_buffer_size;
virtual void clear();
virtual void resize(const Size2i &p_size);
void update_mips(); _FORCE_INLINE_ bool _is_occluded(const real_t p_bounds[6], const Vector3 &p_cam_position, const Transform3D &p_cam_inv_transform, const Projection &p_cam_projection, real_t p_near) const {
_FORCE_INLINE_ bool is_occluded(const real_t p_bounds[6], const Vector3 &p_cam_position, const Transform3D &p_cam_inv_transform, const Projection &p_cam_projection, real_t p_near) const {
if (is_empty()) { if (is_empty()) {
return false; return false;
} }
@ -154,7 +150,47 @@ public:
return !visible; return !visible;
} }
public:
static bool occlusion_jitter_enabled;
bool is_empty() const;
virtual void clear();
virtual void resize(const Size2i &p_size);
void update_mips();
// Occlusion query with a per-instance hold-off timer layered on top of _is_occluded().
//
// When jittering is enabled, an instance found visible has r_occlusion_timeout set to a
// frame in the near future. While that timeout is pending the instance keeps being
// reported as not occluded, which delays its disappearance and prevents flickering
// when using jittering.
//
// r_occlusion_timeout: in/out timer for the queried instance; 0 means no timeout is pending.
// Returns true only if the raw test reports occlusion and no timeout is pending.
_FORCE_INLINE_ bool is_occluded(const real_t p_bounds[6], const Vector3 &p_cam_position, const Transform3D &p_cam_inv_transform, const Projection &p_cam_projection, real_t p_near, uint64_t &r_occlusion_timeout) const {
	const bool raw_occluded = _is_occluded(p_bounds, p_cam_position, p_cam_inv_transform, p_cam_projection, p_near);

	// Temporal jitter disabled: occlusion timers are not used at all.
	if (!occlusion_jitter_enabled) {
		return raw_occluded;
	}

	if (!raw_occluded) {
		// Visible this frame: (re)arm the timer.
		// Uncomment the define below to shorten the delay while debugging.
//#define DEBUG_RASTER_OCCLUSION_JITTER
#ifdef DEBUG_RASTER_OCCLUSION_JITTER
		r_occlusion_timeout = occlusion_frame + 1;
#else
		r_occlusion_timeout = occlusion_frame + 9;
#endif
		return false;
	}

	// Occluded this frame: once the delay has elapsed, clear the timer
	// so that occlusion culling proceeds as normal.
	if (r_occlusion_timeout != 0 && occlusion_frame >= r_occlusion_timeout) {
		r_occlusion_timeout = 0;
	}

	// Still report "not occluded" while a timeout is pending.
	return r_occlusion_timeout == 0;
}
RID get_debug_texture(); RID get_debug_texture();
const Size2i &get_occlusion_buffer_size() const { return occlusion_buffer_size; }
virtual ~HZBuffer(){}; virtual ~HZBuffer(){};
}; };