From b447e1baaf050ed242ab332191532817e3c200a0 Mon Sep 17 00:00:00 2001 From: Dario Date: Tue, 8 Aug 2023 09:46:06 -0300 Subject: [PATCH] Improve handling of motion vectors for multimesh instances. Fixes #67287. There was a subtle error where due to how enabling motion vectors for multi-meshes was handled, only the first instance would have a valid transforms buffer and the rest would point to an invalid buffer. This change moves over the responsibility of enabling motion vectors only when changes happen to the individual 3D transforms or the entire buffer itself. It also fixes an unnecessary download of the existing buffer that'd get overwritten by the current cache if it exists. Another fix is handling the case where the buffer was not set, and enabling motion vectors would not cause the buffer to be recreated correctly. (cherry picked from commit 5155870d644b1598cd4c8dd675583c3fa1b79d60) --- .../render_forward_clustered.cpp | 7 --- .../renderer_rd/storage_rd/mesh_storage.cpp | 52 ++++++++++++------- .../renderer_rd/storage_rd/mesh_storage.h | 2 +- servers/rendering/renderer_viewport.cpp | 9 ++++ servers/rendering/renderer_viewport.h | 3 ++ 5 files changed, 47 insertions(+), 26 deletions(-) diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index ce0b4a9f9c4..d9a3fb38868 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -1029,13 +1029,6 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DEPTH_TEXTURE) { scene_state.used_depth_texture = true; } - - if (p_color_pass_flags & COLOR_PASS_FLAG_MOTION_VECTORS) { - if ((flags & (INSTANCE_DATA_FLAG_MULTIMESH | INSTANCE_DATA_FLAG_PARTICLES)) == INSTANCE_DATA_FLAG_MULTIMESH && RendererRD::MeshStorage::get_singleton()->_multimesh_enable_motion_vectors(inst->data->base)) { - inst->transforms_uniform_set = mesh_storage->multimesh_get_3d_uniform_set(inst->data->base, scene_shader.default_shader_rd, TRANSFORMS_UNIFORM_SET); - } - } - } else if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) { if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW) { rl->add_element(surf); diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp index 67b5cdd291f..4bfec8ae8dc 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp @@ -1289,12 +1289,9 @@ void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS:: multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_MULTIMESH); } -bool MeshStorage::_multimesh_enable_motion_vectors(RID p_multimesh) { - MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); - ERR_FAIL_COND_V(!multimesh, false); - +void MeshStorage::_multimesh_enable_motion_vectors(MultiMesh *multimesh) { if (multimesh->motion_vectors_enabled) { - return false; + return; } multimesh->motion_vectors_enabled = true; @@ -1307,22 +1304,30 @@ bool MeshStorage::_multimesh_enable_motion_vectors(RID p_multimesh) { multimesh->data_cache.append_array(multimesh->data_cache); } - if (multimesh->buffer_set) { + uint32_t buffer_size = multimesh->instances * multimesh->stride_cache * sizeof(float); + uint32_t new_buffer_size = buffer_size * 2; + RID new_buffer = RD::get_singleton()->storage_buffer_create(new_buffer_size); + + if (multimesh->buffer_set && multimesh->data_cache.is_empty()) { + // If the buffer was set but there's no data cached in the CPU, we must download it from the GPU and + // upload it because RD does not provide a way to copy the buffer directly yet. RD::get_singleton()->barrier(); Vector buffer_data = RD::get_singleton()->buffer_get_data(multimesh->buffer); - if (!multimesh->data_cache.is_empty()) { - memcpy(buffer_data.ptrw(), multimesh->data_cache.ptr(), buffer_data.size()); - } - - RD::get_singleton()->free(multimesh->buffer); - uint32_t buffer_size = multimesh->instances * multimesh->stride_cache * sizeof(float) * 2; - multimesh->buffer = RD::get_singleton()->storage_buffer_create(buffer_size); - RD::get_singleton()->buffer_update(multimesh->buffer, 0, buffer_data.size(), buffer_data.ptr(), RD::BARRIER_MASK_NO_BARRIER); - RD::get_singleton()->buffer_update(multimesh->buffer, buffer_data.size(), buffer_data.size(), buffer_data.ptr()); - multimesh->uniform_set_3d = RID(); // Cleared by dependency - return true; + ERR_FAIL_COND(buffer_data.size() != int(buffer_size)); + RD::get_singleton()->buffer_update(new_buffer, 0, buffer_size, buffer_data.ptr(), RD::BARRIER_MASK_NO_BARRIER); + RD::get_singleton()->buffer_update(new_buffer, buffer_size, buffer_size, buffer_data.ptr()); + } else if (!multimesh->data_cache.is_empty()) { + // Simply upload the data cached in the CPU, which should already be doubled in size. + ERR_FAIL_COND(multimesh->data_cache.size() != int(new_buffer_size)); + RD::get_singleton()->buffer_update(new_buffer, 0, new_buffer_size, multimesh->data_cache.ptr()); } - return false; // Update the transforms uniform set cache + + if (multimesh->buffer.is_valid()) { + RD::get_singleton()->free(multimesh->buffer); + } + + multimesh->buffer = new_buffer; + multimesh->uniform_set_3d = RID(); // Cleared by dependency. } void MeshStorage::_multimesh_get_motion_vectors_offsets(RID p_multimesh, uint32_t &r_current_offset, uint32_t &r_prev_offset) { @@ -1531,6 +1536,12 @@ void MeshStorage::multimesh_instance_set_transform(RID p_multimesh, int p_index, ERR_FAIL_COND(multimesh->xform_format != RS::MULTIMESH_TRANSFORM_3D); _multimesh_make_local(multimesh); + + bool uses_motion_vectors = (RSG::viewport->get_num_viewports_with_motion_vectors() > 0); + if (uses_motion_vectors) { + _multimesh_enable_motion_vectors(multimesh); + } + _multimesh_update_motion_vectors_data_cache(multimesh); { @@ -1749,6 +1760,11 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector &p_b ERR_FAIL_COND(!multimesh); ERR_FAIL_COND(p_buffer.size() != (multimesh->instances * (int)multimesh->stride_cache)); + bool uses_motion_vectors = (RSG::viewport->get_num_viewports_with_motion_vectors() > 0); + if (uses_motion_vectors) { + _multimesh_enable_motion_vectors(multimesh); + } + if (multimesh->motion_vectors_enabled) { uint32_t frame = RSG::rasterizer->get_frame_number(); diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.h b/servers/rendering/renderer_rd/storage_rd/mesh_storage.h index 4d46a62a76c..c23a5b1449c 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.h @@ -234,6 +234,7 @@ private: MultiMesh *multimesh_dirty_list = nullptr; _FORCE_INLINE_ void _multimesh_make_local(MultiMesh *multimesh) const; + _FORCE_INLINE_ void _multimesh_enable_motion_vectors(MultiMesh *multimesh); _FORCE_INLINE_ void _multimesh_update_motion_vectors_data_cache(MultiMesh *multimesh); _FORCE_INLINE_ void _multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb); _FORCE_INLINE_ void _multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, bool p_aabb); @@ -593,7 +594,6 @@ public: virtual AABB multimesh_get_aabb(RID p_multimesh) const override; void _update_dirty_multimeshes(); - bool _multimesh_enable_motion_vectors(RID p_multimesh); void _multimesh_get_motion_vectors_offsets(RID p_multimesh, uint32_t &r_current_offset, uint32_t &r_prev_offset); _FORCE_INLINE_ RS::MultimeshTransformFormat multimesh_get_transform_format(RID p_multimesh) const { diff --git a/servers/rendering/renderer_viewport.cpp b/servers/rendering/renderer_viewport.cpp index 71f6a28671f..c8a7ee95e81 100644 --- a/servers/rendering/renderer_viewport.cpp +++ b/servers/rendering/renderer_viewport.cpp @@ -1171,6 +1171,7 @@ void RendererViewport::viewport_set_use_taa(RID p_viewport, bool p_use_taa) { return; } viewport->use_taa = p_use_taa; + num_viewports_with_motion_vectors += p_use_taa ? 1 : -1; _configure_3d_render_buffers(viewport); } @@ -1355,6 +1356,10 @@ bool RendererViewport::free(RID p_rid) { RendererSceneOcclusionCull::get_singleton()->remove_buffer(p_rid); } + if (viewport->use_taa) { + num_viewports_with_motion_vectors--; + } + viewport_owner.free(p_rid); return true; @@ -1410,6 +1415,10 @@ int RendererViewport::get_total_draw_calls_used() const { return total_draw_calls_used; } +int RendererViewport::get_num_viewports_with_motion_vectors() const { + return num_viewports_with_motion_vectors; +} + RendererViewport::RendererViewport() { occlusion_rays_per_thread = GLOBAL_GET("rendering/occlusion_culling/occlusion_rays_per_thread"); } diff --git a/servers/rendering/renderer_viewport.h b/servers/rendering/renderer_viewport.h index c004d05b23c..23bdf3cd28f 100644 --- a/servers/rendering/renderer_viewport.h +++ b/servers/rendering/renderer_viewport.h @@ -196,6 +196,8 @@ public: int total_vertices_drawn = 0; int total_draw_calls_used = 0; + int num_viewports_with_motion_vectors = 0; + private: Vector _sort_active_viewports(); void _viewport_set_size(Viewport *p_viewport, int p_width, int p_height, uint32_t p_view_count); @@ -299,6 +301,7 @@ public: int get_total_objects_drawn() const; int get_total_primitives_drawn() const; int get_total_draw_calls_used() const; + int get_num_viewports_with_motion_vectors() const; // Workaround for setting this on thread. void call_set_vsync_mode(DisplayServer::VSyncMode p_mode, DisplayServer::WindowID p_window);