From 77bc3e9ac32936a8c4af011805eb785d378212af Mon Sep 17 00:00:00 2001 From: reduz Date: Mon, 4 Jan 2021 17:00:44 -0300 Subject: [PATCH] Threaded optimizations to cull and render -Reorganize thread work pool for rendering -Fixes to make secondary command buffers to work (disabled because they need more testing) --- core/templates/thread_work_pool.h | 1 + drivers/vulkan/rendering_device_vulkan.cpp | 14 +- .../renderer_rd/renderer_compositor_rd.cpp | 4 - .../renderer_rd/renderer_compositor_rd.h | 2 - .../renderer_scene_render_forward.cpp | 138 +++-- .../renderer_scene_render_forward.h | 40 +- servers/rendering/renderer_rd/shader_rd.cpp | 2 +- servers/rendering/renderer_scene_cull.cpp | 562 +++++++++--------- servers/rendering/renderer_scene_cull.h | 153 ++++- servers/rendering/renderer_storage.h | 2 +- servers/rendering/renderer_thread_pool.cpp | 42 ++ servers/rendering/renderer_thread_pool.h | 45 ++ servers/rendering_server.cpp | 7 + servers/rendering_server.h | 3 + 14 files changed, 634 insertions(+), 381 deletions(-) create mode 100644 servers/rendering/renderer_thread_pool.cpp create mode 100644 servers/rendering/renderer_thread_pool.h diff --git a/core/templates/thread_work_pool.h b/core/templates/thread_work_pool.h index 02d941d0f47..7c3508814fc 100644 --- a/core/templates/thread_work_pool.h +++ b/core/templates/thread_work_pool.h @@ -125,6 +125,7 @@ public: end_work(); } + _FORCE_INLINE_ int get_thread_count() const { return thread_count; } void init(int p_thread_count = -1); void finish(); ~ThreadWorkPool(); diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 52e090e4edd..6eadec4cce6 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -5638,7 +5638,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(Di vkCmdSetScissor(command_buffer, 0, 1, &scissor); - return ID_TYPE_DRAW_LIST; + return int64_t(ID_TYPE_DRAW_LIST) << 
ID_BASE_SHIFT; } Error RenderingDeviceVulkan::_draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass) { @@ -5905,7 +5905,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu vkCmdSetScissor(command_buffer, 0, 1, &scissor); draw_list->viewport = Rect2i(viewport_offset, viewport_size); - return ID_TYPE_DRAW_LIST; + return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector &p_storage_textures) { @@ -6002,7 +6002,7 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p for (uint32_t i = 0; i < p_splits; i++) { //take a command buffer and initialize it - VkCommandBuffer command_buffer = split_draw_list_allocators[p_splits].command_buffers[frame]; + VkCommandBuffer command_buffer = split_draw_list_allocators[i].command_buffers[frame]; VkCommandBufferInheritanceInfo inheritance_info; inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; @@ -6060,7 +6060,7 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p scissor.extent.height = viewport_size.height; vkCmdSetScissor(command_buffer, 0, 1, &scissor); - r_split_ids[i] = (DrawListID(1) << DrawListID(ID_TYPE_SPLIT_DRAW_LIST)) + i; + r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i; draw_list[i].viewport = Rect2i(viewport_offset, viewport_size); } @@ -6075,7 +6075,7 @@ RenderingDeviceVulkan::DrawList 
*RenderingDeviceVulkan::_get_draw_list_ptr(DrawL if (!draw_list) { return nullptr; - } else if (p_id == ID_TYPE_DRAW_LIST) { + } else if (p_id == (int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT)) { if (draw_list_split) { return nullptr; } @@ -6442,8 +6442,8 @@ void RenderingDeviceVulkan::draw_list_end() { //send all command buffers VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * draw_list_count); for (uint32_t i = 0; i < draw_list_count; i++) { - vkEndCommandBuffer(draw_list->command_buffer); - command_buffers[i] = draw_list->command_buffer; + vkEndCommandBuffer(draw_list[i].command_buffer); + command_buffers[i] = draw_list[i].command_buffer; } vkCmdExecuteCommands(frames[frame].draw_command_buffer, draw_list_count, command_buffers); diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp index fb9c114ade7..be2552bd326 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp @@ -154,12 +154,9 @@ void RendererCompositorRD::initialize() { } } -ThreadWorkPool RendererCompositorRD::thread_work_pool; uint64_t RendererCompositorRD::frame = 1; void RendererCompositorRD::finalize() { - thread_work_pool.finish(); - memdelete(scene); memdelete(canvas); memdelete(storage); @@ -174,7 +171,6 @@ RendererCompositorRD *RendererCompositorRD::singleton = nullptr; RendererCompositorRD::RendererCompositorRD() { singleton = this; - thread_work_pool.init(); time = 0; storage = memnew(RendererStorageRD); diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index e1995872af1..cb85fc79e0e 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -90,8 +90,6 @@ public: virtual bool is_low_end() const { return false; } - static ThreadWorkPool thread_work_pool; - 
static RendererCompositorRD *singleton; RendererCompositorRD(); ~RendererCompositorRD() {} diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp index b32fff6f01e..c0939f23eff 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp @@ -809,13 +809,13 @@ bool RendererSceneRenderForward::free(RID p_rid) { /// RENDERING /// template -void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe, const Vector2 &p_uv_offset, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { +void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element) { RD::DrawListID draw_list = p_draw_list; RD::FramebufferFormatID framebuffer_format = p_framebuffer_Format; //global scope bindings RD::get_singleton()->draw_list_bind_uniform_set(draw_list, render_base_uniform_set, SCENE_UNIFORM_SET); - RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_render_pass_uniform_set, RENDER_PASS_UNIFORM_SET); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_params->render_pass_uniform_set, RENDER_PASS_UNIFORM_SET); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, default_vec4_xform_uniform_set, TRANSFORMS_UNIFORM_SET); RID prev_material_uniform_set; @@ -825,12 +825,12 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList RID prev_pipeline_rd; RID prev_xforms_uniform_set; - bool shadow_pass = (p_pass_mode == 
PASS_MODE_SHADOW) || (p_pass_mode == PASS_MODE_SHADOW_DP); + bool shadow_pass = (p_params->pass_mode == PASS_MODE_SHADOW) || (p_params->pass_mode == PASS_MODE_SHADOW_DP); - float old_offset[2]; + float old_offset[2] = { 0, 0 }; - for (int i = 0; i < p_element_count; i++) { - const GeometryInstanceSurfaceDataCache *surf = p_elements[i]; + for (uint32_t i = p_from_element; i < p_to_element; i++) { + const GeometryInstanceSurfaceDataCache *surf = p_params->elements[i]; RID material_uniform_set; ShaderData *shader; @@ -851,21 +851,21 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList continue; } - if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL) { + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { old_offset[0] = surf->owner->push_constant.lightmap_uv_scale[0]; old_offset[1] = surf->owner->push_constant.lightmap_uv_scale[1]; - surf->owner->push_constant.lightmap_uv_scale[0] = p_uv_offset.x; - surf->owner->push_constant.lightmap_uv_scale[1] = p_uv_offset.y; + surf->owner->push_constant.lightmap_uv_scale[0] = p_params->uv_offset.x; + surf->owner->push_constant.lightmap_uv_scale[1] = p_params->uv_offset.y; } //find cull variant ShaderData::CullVariant cull_variant; - if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL || p_pass_mode == PASS_MODE_SDF || ((p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) && surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS)) { + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL || p_params->pass_mode == PASS_MODE_SDF || ((p_params->pass_mode == PASS_MODE_SHADOW || p_params->pass_mode == PASS_MODE_SHADOW_DP) && surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS)) { cull_variant = ShaderData::CULL_VARIANT_DOUBLE_SIDED; } else { bool mirror = surf->owner->mirror; - if (p_reverse_cull) { + if (p_params->reverse_cull) { mirror = !mirror; } cull_variant = mirror ? 
ShaderData::CULL_VARIANT_REVERSED : ShaderData::CULL_VARIANT_NORMAL; @@ -876,7 +876,7 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList ShaderVersion shader_version = SHADER_VERSION_MAX; // Assigned to silence wrong -Wmaybe-initialized. - switch (p_pass_mode) { + switch (p_params->pass_mode) { case PASS_MODE_COLOR: case PASS_MODE_COLOR_TRANSPARENT: { if (surf->sort.uses_lightmap) { @@ -930,13 +930,13 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); } - if (p_screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(mesh_surface)) { + if (p_params->screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(mesh_surface)) { //lod - Vector3 support_min = surf->owner->transformed_aabb.get_support(-p_lod_plane.normal); - Vector3 support_max = surf->owner->transformed_aabb.get_support(p_lod_plane.normal); + Vector3 support_min = surf->owner->transformed_aabb.get_support(-p_params->lod_plane.normal); + Vector3 support_max = surf->owner->transformed_aabb.get_support(p_params->lod_plane.normal); - float distance_min = p_lod_plane.distance_to(support_min); - float distance_max = p_lod_plane.distance_to(support_max); + float distance_min = p_params->lod_plane.distance_to(support_min); + float distance_max = p_params->lod_plane.distance_to(support_max); float distance = 0.0; @@ -949,7 +949,7 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList distance = -distance_max; } - index_array_rd = storage->mesh_surface_get_index_array_with_lod(mesh_surface, surf->owner->lod_model_scale * surf->owner->lod_bias, distance * p_lod_distance_multiplier, p_screen_lod_threshold); + index_array_rd = storage->mesh_surface_get_index_array_with_lod(mesh_surface, surf->owner->lod_model_scale * surf->owner->lod_bias, distance * p_params->lod_distance_multiplier, 
p_params->screen_lod_threshold); } else { //no lod @@ -968,7 +968,7 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList prev_index_array_rd = index_array_rd; } - RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_force_wireframe); + RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_params->force_wireframe); if (pipeline_rd != prev_pipeline_rd) { // checking with prev shader does not make so much sense, as @@ -995,49 +995,76 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), surf->owner->instance_count); - if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL) { + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { surf->owner->push_constant.lightmap_uv_scale[0] = old_offset[0]; surf->owner->push_constant.lightmap_uv_scale[1] = old_offset[1]; } } } -void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe, const Vector2 &p_uv_offset, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { +void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element) { //use template for faster performance (pass mode comparisons are inlined) - switch (p_pass_mode) { + + switch (p_params->pass_mode) { case PASS_MODE_COLOR: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, 
p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; case PASS_MODE_COLOR_SPECULAR: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; case PASS_MODE_COLOR_TRANSPARENT: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; case PASS_MODE_SHADOW: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; case PASS_MODE_SHADOW_DP: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; case PASS_MODE_DEPTH: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, 
p_from_element, p_to_element); } break; case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; case PASS_MODE_DEPTH_MATERIAL: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; case PASS_MODE_SDF: { - _render_list_template(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; } } +void RendererSceneRenderForward::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) { + uint32_t render_total = p_params->element_count; + uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count(); + uint32_t render_from = p_thread * render_total / total_threads; + uint32_t render_to = (p_thread + 1 == total_threads) ? 
render_total : ((p_thread + 1) * render_total / total_threads); + _render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to); +} + +void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector &p_storage_textures) { + RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(p_framebuffer); + p_params->framebuffer_format = fb_format; + + if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time + //multi threaded + thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); + RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); + RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RendererSceneRenderForward::_render_list_thread_function, p_params); + RD::get_singleton()->draw_list_end(); + } else { + //single threaded + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); + _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); + RD::get_singleton()->draw_list_end(); + } +} + void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const 
CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) { //CameraMatrix projection = p_cam_projection; //projection.flip_y(); // Vulkan and modern APIs use Y-Down @@ -1428,7 +1455,7 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray &p_giprobes) { - scene_state.giprobes_used = MIN(p_giprobes.size(), MAX_GI_PROBES); + scene_state.giprobes_used = MIN(p_giprobes.size(), uint32_t(MAX_GI_PROBES)); for (uint32_t i = 0; i < scene_state.giprobes_used; i++) { scene_state.giprobe_ids[i] = p_giprobes[i]; } @@ -1681,9 +1708,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray(), PagedArray()); bool finish_depth = using_ssao || using_sdfgi || using_giprobe; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? 
RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(depth_framebuffer), render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear); if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { RENDER_TIMESTAMP("Resolve Depth Pre-Pass"); @@ -1731,13 +1757,13 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } RID framebuffer = using_separate_specular ? opaque_specular_framebuffer : opaque_framebuffer; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(framebuffer), render_list.elements, render_list.element_count, false, using_separate_specular ? 
PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + + _render_list_with_threads(&render_list_params, framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); if (will_continue_color && using_separate_specular) { // close the specular framebuffer, as it's no longer used - draw_list = RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); RD::get_singleton()->draw_list_end(); } } @@ -1817,9 +1843,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf render_list.sort_by_reverse_depth_and_priority(true); { - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? 
RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(alpha_framebuffer), &render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(&render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? 
RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); } if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { @@ -1854,9 +1879,8 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr { //regular forward for now - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, p_use_dp_flip, pass_mode, true, rp_uniform_set, false, Vector2(), p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list.elements, render_list.element_count, p_use_dp_flip, pass_mode, true, rp_uniform_set, false, Vector2(), p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); } } @@ -1883,9 +1907,8 @@ void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, { //regular forward for now - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_fb), render_list.elements, render_list.element_count, false, pass_mode, true, rp_uniform_set); - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, pass_mode, true, rp_uniform_set); + _render_list_with_threads(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); } } @@ -1911,6 +1934,7 @@ void 
RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo render_list.sort_by_key(false); { + RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set); //regular forward for now Vector clear; clear.push_back(Color(0, 0, 0, 0)); @@ -1919,7 +1943,7 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo clear.push_back(Color(0, 0, 0, 0)); clear.push_back(Color(0, 0, 0, 0)); RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set); + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); RD::get_singleton()->draw_list_end(); } } @@ -1946,6 +1970,7 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray clear; clear.push_back(Color(0, 0, 0, 0)); @@ -1973,9 +1998,11 @@ void RendererSceneRenderForward::_render_uv2(const PagedArrayframebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, true, ofs); //first wireframe, for pseudo conservative + render_list_params.uv_offset = ofs; + _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); //first wireframe, for pseudo conservative } - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); //second regular triangles + render_list_params.uv_offset = Vector2(); + _render_list(draw_list, 
RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); //second regular triangles RD::get_singleton()->draw_list_end(); } @@ -2054,9 +2081,8 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto E = sdfgi_framebuffer_size_cache.insert(fb_size, fb); } - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, Rect2(), sbs); - _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(E->get()), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); //second regular triangles - RD::get_singleton()->draw_list_end(); + RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); + _render_list_with_threads(&render_list_params, E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, Rect2(), sbs); } } @@ -3380,6 +3406,8 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor sampler.compare_op = RD::COMPARE_OP_LESS; shadow_sampler = RD::get_singleton()->sampler_create(sampler); } + + render_list_thread_threshold = GLOBAL_GET("rendering/forward_renderer/threaded_render_minimum_instances"); } RendererSceneRenderForward::~RendererSceneRenderForward() { diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.h b/servers/rendering/renderer_rd/renderer_scene_render_forward.h index b90717ce223..8a6f268c467 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.h @@ -425,10 +425,44 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { struct GeometryInstanceSurfaceDataCache; - template - _FORCE_INLINE_ void 
_render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0); + struct RenderListParameters { + GeometryInstanceSurfaceDataCache **elements = nullptr; + int element_count = 0; + bool reverse_cull = false; + PassMode pass_mode = PASS_MODE_COLOR; + bool no_gi = false; + RID render_pass_uniform_set; + bool force_wireframe = false; + Vector2 uv_offset; + Plane lod_plane; + float lod_distance_multiplier = 0.0; + float screen_lod_threshold = 0.0; + RD::FramebufferFormatID framebuffer_format = 0; + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) { + elements = p_elements; + element_count = p_element_count; + reverse_cull = p_reverse_cull; + pass_mode = p_pass_mode; + no_gi = p_no_gi; + render_pass_uniform_set = p_render_pass_uniform_set; + force_wireframe = p_force_wireframe; + uv_offset = p_uv_offset; + lod_plane = p_lod_plane; + lod_distance_multiplier = p_lod_distance_multiplier; + screen_lod_threshold = p_screen_lod_threshold; + } + }; - void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane 
&p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0); + template + _FORCE_INLINE_ void _render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); + + void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); + + LocalVector thread_draw_lists; + void _render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params); + void _render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector &p_storage_textures = Vector()); + + uint32_t render_list_thread_threshold = 500; void _fill_render_list(const PagedArray &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false); diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index d1f07a354f9..e955cead05c 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -360,7 +360,7 @@ void ShaderRD::_compile_version(Version *p_version) { p_version->variants = memnew_arr(RID, variant_defines.size()); #if 1 - RendererCompositorRD::thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version); + RendererThreadPool::singleton->thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version); #else for (int i = 
0; i < variant_defines.size(); i++) { _compile_variant(i, p_version); diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp index f32d010d1f9..d3979521b1c 100644 --- a/servers/rendering/renderer_scene_cull.cpp +++ b/servers/rendering/renderer_scene_cull.cpp @@ -2224,6 +2224,222 @@ void RendererSceneCull::render_camera(RID p_render_buffers, Ref &p_ _render_scene(p_render_buffers, cam_transform, camera_matrix, false, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold); }; +void RendererSceneCull::_frustum_cull_threaded(uint32_t p_thread, FrustumCullData *cull_data) { + uint32_t cull_total = cull_data->scenario->instance_data.size(); + uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count(); + uint32_t cull_from = p_thread * cull_total / total_threads; + uint32_t cull_to = (p_thread + 1 == total_threads) ? cull_total : ((p_thread + 1) * cull_total / total_threads); + + _frustum_cull(*cull_data, frustum_cull_result_threads[p_thread], cull_from, cull_to); +} + +void RendererSceneCull::_frustum_cull(FrustumCullData &cull_data, FrustumCullResult &cull_result, uint64_t p_from, uint64_t p_to) { + uint64_t frame_number = RSG::rasterizer->get_frame_number(); + float lightmap_probe_update_speed = RSG::storage->lightmap_get_probe_capture_update_speed() * RSG::rasterizer->get_frame_delta_time(); + + uint32_t sdfgi_last_light_index = 0xFFFFFFFF; + uint32_t sdfgi_last_light_cascade = 0xFFFFFFFF; + + RID instance_pair_buffer[MAX_INSTANCE_PAIRS]; + + for (uint64_t i = p_from; i < p_to; i++) { + bool mesh_visible = false; + + if (cull_data.scenario->instance_aabbs[i].in_frustum(cull_data.cull->frustum)) { + InstanceData &idata = cull_data.scenario->instance_data[i]; + uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK; + + if ((cull_data.visible_layers & idata.layer_mask) == 0) { + //failure + } else if (base_type == RS::INSTANCE_LIGHT) { + 
cull_result.lights.push_back(idata.instance); + cull_result.light_instances.push_back(RID::from_uint64(idata.instance_data_rid)); + if (cull_data.shadow_atlas.is_valid() && RSG::storage->light_has_shadow(idata.base_rid)) { + scene_render->light_instance_mark_visible(RID::from_uint64(idata.instance_data_rid)); //mark it visible for shadow allocation later + } + + } else if (base_type == RS::INSTANCE_REFLECTION_PROBE) { + if (cull_data.render_reflection_probe != idata.instance) { + //avoid entering The Matrix + + if ((idata.flags & InstanceData::FLAG_REFLECTION_PROBE_DIRTY) || scene_render->reflection_probe_instance_needs_redraw(RID::from_uint64(idata.instance_data_rid))) { + InstanceReflectionProbeData *reflection_probe = static_cast(idata.instance->base_data); + cull_data.cull->lock.lock(); + if (!reflection_probe->update_list.in_list()) { + reflection_probe->render_step = 0; + reflection_probe_render_list.add_last(&reflection_probe->update_list); + } + cull_data.cull->lock.unlock(); + + idata.flags &= ~uint32_t(InstanceData::FLAG_REFLECTION_PROBE_DIRTY); + } + + if (scene_render->reflection_probe_instance_has_reflection(RID::from_uint64(idata.instance_data_rid))) { + cull_result.reflections.push_back(RID::from_uint64(idata.instance_data_rid)); + } + } + } else if (base_type == RS::INSTANCE_DECAL) { + cull_result.decals.push_back(RID::from_uint64(idata.instance_data_rid)); + + } else if (base_type == RS::INSTANCE_GI_PROBE) { + InstanceGIProbeData *gi_probe = static_cast(idata.instance->base_data); + cull_data.cull->lock.lock(); + if (!gi_probe->update_element.in_list()) { + gi_probe_update_list.add(&gi_probe->update_element); + } + cull_data.cull->lock.unlock(); + cull_result.gi_probes.push_back(RID::from_uint64(idata.instance_data_rid)); + + } else if (base_type == RS::INSTANCE_LIGHTMAP) { + cull_result.lightmaps.push_back(RID::from_uint64(idata.instance_data_rid)); //lightmaps have their own result list (passed to render_scene), not gi_probes + } else if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && !(idata.flags &
InstanceData::FLAG_CAST_SHADOWS_ONLY)) { + bool keep = true; + + if (idata.flags & InstanceData::FLAG_REDRAW_IF_VISIBLE) { + RenderingServerDefault::redraw_request(); + } + + if (base_type == RS::INSTANCE_MESH) { + mesh_visible = true; + } else if (base_type == RS::INSTANCE_PARTICLES) { + //particles visible? process them + if (RSG::storage->particles_is_inactive(idata.base_rid)) { + //but if nothing is going on, don't do it. + keep = false; + } else { + cull_data.cull->lock.lock(); + RSG::storage->particles_request_process(idata.base_rid); + cull_data.cull->lock.unlock(); + RSG::storage->particles_set_view_axis(idata.base_rid, -cull_data.cam_transform.basis.get_axis(2).normalized()); + //particles visible? request redraw + RenderingServerDefault::redraw_request(); + } + } + + if (geometry_instance_pair_mask & (1 << RS::INSTANCE_LIGHT) && (idata.flags & InstanceData::FLAG_GEOM_LIGHTING_DIRTY)) { + InstanceGeometryData *geom = static_cast(idata.instance->base_data); + uint32_t idx = 0; + + for (Set::Element *E = geom->lights.front(); E; E = E->next()) { + InstanceLightData *light = static_cast(E->get()->base_data); + instance_pair_buffer[idx++] = light->instance; + if (idx == MAX_INSTANCE_PAIRS) { + break; + } + } + + scene_render->geometry_instance_pair_light_instances(geom->geometry_instance, instance_pair_buffer, idx); + idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_LIGHTING_DIRTY); + } + + if (geometry_instance_pair_mask & (1 << RS::INSTANCE_REFLECTION_PROBE) && (idata.flags & InstanceData::FLAG_GEOM_REFLECTION_DIRTY)) { + InstanceGeometryData *geom = static_cast(idata.instance->base_data); + uint32_t idx = 0; + + for (Set::Element *E = geom->reflection_probes.front(); E; E = E->next()) { + InstanceReflectionProbeData *reflection_probe = static_cast(E->get()->base_data); + + instance_pair_buffer[idx++] = reflection_probe->instance; + if (idx == MAX_INSTANCE_PAIRS) { + break; + } + } + + 
scene_render->geometry_instance_pair_reflection_probe_instances(geom->geometry_instance, instance_pair_buffer, idx); + idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_REFLECTION_DIRTY); + } + + if (geometry_instance_pair_mask & (1 << RS::INSTANCE_DECAL) && (idata.flags & InstanceData::FLAG_GEOM_DECAL_DIRTY)) { + //InstanceGeometryData *geom = static_cast(idata.instance->base_data); + //todo for GLES3 + idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_DECAL_DIRTY); + /*for (Set::Element *E = geom->dec.front(); E; E = E->next()) { + InstanceReflectionProbeData *reflection_probe = static_cast(E->get()->base_data); + + instance_pair_buffer[idx++] = reflection_probe->instance; + if (idx==MAX_INSTANCE_PAIRS) { + break; + } + }*/ + //scene_render->geometry_instance_pair_decal_instances(geom->geometry_instance, light_instances, idx); + } + + if (idata.flags & InstanceData::FLAG_GEOM_GI_PROBE_DIRTY) { + InstanceGeometryData *geom = static_cast(idata.instance->base_data); + uint32_t idx = 0; + for (Set::Element *E = geom->gi_probes.front(); E; E = E->next()) { + InstanceGIProbeData *gi_probe = static_cast(E->get()->base_data); + + instance_pair_buffer[idx++] = gi_probe->probe_instance; + if (idx == MAX_INSTANCE_PAIRS) { + break; + } + } + + scene_render->geometry_instance_pair_gi_probe_instances(geom->geometry_instance, instance_pair_buffer, idx); + idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_GI_PROBE_DIRTY); + } + + if ((idata.flags & InstanceData::FLAG_LIGHTMAP_CAPTURE) && idata.instance->last_frame_pass != frame_number && !idata.instance->lightmap_target_sh.is_empty() && !idata.instance->lightmap_sh.is_empty()) { + InstanceGeometryData *geom = static_cast(idata.instance->base_data); + Color *sh = idata.instance->lightmap_sh.ptrw(); + const Color *target_sh = idata.instance->lightmap_target_sh.ptr(); + for (uint32_t j = 0; j < 9; j++) { + sh[j] = sh[j].lerp(target_sh[j], MIN(1.0, lightmap_probe_update_speed)); + } + 
scene_render->geometry_instance_set_lightmap_capture(geom->geometry_instance, sh); + idata.instance->last_frame_pass = frame_number; + } + + if (keep) { + cull_result.geometry_instances.push_back(idata.instance_geometry); + } + } + } + + for (uint32_t j = 0; j < cull_data.cull->shadow_count; j++) { + for (uint32_t k = 0; k < cull_data.cull->shadows[j].cascade_count; k++) { + if (cull_data.scenario->instance_aabbs[i].in_frustum(cull_data.cull->shadows[j].cascades[k].frustum)) { + InstanceData &idata = cull_data.scenario->instance_data[i]; + uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK; + + if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && idata.flags & InstanceData::FLAG_CAST_SHADOWS) { + cull_result.directional_shadows[j].cascade_geometry_instances[k].push_back(idata.instance_geometry); + mesh_visible = true; + } + } + } + } + + for (uint32_t j = 0; j < cull_data.cull->sdfgi.region_count; j++) { + if (cull_data.scenario->instance_aabbs[i].in_aabb(cull_data.cull->sdfgi.region_aabb[j])) { + InstanceData &idata = cull_data.scenario->instance_data[i]; + uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK; + + if (base_type == RS::INSTANCE_LIGHT) { + InstanceLightData *instance_light = (InstanceLightData *)idata.instance->base_data; + if (instance_light->bake_mode == RS::LIGHT_BAKE_STATIC && cull_data.cull->sdfgi.region_cascade[j] <= instance_light->max_sdfgi_cascade) { + if (sdfgi_last_light_index != i || sdfgi_last_light_cascade != cull_data.cull->sdfgi.region_cascade[j]) { + sdfgi_last_light_index = i; + sdfgi_last_light_cascade = cull_data.cull->sdfgi.region_cascade[j]; + cull_result.sdfgi_cascade_lights[sdfgi_last_light_cascade].push_back(instance_light->instance); + } + } + } else if ((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) { + if (idata.flags & InstanceData::FLAG_USES_BAKED_LIGHT) { + cull_result.sdfgi_region_geometry_instances[j].push_back(idata.instance_geometry); + mesh_visible = true; + } + } + } + } + + if 
(mesh_visible && cull_data.scenario->instance_data[i].flags & InstanceData::FLAG_USES_MESH_INSTANCE) { + cull_result.mesh_instances.push_back(cull_data.scenario->instance_data[i].instance->mesh_instance); + } + } +} + void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows) { // Note, in stereo rendering: // - p_cam_transform will be a transform in the middle of our two eyes @@ -2249,9 +2465,6 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca Plane near_plane(p_cam_transform.origin, -p_cam_transform.basis.get_axis(2).normalized()); - uint64_t frame_number = RSG::rasterizer->get_frame_number(); - float lightmap_probe_update_speed = RSG::storage->lightmap_get_probe_capture_update_speed() * RSG::rasterizer->get_frame_delta_time(); - /* STEP 2 - CULL */ cull.frustum = Frustum(planes); @@ -2259,13 +2472,6 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca Vector directional_lights; // directional lights { - //reset shadows - for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) { - for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) { - cull.shadows[i].cascades[j].cull_result.clear(); - } - } - cull.shadow_count = 0; Vector lights_with_shadow; @@ -2302,18 +2508,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca { //sdfgi cull.sdfgi.region_count = 0; - for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { - cull.sdfgi.region_cull_result[i].clear(); - } - - for (int i = 0; i < SDFGI_MAX_CASCADES; i++) { - cull.sdfgi.cascade_lights[i].clear(); - } - if (p_render_buffers.is_valid()) { - for (int i = 0; i < SDFGI_MAX_CASCADES; i++) { - 
cull.sdfgi.cascade_lights[i].clear(); - } cull.sdfgi.cascade_light_count = 0; uint32_t prev_cascade = 0xFFFFFFFF; @@ -2335,226 +2530,53 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca } } - { - //pre-clear results - geometry_instances_to_render.clear(); - light_cull_result.clear(); - lightmap_cull_result.clear(); - reflection_probe_instance_cull_result.clear(); - light_instance_cull_result.clear(); - gi_probe_instance_cull_result.clear(); - lightmap_cull_result.clear(); - decal_instance_cull_result.clear(); - mesh_instance_cull_result.clear(); - } + frustum_cull_result.clear(); { - uint64_t cull_count = scenario->instance_data.size(); - uint32_t sdfgi_last_light_index = 0xFFFFFFFF; - uint32_t sdfgi_last_light_cascade = 0xFFFFFFFF; + uint64_t cull_from = 0; + uint64_t cull_to = scenario->instance_data.size(); - RID instance_pair_buffer[MAX_INSTANCE_PAIRS]; + FrustumCullData cull_data; - for (uint64_t i = 0; i < cull_count; i++) { - bool mesh_visible = false; - - if (scenario->instance_aabbs[i].in_frustum(cull.frustum)) { - InstanceData &idata = scenario->instance_data[i]; - uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK; - - if ((p_visible_layers & idata.layer_mask) == 0) { - //failure - } else if (base_type == RS::INSTANCE_LIGHT) { - light_cull_result.push_back(idata.instance); - light_instance_cull_result.push_back(RID::from_uint64(idata.instance_data_rid)); - if (p_shadow_atlas.is_valid() && RSG::storage->light_has_shadow(idata.base_rid)) { - scene_render->light_instance_mark_visible(RID::from_uint64(idata.instance_data_rid)); //mark it visible for shadow allocation later - } - - } else if (base_type == RS::INSTANCE_REFLECTION_PROBE) { - if (render_reflection_probe != idata.instance) { - //avoid entering The Matrix - - if ((idata.flags & InstanceData::FLAG_REFLECTION_PROBE_DIRTY) || scene_render->reflection_probe_instance_needs_redraw(RID::from_uint64(idata.instance_data_rid))) { - 
InstanceReflectionProbeData *reflection_probe = static_cast(idata.instance->base_data); - cull.lock.lock(); - if (!reflection_probe->update_list.in_list()) { - reflection_probe->render_step = 0; - reflection_probe_render_list.add_last(&reflection_probe->update_list); - } - cull.lock.unlock(); - - idata.flags &= ~uint32_t(InstanceData::FLAG_REFLECTION_PROBE_DIRTY); - } - - if (scene_render->reflection_probe_instance_has_reflection(RID::from_uint64(idata.instance_data_rid))) { - reflection_probe_instance_cull_result.push_back(RID::from_uint64(idata.instance_data_rid)); - } - } - } else if (base_type == RS::INSTANCE_DECAL) { - decal_instance_cull_result.push_back(RID::from_uint64(idata.instance_data_rid)); - - } else if (base_type == RS::INSTANCE_GI_PROBE) { - InstanceGIProbeData *gi_probe = static_cast(idata.instance->base_data); - cull.lock.lock(); - if (!gi_probe->update_element.in_list()) { - gi_probe_update_list.add(&gi_probe->update_element); - } - cull.lock.unlock(); - gi_probe_instance_cull_result.push_back(RID::from_uint64(idata.instance_data_rid)); - - } else if (base_type == RS::INSTANCE_LIGHTMAP) { - lightmap_cull_result.push_back(RID::from_uint64(idata.instance_data_rid)); - } else if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && !(idata.flags & InstanceData::FLAG_CAST_SHADOWS_ONLY)) { - bool keep = true; - - if (idata.flags & InstanceData::FLAG_REDRAW_IF_VISIBLE) { - RenderingServerDefault::redraw_request(); - } - - if (base_type == RS::INSTANCE_MESH) { - mesh_visible = true; - } else if (base_type == RS::INSTANCE_PARTICLES) { - //particles visible? process them - if (RSG::storage->particles_is_inactive(idata.base_rid)) { - //but if nothing is going on, don't do it. - keep = false; - } else { - cull.lock.lock(); - RSG::storage->particles_request_process(idata.base_rid); - cull.lock.unlock(); - RSG::storage->particles_set_view_axis(idata.base_rid, -p_cam_transform.basis.get_axis(2).normalized()); - //particles visible? 
request redraw - RenderingServerDefault::redraw_request(); - } - } - - if (geometry_instance_pair_mask & (1 << RS::INSTANCE_LIGHT) && (idata.flags & InstanceData::FLAG_GEOM_LIGHTING_DIRTY)) { - InstanceGeometryData *geom = static_cast(idata.instance->base_data); - uint32_t idx = 0; - - for (Set::Element *E = geom->lights.front(); E; E = E->next()) { - InstanceLightData *light = static_cast(E->get()->base_data); - instance_pair_buffer[idx++] = light->instance; - if (idx == MAX_INSTANCE_PAIRS) { - break; - } - } - - scene_render->geometry_instance_pair_light_instances(geom->geometry_instance, instance_pair_buffer, idx); - idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_LIGHTING_DIRTY); - } - - if (geometry_instance_pair_mask & (1 << RS::INSTANCE_REFLECTION_PROBE) && (idata.flags & InstanceData::FLAG_GEOM_REFLECTION_DIRTY)) { - InstanceGeometryData *geom = static_cast(idata.instance->base_data); - uint32_t idx = 0; - - for (Set::Element *E = geom->reflection_probes.front(); E; E = E->next()) { - InstanceReflectionProbeData *reflection_probe = static_cast(E->get()->base_data); - - instance_pair_buffer[idx++] = reflection_probe->instance; - if (idx == MAX_INSTANCE_PAIRS) { - break; - } - } - - scene_render->geometry_instance_pair_reflection_probe_instances(geom->geometry_instance, instance_pair_buffer, idx); - idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_REFLECTION_DIRTY); - } - - if (geometry_instance_pair_mask & (1 << RS::INSTANCE_DECAL) && (idata.flags & InstanceData::FLAG_GEOM_DECAL_DIRTY)) { - //InstanceGeometryData *geom = static_cast(idata.instance->base_data); - //todo for GLES3 - idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_DECAL_DIRTY); - /*for (Set::Element *E = geom->dec.front(); E; E = E->next()) { - InstanceReflectionProbeData *reflection_probe = static_cast(E->get()->base_data); - - instance_pair_buffer[idx++] = reflection_probe->instance; - if (idx==MAX_INSTANCE_PAIRS) { - break; - } - }*/ - 
//scene_render->geometry_instance_pair_decal_instances(geom->geometry_instance, light_instances, idx); - } - - if (idata.flags & InstanceData::FLAG_GEOM_GI_PROBE_DIRTY) { - InstanceGeometryData *geom = static_cast(idata.instance->base_data); - uint32_t idx = 0; - for (Set::Element *E = geom->gi_probes.front(); E; E = E->next()) { - InstanceGIProbeData *gi_probe = static_cast(E->get()->base_data); - - instance_pair_buffer[idx++] = gi_probe->probe_instance; - if (idx == MAX_INSTANCE_PAIRS) { - break; - } - } - - scene_render->geometry_instance_pair_gi_probe_instances(geom->geometry_instance, instance_pair_buffer, idx); - idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_GI_PROBE_DIRTY); - } - - if ((idata.flags & InstanceData::FLAG_LIGHTMAP_CAPTURE) && idata.instance->last_frame_pass != frame_number && !idata.instance->lightmap_target_sh.is_empty() && !idata.instance->lightmap_sh.is_empty()) { - InstanceGeometryData *geom = static_cast(idata.instance->base_data); - Color *sh = idata.instance->lightmap_sh.ptrw(); - const Color *target_sh = idata.instance->lightmap_target_sh.ptr(); - for (uint32_t j = 0; j < 9; j++) { - sh[j] = sh[j].lerp(target_sh[j], MIN(1.0, lightmap_probe_update_speed)); - } - scene_render->geometry_instance_set_lightmap_capture(geom->geometry_instance, sh); - idata.instance->last_frame_pass = frame_number; - } - - if (keep) { - geometry_instances_to_render.push_back(idata.instance_geometry); - } - } + //prepare for eventual thread usage + cull_data.cull = &cull; + cull_data.scenario = scenario; + cull_data.shadow_atlas = p_shadow_atlas; + cull_data.cam_transform = p_cam_transform; + cull_data.visible_layers = p_visible_layers; + cull_data.render_reflection_probe = render_reflection_probe; +//#define DEBUG_CULL_TIME +#ifdef DEBUG_CULL_TIME + uint64_t time_from = OS::get_singleton()->get_ticks_usec(); +#endif + if (cull_to > thread_cull_threshold) { + //multiple threads + for (uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) { + 
frustum_cull_result_threads[i].clear(); } - for (uint32_t j = 0; j < cull.shadow_count; j++) { - for (uint32_t k = 0; k < cull.shadows[j].cascade_count; k++) { - if (scenario->instance_aabbs[i].in_frustum(cull.shadows[j].cascades[k].frustum)) { - InstanceData &idata = scenario->instance_data[i]; - uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK; + RendererThreadPool::singleton->thread_work_pool.do_work(frustum_cull_result_threads.size(), this, &RendererSceneCull::_frustum_cull_threaded, &cull_data); - if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && idata.flags & InstanceData::FLAG_CAST_SHADOWS) { - cull.shadows[j].cascades[k].cull_result.push_back(idata.instance_geometry); - mesh_visible = true; - } - } - } + for (uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) { + frustum_cull_result.append_from(frustum_cull_result_threads[i]); } - for (uint32_t j = 0; j < cull.sdfgi.region_count; j++) { - if (scenario->instance_aabbs[i].in_aabb(cull.sdfgi.region_aabb[j])) { - InstanceData &idata = scenario->instance_data[i]; - uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK; - - if (base_type == RS::INSTANCE_LIGHT) { - InstanceLightData *instance_light = (InstanceLightData *)idata.instance->base_data; - if (instance_light->bake_mode == RS::LIGHT_BAKE_STATIC && cull.sdfgi.region_cascade[j] <= instance_light->max_sdfgi_cascade) { - if (sdfgi_last_light_index != i || sdfgi_last_light_cascade != cull.sdfgi.region_cascade[j]) { - sdfgi_last_light_index = i; - sdfgi_last_light_cascade = cull.sdfgi.region_cascade[j]; - cull.sdfgi.cascade_lights[sdfgi_last_light_cascade].push_back(instance_light->instance); - } - } - } else if ((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) { - if (idata.flags & InstanceData::FLAG_USES_BAKED_LIGHT) { - cull.sdfgi.region_cull_result[j].push_back(idata.instance_geometry); - mesh_visible = true; - } - } - } - } - - if (mesh_visible && scenario->instance_data[i].flags & 
InstanceData::FLAG_USES_MESH_INSTANCE) { - mesh_instance_cull_result.push_back(scenario->instance_data[i].instance->mesh_instance); - } + } else { + //single threaded + _frustum_cull(cull_data, frustum_cull_result, cull_from, cull_to); } - if (mesh_instance_cull_result.size()) { - for (uint64_t i = 0; i < mesh_instance_cull_result.size(); i++) { - RSG::storage->mesh_instance_check_for_update(mesh_instance_cull_result[i]); +#ifdef DEBUG_CULL_TIME + static float time_avg = 0; + static uint32_t time_count = 0; + time_avg += double(OS::get_singleton()->get_ticks_usec() - time_from) / 1000.0; + time_count++; + print_line("time taken: " + rtos(time_avg / time_count)); +#endif + + if (frustum_cull_result.mesh_instances.size()) { + for (uint64_t i = 0; i < frustum_cull_result.mesh_instances.size(); i++) { + RSG::storage->mesh_instance_check_for_update(frustum_cull_result.mesh_instances[i]); } RSG::storage->update_mesh_instances(); } @@ -2567,7 +2589,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca const Cull::Shadow::Cascade &c = cull.shadows[i].cascades[j]; // print_line("shadow " + itos(i) + " cascade " + itos(j) + " elements: " + itos(c.cull_result.size())); scene_render->light_instance_set_shadow_transform(cull.shadows[i].light_instance, c.projection, c.transform, c.zfar, c.split, j, c.shadow_texel_size, c.bias_scale, c.range_begin, c.uv_scale); - scene_render->render_shadow(cull.shadows[i].light_instance, p_shadow_atlas, j, c.cull_result, near_plane, p_cam_projection.get_lod_multiplier(), p_screen_lod_threshold); + scene_render->render_shadow(cull.shadows[i].light_instance, p_shadow_atlas, j, frustum_cull_result.directional_shadows[i].cascade_geometry_instances[j], near_plane, p_cam_projection.get_lod_multiplier(), p_screen_lod_threshold); } } @@ -2577,19 +2599,19 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca if (cull.sdfgi.region_count > 0) { //update regions for (uint32_t i = 0; i < 
cull.sdfgi.region_count; i++) { - scene_render->render_sdfgi(p_render_buffers, i, cull.sdfgi.region_cull_result[i]); + scene_render->render_sdfgi(p_render_buffers, i, frustum_cull_result.sdfgi_region_geometry_instances[i]); } //check if static lights were culled bool static_lights_culled = false; for (uint32_t i = 0; i < cull.sdfgi.cascade_light_count; i++) { - if (cull.sdfgi.cascade_lights[i].size()) { + if (frustum_cull_result.sdfgi_cascade_lights[i].size()) { static_lights_culled = true; break; } } if (static_lights_culled) { - scene_render->render_sdfgi_static_lights(p_render_buffers, cull.sdfgi.cascade_light_count, cull.sdfgi.cascade_light_index, cull.sdfgi.cascade_lights); + scene_render->render_sdfgi_static_lights(p_render_buffers, cull.sdfgi.cascade_light_count, cull.sdfgi.cascade_light_index, frustum_cull_result.sdfgi_cascade_lights); } } @@ -2618,8 +2640,8 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca //SortArray sorter; //sorter.sort(light_cull_result,light_cull_count); - for (uint32_t i = 0; i < (uint32_t)light_cull_result.size(); i++) { - Instance *ins = light_cull_result[i]; + for (uint32_t i = 0; i < (uint32_t)frustum_cull_result.lights.size(); i++) { + Instance *ins = frustum_cull_result.lights[i]; if (!p_shadow_atlas.is_valid() || !RSG::storage->light_has_shadow(ins->base)) { continue; @@ -2715,7 +2737,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca //append the directional lights to the lights culled for (int i = 0; i < directional_lights.size(); i++) { - light_instance_cull_result.push_back(directional_lights[i]); + frustum_cull_result.light_instances.push_back(directional_lights[i]); } } @@ -2752,7 +2774,7 @@ void RendererSceneCull::_render_scene(RID p_render_buffers, const Transform p_ca /* PROCESS GEOMETRY AND DRAW SCENE */ RENDER_TIMESTAMP("Render Scene "); - scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, 
geometry_instances_to_render, light_instance_cull_result, reflection_probe_instance_cull_result, gi_probe_instance_cull_result, decal_instance_cull_result, lightmap_cull_result, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold); + scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, frustum_cull_result.geometry_instances, frustum_cull_result.light_instances, frustum_cull_result.reflections, frustum_cull_result.gi_probes, frustum_cull_result.decals, frustum_cull_result.lightmaps, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold); } void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas) { @@ -3042,7 +3064,7 @@ void RendererSceneCull::render_probes() { update_lights = true; } - geometry_instances_to_render.clear(); + frustum_cull_result.geometry_instances.clear(); RID instance_pair_buffer[MAX_INSTANCE_PAIRS]; @@ -3069,10 +3091,10 @@ void RendererSceneCull::render_probes() { ins->scenario->instance_data[ins->array_index].flags &= ~uint32_t(InstanceData::FLAG_GEOM_GI_PROBE_DIRTY); } - geometry_instances_to_render.push_back(geom->geometry_instance); + frustum_cull_result.geometry_instances.push_back(geom->geometry_instance); } - scene_render->gi_probe_update(probe->probe_instance, update_lights, probe->light_instances, geometry_instances_to_render); + scene_render->gi_probe_update(probe->probe_instance, update_lights, probe->light_instances, frustum_cull_result.geometry_instances); gi_probe_update_list.remove(gi_probe); @@ -3087,7 +3109,7 @@ void RendererSceneCull::render_particle_colliders() { if (hfpc->scenario && hfpc->base_type == RS::INSTANCE_PARTICLES_COLLISION && 
RSG::storage->particles_collision_is_heightfield(hfpc->base)) { //update heightfield instance_cull_result.clear(); - geometry_instances_to_render.clear(); + frustum_cull_result.geometry_instances.clear(); struct CullAABB { PagedArray *result; @@ -3109,10 +3131,10 @@ void RendererSceneCull::render_particle_colliders() { continue; } InstanceGeometryData *geom = static_cast(instance->base_data); - geometry_instances_to_render.push_back(geom->geometry_instance); + frustum_cull_result.geometry_instances.push_back(geom->geometry_instance); } - scene_render->render_particle_collider_heightfield(hfpc->base, hfpc->transform, geometry_instances_to_render); + scene_render->render_particle_collider_heightfield(hfpc->base, hfpc->transform, frustum_cull_result.geometry_instances); } heightfield_particle_colliders_update_list.erase(heightfield_particle_colliders_update_list.front()); } @@ -3457,62 +3479,30 @@ RendererSceneCull::RendererSceneCull() { singleton = this; instance_cull_result.set_page_pool(&instance_cull_page_pool); - mesh_instance_cull_result.set_page_pool(&rid_cull_page_pool); instance_shadow_cull_result.set_page_pool(&instance_cull_page_pool); - light_cull_result.set_page_pool(&instance_cull_page_pool); - geometry_instances_to_render.set_page_pool(&geometry_instance_cull_page_pool); geometry_instances_to_shadow_render.set_page_pool(&geometry_instance_cull_page_pool); - lightmap_cull_result.set_page_pool(&rid_cull_page_pool); - reflection_probe_instance_cull_result.set_page_pool(&rid_cull_page_pool); - light_instance_cull_result.set_page_pool(&rid_cull_page_pool); - gi_probe_instance_cull_result.set_page_pool(&rid_cull_page_pool); - decal_instance_cull_result.set_page_pool(&rid_cull_page_pool); - - for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) { - for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) { - cull.shadows[i].cascades[j].cull_result.set_page_pool(&geometry_instance_cull_page_pool); - } - } - - for (int i = 
0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { - cull.sdfgi.region_cull_result[i].set_page_pool(&geometry_instance_cull_page_pool); - } - - for (int i = 0; i < SDFGI_MAX_CASCADES; i++) { - cull.sdfgi.cascade_lights[i].set_page_pool(&rid_cull_page_pool); + frustum_cull_result.init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool); + frustum_cull_result_threads.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); + for (uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) { + frustum_cull_result_threads[i].init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool); } indexer_update_iterations = GLOBAL_GET("rendering/spatial_indexer/update_iterations_per_frame"); + thread_cull_threshold = GLOBAL_GET("rendering/spatial_indexer/threaded_cull_minimum_instances"); + thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)RendererThreadPool::singleton->thread_work_pool.get_thread_count()); //make sure there is at least one thread per CPU } RendererSceneCull::~RendererSceneCull() { instance_cull_result.reset(); - mesh_instance_cull_result.reset(); instance_shadow_cull_result.reset(); - light_cull_result.reset(); - geometry_instances_to_render.reset(); geometry_instances_to_shadow_render.reset(); - lightmap_cull_result.reset(); - reflection_probe_instance_cull_result.reset(); - light_instance_cull_result.reset(); - gi_probe_instance_cull_result.reset(); - decal_instance_cull_result.reset(); - - for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) { - for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) { - cull.shadows[i].cascades[j].cull_result.reset(); - } - } - - for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { - cull.sdfgi.region_cull_result[i].reset(); - } - - for (int i = 0; i < SDFGI_MAX_CASCADES; i++) { - cull.sdfgi.cascade_lights[i].reset(); + frustum_cull_result.reset(); + for 
(uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) { + frustum_cull_result_threads[i].reset(); } + frustum_cull_result_threads.clear(); } diff --git a/servers/rendering/renderer_scene_cull.h b/servers/rendering/renderer_scene_cull.h index 6399f145a37..796fb147437 100644 --- a/servers/rendering/renderer_scene_cull.h +++ b/servers/rendering/renderer_scene_cull.h @@ -352,7 +352,7 @@ public: bool receive_shadows : 8; bool visible : 8; bool baked_light : 2; //this flag is only to know if it actually did use baked light - bool dynamic_gi : 2; //this flag is only to know if it actually did use baked light + bool dynamic_gi : 2; //same above for dynamic objects bool redraw_if_visible : 4; Instance *lightmap; @@ -688,15 +688,6 @@ public: } }; - struct CullResult { - PagedArray *result; - _FORCE_INLINE_ bool operator()(void *p_data) { - Instance *p_instance = (Instance *)p_data; - result->push_back(p_instance); - return false; - } - }; - Set heightfield_particle_colliders_update_list; PagedArrayPool instance_cull_page_pool; @@ -704,17 +695,128 @@ public: PagedArrayPool rid_cull_page_pool; PagedArray instance_cull_result; - PagedArray mesh_instance_cull_result; - PagedArray geometry_instances_to_render; PagedArray instance_shadow_cull_result; PagedArray geometry_instances_to_shadow_render; - PagedArray light_cull_result; - PagedArray lightmap_cull_result; - PagedArray reflection_probe_instance_cull_result; - PagedArray light_instance_cull_result; - PagedArray gi_probe_instance_cull_result; - PagedArray decal_instance_cull_result; + struct FrustumCullResult { //arrays filled by one frustum cull; clear(), reset(), append_from() and init() must each cover every member + PagedArray geometry_instances; + PagedArray lights; + PagedArray light_instances; + PagedArray lightmaps; + PagedArray reflections; + PagedArray decals; + PagedArray gi_probes; + PagedArray mesh_instances; + + struct DirectionalShadow { + PagedArray cascade_geometry_instances[RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES]; + } directional_shadows[RendererSceneRender::MAX_DIRECTIONAL_LIGHTS]; + +
PagedArray sdfgi_region_geometry_instances[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; + PagedArray sdfgi_cascade_lights[SDFGI_MAX_CASCADES]; + + void clear() { + geometry_instances.clear(); + lights.clear(); + light_instances.clear(); + lightmaps.clear(); + reflections.clear(); + decals.clear(); + gi_probes.clear(); + mesh_instances.clear(); + for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) { + for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) { + directional_shadows[i].cascade_geometry_instances[j].clear(); + } + } + + for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { + sdfgi_region_geometry_instances[i].clear(); + } + + for (int i = 0; i < SDFGI_MAX_CASCADES; i++) { + sdfgi_cascade_lights[i].clear(); + } + } + + void reset() { + geometry_instances.reset(); + lights.reset(); + light_instances.reset(); + lightmaps.reset(); + reflections.reset(); + decals.reset(); + gi_probes.reset(); + mesh_instances.reset(); + for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) { + for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) { + directional_shadows[i].cascade_geometry_instances[j].reset(); + } + } + + for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { + sdfgi_region_geometry_instances[i].reset(); + } + + for (int i = 0; i < SDFGI_MAX_CASCADES; i++) { + sdfgi_cascade_lights[i].reset(); + } + } + + void append_from(FrustumCullResult &p_cull_result) { + geometry_instances.merge_unordered(p_cull_result.geometry_instances); + lights.merge_unordered(p_cull_result.lights); + light_instances.merge_unordered(p_cull_result.light_instances); + lightmaps.merge_unordered(p_cull_result.lightmaps); + reflections.merge_unordered(p_cull_result.reflections); + decals.merge_unordered(p_cull_result.decals); + gi_probes.merge_unordered(p_cull_result.gi_probes); + mesh_instances.merge_unordered(p_cull_result.mesh_instances); + + for
(int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) { + for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) { + directional_shadows[i].cascade_geometry_instances[j].merge_unordered(p_cull_result.directional_shadows[i].cascade_geometry_instances[j]); + } + } + + for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { + sdfgi_region_geometry_instances[i].merge_unordered(p_cull_result.sdfgi_region_geometry_instances[i]); + } + + for (int i = 0; i < SDFGI_MAX_CASCADES; i++) { + sdfgi_cascade_lights[i].merge_unordered(p_cull_result.sdfgi_cascade_lights[i]); + } + } + + void init(PagedArrayPool *p_rid_pool, PagedArrayPool *p_geometry_instance_pool, PagedArrayPool *p_instance_pool) { //hands every array the page pool it allocates from + geometry_instances.set_page_pool(p_geometry_instance_pool); + light_instances.set_page_pool(p_rid_pool); + lights.set_page_pool(p_instance_pool); + lightmaps.set_page_pool(p_rid_pool); + reflections.set_page_pool(p_rid_pool); + decals.set_page_pool(p_rid_pool); + gi_probes.set_page_pool(p_rid_pool); //kept in step with clear()/reset()/append_from(), which all handle gi_probes + mesh_instances.set_page_pool(p_rid_pool); + for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) { + for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) { + directional_shadows[i].cascade_geometry_instances[j].set_page_pool(p_geometry_instance_pool); + } + } + + for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { + sdfgi_region_geometry_instances[i].set_page_pool(p_geometry_instance_pool); + } + + for (int i = 0; i < SDFGI_MAX_CASCADES; i++) { + sdfgi_cascade_lights[i].set_page_pool(p_rid_pool); + } + } + }; + + FrustumCullResult frustum_cull_result; + LocalVector frustum_cull_result_threads; + + uint32_t thread_cull_threshold = 200; RID_PtrOwner instance_owner; @@ -786,8 +888,6 @@ public: real_t range_begin; Vector2 uv_scale; - PagedArray cull_result; - } cascades[RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES]; //max 4 cascades uint32_t cascade_count; @@ -797,12 +897,10 @@ public: struct SDFGI
{ //have arrays here because SDFGI functions expects this, plus regions can have areas - PagedArray region_cull_result[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; AABB region_aabb[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; //max 3 regions per cascade uint32_t region_cascade[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; //max 3 regions per cascade uint32_t region_count = 0; - PagedArray cascade_lights[SDFGI_MAX_CASCADES]; uint32_t cascade_light_index[SDFGI_MAX_CASCADES]; uint32_t cascade_light_count = 0; @@ -813,6 +911,18 @@ public: Frustum frustum; } cull; + struct FrustumCullData { + Cull *cull; + Scenario *scenario; + RID shadow_atlas; + Transform cam_transform; + uint32_t visible_layers; + Instance *render_reflection_probe; + }; + + void _frustum_cull_threaded(uint32_t p_thread, FrustumCullData *cull_data); + void _frustum_cull(FrustumCullData &cull_data, FrustumCullResult &cull_result, uint64_t p_from, uint64_t p_to); + bool _render_reflection_probe_step(Instance *p_instance, int p_step); void _prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows = true); void _render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold); diff --git a/servers/rendering/renderer_storage.h b/servers/rendering/renderer_storage.h index 6f79e8f911f..3e53f7130a0 100644 --- a/servers/rendering/renderer_storage.h +++ b/servers/rendering/renderer_storage.h @@ -113,7 +113,7 @@ public: ~DependencyTracker() { clear(); } private: - friend class Dependency; + friend struct Dependency;
uint32_t instance_version = 0; Set dependencies; }; diff --git a/servers/rendering/renderer_thread_pool.cpp b/servers/rendering/renderer_thread_pool.cpp new file mode 100644 index 00000000000..98050dd5080 --- /dev/null +++ b/servers/rendering/renderer_thread_pool.cpp @@ -0,0 +1,42 @@ +/*************************************************************************/ +/* renderer_thread_pool.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#include "renderer_thread_pool.h" + +RendererThreadPool *RendererThreadPool::singleton = nullptr; + +RendererThreadPool::RendererThreadPool() { + singleton = this; + thread_work_pool.init(); +} + +RendererThreadPool::~RendererThreadPool() { + thread_work_pool.finish(); +} diff --git a/servers/rendering/renderer_thread_pool.h b/servers/rendering/renderer_thread_pool.h new file mode 100644 index 00000000000..ae25415a0dd --- /dev/null +++ b/servers/rendering/renderer_thread_pool.h @@ -0,0 +1,45 @@ +/*************************************************************************/ +/* renderer_thread_pool.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef RENDERERTHREADPOOL_H +#define RENDERERTHREADPOOL_H + +#include "core/templates/thread_work_pool.h" + +class RendererThreadPool { +public: + ThreadWorkPool thread_work_pool; + + static RendererThreadPool *singleton; + RendererThreadPool(); + ~RendererThreadPool(); +}; + +#endif // RENDERERTHREADPOOL_H diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index 758a9a34cd3..b87171dc5ec 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -2253,6 +2253,8 @@ void RenderingServer::set_render_loop_enabled(bool p_enabled) { RenderingServer::RenderingServer() { //ERR_FAIL_COND(singleton); + + thread_pool = memnew(RendererThreadPool); singleton = this; GLOBAL_DEF_RST("rendering/vram_compression/import_bptc", false); @@ -2383,8 +2385,13 @@ RenderingServer::RenderingServer() { GLOBAL_DEF("rendering/spatial_indexer/update_iterations_per_frame", 10); ProjectSettings::get_singleton()->set_custom_property_info("rendering/spatial_indexer/update_iterations_per_frame", PropertyInfo(Variant::INT, "rendering/spatial_indexer/update_iterations_per_frame", PROPERTY_HINT_RANGE, "0,1024,1")); + GLOBAL_DEF("rendering/spatial_indexer/threaded_cull_minimum_instances", 1000); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/spatial_indexer/threaded_cull_minimum_instances", PropertyInfo(Variant::INT, 
"rendering/spatial_indexer/threaded_cull_minimum_instances", PROPERTY_HINT_RANGE, "32,65536,1")); + GLOBAL_DEF("rendering/forward_renderer/threaded_render_minimum_instances", 500); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/forward_renderer/threaded_render_minimum_instances", PropertyInfo(Variant::INT, "rendering/forward_renderer/threaded_render_minimum_instances", PROPERTY_HINT_RANGE, "32,65536,1")); } RenderingServer::~RenderingServer() { + memdelete(thread_pool); singleton = nullptr; } diff --git a/servers/rendering_server.h b/servers/rendering_server.h index 7db2924612d..5481079694c 100644 --- a/servers/rendering_server.h +++ b/servers/rendering_server.h @@ -39,6 +39,7 @@ #include "core/variant/typed_array.h" #include "core/variant/variant.h" #include "servers/display_server.h" +#include "servers/rendering/renderer_thread_pool.h" #include "servers/rendering/rendering_device.h" #include "servers/rendering/shader_language.h" @@ -52,6 +53,8 @@ class RenderingServer : public Object { Array _get_array_from_surface(uint32_t p_format, Vector p_vertex_data, Vector p_attrib_data, Vector p_skin_data, int p_vertex_len, Vector p_index_data, int p_index_len) const; + RendererThreadPool *thread_pool = nullptr; + protected: RID _make_test_cube(); void _free_internal_rids();