diff --git a/drivers/d3d12/rendering_context_driver_d3d12.cpp b/drivers/d3d12/rendering_context_driver_d3d12.cpp index 726be064bdc..128b8bcd037 100644 --- a/drivers/d3d12/rendering_context_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_context_driver_d3d12.cpp @@ -173,6 +173,7 @@ Error RenderingContextDriverD3D12::_initialize_devices() { Device &device = driver_devices[i]; device.name = desc.Description; device.vendor = Vendor(desc.VendorId); + device.workarounds = Workarounds(); if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { device.type = DEVICE_TYPE_CPU; diff --git a/drivers/vulkan/rendering_context_driver_vulkan.cpp b/drivers/vulkan/rendering_context_driver_vulkan.cpp index 6eb25743f92..7cba8209780 100644 --- a/drivers/vulkan/rendering_context_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_context_driver_vulkan.cpp @@ -502,6 +502,9 @@ Error RenderingContextDriverVulkan::_initialize_devices() { driver_device.name = String::utf8(props.deviceName); driver_device.vendor = Vendor(props.vendorID); driver_device.type = DeviceType(props.deviceType); + driver_device.workarounds = Workarounds(); + + _check_driver_workarounds(props, driver_device); uint32_t queue_family_properties_count = 0; vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &queue_family_properties_count, nullptr); @@ -515,6 +518,31 @@ Error RenderingContextDriverVulkan::_initialize_devices() { return OK; } +void RenderingContextDriverVulkan::_check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, Device &r_device) { + // Workaround for the Adreno 6XX family of devices. + // + // There's a known issue with the Vulkan driver in this family of devices where it'll crash if a dynamic state for drawing is + // used in a command buffer before a dispatch call is issued. As both dynamic scissor and viewport are basic requirements for + // the engine to not bake this state into the PSO, the only known way to fix this issue is to reset the command buffer entirely. 
+ // + // As the render graph has no built in limitations of whether it'll issue compute work before anything needs to draw on the + // frame, and there's no guarantee that compute work will never be dependent on rasterization in the future, this workaround + // will end recording on the current command buffer any time a compute list is encountered after a draw list was executed. + // A new command buffer will be created afterwards and the appropriate synchronization primitives will be inserted. + // + // Executing this workaround has the added cost of synchronization between all the command buffers that are created as well as + // all the individual submissions. This performance hit is accepted for the sake of being able to support these devices without + // limiting the design of the renderer. + // + // This bug was fixed in driver version 512.503.0, so the workaround is only enabled on driver versions older than this. + // + r_device.workarounds.avoid_compute_after_draw = + r_device.vendor == VENDOR_QUALCOMM && + p_device_properties.deviceID >= 0x6000000 && // Adreno 6xx + p_device_properties.driverVersion < VK_MAKE_VERSION(512, 503, 0) && + r_device.name.find("Turnip") < 0; +} + bool RenderingContextDriverVulkan::_use_validation_layers() const { return Engine::get_singleton()->is_validation_layers_enabled(); } diff --git a/drivers/vulkan/rendering_context_driver_vulkan.h b/drivers/vulkan/rendering_context_driver_vulkan.h index 6348f90d55e..f1d4021e322 100644 --- a/drivers/vulkan/rendering_context_driver_vulkan.h +++ b/drivers/vulkan/rendering_context_driver_vulkan.h @@ -105,6 +105,7 @@ private: Error _initialize_instance_extensions(); Error _initialize_instance(); Error _initialize_devices(); + void _check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, Device &r_device); // Static callbacks. 
static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT p_message_severity, VkDebugUtilsMessageTypeFlagsEXT p_message_type, const VkDebugUtilsMessengerCallbackDataEXT *p_callback_data, void *p_user_data); diff --git a/servers/rendering/rendering_context_driver.h b/servers/rendering/rendering_context_driver.h index df1424da951..539b3814a04 100644 --- a/servers/rendering/rendering_context_driver.h +++ b/servers/rendering/rendering_context_driver.h @@ -73,10 +73,15 @@ public: DEVICE_TYPE_MAX = 0x5 }; + struct Workarounds { + bool avoid_compute_after_draw = false; + }; + struct Device { String name = "Unknown"; Vendor vendor = VENDOR_UNKNOWN; DeviceType type = DEVICE_TYPE_OTHER; + Workarounds workarounds; }; virtual ~RenderingContextDriver(); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 31fc51efaa9..6746a1dde1c 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -4877,25 +4877,78 @@ void RenderingDevice::_end_frame() { ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); } - draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS); driver->command_buffer_end(frames[frame].setup_command_buffer); - driver->command_buffer_end(frames[frame].draw_command_buffer); + + // The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use. + RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer; + draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool); + driver->command_buffer_end(command_buffer); driver->end_segment(); } void RenderingDevice::_execute_frame(bool p_present) { + // Check whether this frame should present the swap chains and in which queue. 
const bool frame_can_present = p_present && !frames[frame].swap_chains_to_present.is_empty(); const bool separate_present_queue = main_queue != present_queue; - const VectorView execute_draw_semaphore = frame_can_present && separate_present_queue ? frames[frame].draw_semaphore : VectorView(); - const VectorView execute_draw_swap_chains = frame_can_present && !separate_present_queue ? frames[frame].swap_chains_to_present : VectorView(); + thread_local LocalVector swap_chains; + swap_chains.clear(); + + // Execute the setup command buffer. driver->command_queue_execute_and_present(main_queue, {}, frames[frame].setup_command_buffer, frames[frame].setup_semaphore, {}, {}); - driver->command_queue_execute_and_present(main_queue, frames[frame].setup_semaphore, frames[frame].draw_command_buffer, execute_draw_semaphore, frames[frame].draw_fence, execute_draw_swap_chains); + + // Execute command buffers and use semaphores to wait on the execution of the previous one. Normally there's only one command buffer, + // but driver workarounds can force situations where there'll be more. + uint32_t command_buffer_count = 1; + RDG::CommandBufferPool &buffer_pool = frames[frame].command_buffer_pool; + if (buffer_pool.buffers_used > 0) { + command_buffer_count += buffer_pool.buffers_used; + buffer_pool.buffers_used = 0; + } + + RDD::SemaphoreID wait_semaphore = frames[frame].setup_semaphore; + for (uint32_t i = 0; i < command_buffer_count; i++) { + RDD::CommandBufferID command_buffer; + RDD::SemaphoreID signal_semaphore; + RDD::FenceID signal_fence; + if (i > 0) { + command_buffer = buffer_pool.buffers[i - 1]; + signal_semaphore = buffer_pool.semaphores[i - 1]; + } else { + command_buffer = frames[frame].draw_command_buffer; + signal_semaphore = frames[frame].draw_semaphore; + } + + bool signal_semaphore_valid; + if (i == (command_buffer_count - 1)) { + // This is the last command buffer, it should signal the fence. 
+ signal_fence = frames[frame].draw_fence; + signal_semaphore_valid = false; + + if (frame_can_present && separate_present_queue) { + // The semaphore is required if the frame can be presented and a separate present queue is used. + signal_semaphore_valid = true; + } else if (frame_can_present) { + // Just present the swap chains as part of the last command execution. + swap_chains = frames[frame].swap_chains_to_present; + } + } else { + // Semaphores always need to be signaled if it's not the last command buffer. + signal_semaphore_valid = true; + } + + driver->command_queue_execute_and_present(main_queue, wait_semaphore, command_buffer, signal_semaphore_valid ? signal_semaphore : VectorView(), signal_fence, swap_chains); + + // Make the next command buffer wait on the semaphore signaled by this one. + wait_semaphore = signal_semaphore; + } + + // Indicate the fence has been signaled so the next time the frame's contents need to be used, the CPU needs to wait on the work to be completed. frames[frame].draw_fence_signaled = true; if (frame_can_present) { if (separate_present_queue) { // Issue the presentation separately if the presentation queue is different from the main queue. - driver->command_queue_execute_and_present(present_queue, frames[frame].draw_semaphore, {}, {}, {}, frames[frame].swap_chains_to_present); + driver->command_queue_execute_and_present(present_queue, wait_semaphore, {}, {}, {}, frames[frame].swap_chains_to_present); } frames[frame].swap_chains_to_present.clear(); @@ -5044,6 +5097,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ frames[i].timestamp_cpu_result_values.resize(max_timestamp_query_elements); frames[i].timestamp_result_values.resize(max_timestamp_query_elements); frames[i].timestamp_result_count = 0; + + // Assign the frame's command pool to the command buffer pool. 
+ frames[i].command_buffer_pool.pool = frames[i].command_pool; } // Start from frame count, so everything else is immediately old. @@ -5055,7 +5111,7 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ driver->command_buffer_begin(frames[0].draw_command_buffer); // Create draw graph and start it initialized as well. - draw_graph.initialize(driver, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME); + draw_graph.initialize(driver, device, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME); draw_graph.begin(); for (uint32_t i = 0; i < frames.size(); i++) { @@ -5388,6 +5444,11 @@ void RenderingDevice::finalize() { driver->semaphore_free(frames[i].setup_semaphore); driver->semaphore_free(frames[i].draw_semaphore); driver->fence_free(frames[i].draw_fence); + + RDG::CommandBufferPool &buffer_pool = frames[i].command_buffer_pool; + for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) { + driver->semaphore_free(buffer_pool.semaphores[j]); + } } if (pipeline_cache_enabled) { diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 9db2fdfbf4d..020be6be18a 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -1261,6 +1261,9 @@ private: // Swap chains prepared for drawing during the frame that must be presented. LocalVector swap_chains_to_present; + // Extra command buffer pool used for driver workarounds. 
+ RDG::CommandBufferPool command_buffer_pool; + struct Timestamp { String description; uint64_t value = 0; diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index b04f2ebbaa8..c7de5c67cb3 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -753,71 +753,96 @@ void RenderingDeviceGraph::_wait_for_secondary_command_buffer_tasks() { } } -void RenderingDeviceGraph::_run_render_commands(RDD::CommandBufferID p_command_buffer, int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, int32_t &r_current_label_index, int32_t &r_current_label_level) { +void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, RDD::CommandBufferID &r_command_buffer, CommandBufferPool &r_command_buffer_pool, int32_t &r_current_label_index, int32_t &r_current_label_level) { for (uint32_t i = 0; i < p_sorted_commands_count; i++) { const uint32_t command_index = p_sorted_commands[i].index; const uint32_t command_data_offset = command_data_offsets[command_index]; const RecordedCommand *command = reinterpret_cast(&command_data[command_data_offset]); - _run_label_command_change(p_command_buffer, command->label_index, p_level, false, true, &p_sorted_commands[i], p_sorted_commands_count - i, r_current_label_index, r_current_label_level); + _run_label_command_change(r_command_buffer, command->label_index, p_level, false, true, &p_sorted_commands[i], p_sorted_commands_count - i, r_current_label_index, r_current_label_level); switch (command->type) { case RecordedCommand::TYPE_BUFFER_CLEAR: { const RecordedBufferClearCommand *buffer_clear_command = reinterpret_cast(command); - driver->command_clear_buffer(p_command_buffer, buffer_clear_command->buffer, buffer_clear_command->offset, buffer_clear_command->size); + driver->command_clear_buffer(r_command_buffer, 
buffer_clear_command->buffer, buffer_clear_command->offset, buffer_clear_command->size); } break; case RecordedCommand::TYPE_BUFFER_COPY: { const RecordedBufferCopyCommand *buffer_copy_command = reinterpret_cast(command); - driver->command_copy_buffer(p_command_buffer, buffer_copy_command->source, buffer_copy_command->destination, buffer_copy_command->region); + driver->command_copy_buffer(r_command_buffer, buffer_copy_command->source, buffer_copy_command->destination, buffer_copy_command->region); } break; case RecordedCommand::TYPE_BUFFER_GET_DATA: { const RecordedBufferGetDataCommand *buffer_get_data_command = reinterpret_cast(command); - driver->command_copy_buffer(p_command_buffer, buffer_get_data_command->source, buffer_get_data_command->destination, buffer_get_data_command->region); + driver->command_copy_buffer(r_command_buffer, buffer_get_data_command->source, buffer_get_data_command->destination, buffer_get_data_command->region); } break; case RecordedCommand::TYPE_BUFFER_UPDATE: { const RecordedBufferUpdateCommand *buffer_update_command = reinterpret_cast(command); const RecordedBufferCopy *command_buffer_copies = buffer_update_command->buffer_copies(); for (uint32_t j = 0; j < buffer_update_command->buffer_copies_count; j++) { - driver->command_copy_buffer(p_command_buffer, command_buffer_copies[j].source, buffer_update_command->destination, command_buffer_copies[j].region); + driver->command_copy_buffer(r_command_buffer, command_buffer_copies[j].source, buffer_update_command->destination, command_buffer_copies[j].region); } } break; case RecordedCommand::TYPE_COMPUTE_LIST: { + if (device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found) { + // Avoid compute after draw workaround. Refer to the comment that enables this in the Vulkan driver for more information. + workarounds_state.draw_list_found = false; + + // Create or reuse a command buffer and finish recording the current one. 
+ driver->command_buffer_end(r_command_buffer); + + while (r_command_buffer_pool.buffers_used >= r_command_buffer_pool.buffers.size()) { + RDD::CommandBufferID command_buffer = driver->command_buffer_create(r_command_buffer_pool.pool); + RDD::SemaphoreID command_semaphore = driver->semaphore_create(); + r_command_buffer_pool.buffers.push_back(command_buffer); + r_command_buffer_pool.semaphores.push_back(command_semaphore); + } + + // Start recording on the next usable command buffer from the pool. + uint32_t command_buffer_index = r_command_buffer_pool.buffers_used++; + r_command_buffer = r_command_buffer_pool.buffers[command_buffer_index]; + driver->command_buffer_begin(r_command_buffer); + } + const RecordedComputeListCommand *compute_list_command = reinterpret_cast(command); - _run_compute_list_command(p_command_buffer, compute_list_command->instruction_data(), compute_list_command->instruction_data_size); + _run_compute_list_command(r_command_buffer, compute_list_command->instruction_data(), compute_list_command->instruction_data_size); } break; case RecordedCommand::TYPE_DRAW_LIST: { + if (device.workarounds.avoid_compute_after_draw) { + // Indicate that a draw list was encountered for the workaround. 
+ workarounds_state.draw_list_found = true; + } + const RecordedDrawListCommand *draw_list_command = reinterpret_cast(command); const VectorView clear_values(draw_list_command->clear_values(), draw_list_command->clear_values_count); - driver->command_begin_render_pass(p_command_buffer, draw_list_command->render_pass, draw_list_command->framebuffer, draw_list_command->command_buffer_type, draw_list_command->region, clear_values); - _run_draw_list_command(p_command_buffer, draw_list_command->instruction_data(), draw_list_command->instruction_data_size); - driver->command_end_render_pass(p_command_buffer); + driver->command_begin_render_pass(r_command_buffer, draw_list_command->render_pass, draw_list_command->framebuffer, draw_list_command->command_buffer_type, draw_list_command->region, clear_values); + _run_draw_list_command(r_command_buffer, draw_list_command->instruction_data(), draw_list_command->instruction_data_size); + driver->command_end_render_pass(r_command_buffer); } break; case RecordedCommand::TYPE_TEXTURE_CLEAR: { const RecordedTextureClearCommand *texture_clear_command = reinterpret_cast(command); - driver->command_clear_color_texture(p_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range); + driver->command_clear_color_texture(r_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range); } break; case RecordedCommand::TYPE_TEXTURE_COPY: { const RecordedTextureCopyCommand *texture_copy_command = reinterpret_cast(command); - driver->command_copy_texture(p_command_buffer, texture_copy_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_copy_command->region); + driver->command_copy_texture(r_command_buffer, texture_copy_command->from_texture, 
RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_copy_command->region); } break; case RecordedCommand::TYPE_TEXTURE_GET_DATA: { const RecordedTextureGetDataCommand *texture_get_data_command = reinterpret_cast(command); const VectorView command_buffer_texture_copy_regions_view(texture_get_data_command->buffer_texture_copy_regions(), texture_get_data_command->buffer_texture_copy_regions_count); - driver->command_copy_texture_to_buffer(p_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view); + driver->command_copy_texture_to_buffer(r_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view); } break; case RecordedCommand::TYPE_TEXTURE_RESOLVE: { const RecordedTextureResolveCommand *texture_resolve_command = reinterpret_cast(command); - driver->command_resolve_texture(p_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap); + driver->command_resolve_texture(r_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap); } break; case RecordedCommand::TYPE_TEXTURE_UPDATE: { const RecordedTextureUpdateCommand *texture_update_command = reinterpret_cast(command); const RecordedBufferToTextureCopy *command_buffer_to_texture_copies = 
texture_update_command->buffer_to_texture_copies(); for (uint32_t j = 0; j < texture_update_command->buffer_to_texture_copies_count; j++) { - driver->command_copy_buffer_to_texture(p_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, command_buffer_to_texture_copies[j].region); + driver->command_copy_buffer_to_texture(r_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, command_buffer_to_texture_copies[j].region); } } break; case RecordedCommand::TYPE_CAPTURE_TIMESTAMP: { const RecordedCaptureTimestampCommand *texture_capture_timestamp_command = reinterpret_cast(command); - driver->command_timestamp_write(p_command_buffer, texture_capture_timestamp_command->pool, texture_capture_timestamp_command->index); + driver->command_timestamp_write(r_command_buffer, texture_capture_timestamp_command->pool, texture_capture_timestamp_command->index); } break; default: { DEV_ASSERT(false && "Unknown recorded command type."); @@ -1229,8 +1254,9 @@ void RenderingDeviceGraph::_print_compute_list(const uint8_t *p_instruction_data } } -void RenderingDeviceGraph::initialize(RDD *p_driver, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame) { +void RenderingDeviceGraph::initialize(RDD *p_driver, RenderingContextDriver::Device p_device, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame) { driver = p_driver; + device = p_device; frames.resize(p_frame_count); for (uint32_t i = 0; i < p_frame_count; i++) { @@ -1805,7 +1831,7 @@ void RenderingDeviceGraph::end_label() { command_label_index = -1; } -void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reorder_commands, bool p_full_barriers) { +void RenderingDeviceGraph::end(bool 
p_reorder_commands, bool p_full_barriers, RDD::CommandBufferID &r_command_buffer, CommandBufferPool &r_command_buffer_pool) { if (command_count == 0) { // No commands have been logged, do nothing. return; @@ -1919,7 +1945,12 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo if (command_count > 0) { int32_t current_label_index = -1; int32_t current_label_level = -1; - _run_label_command_change(p_command_buffer, -1, -1, true, true, nullptr, 0, current_label_index, current_label_level); + _run_label_command_change(r_command_buffer, -1, -1, true, true, nullptr, 0, current_label_index, current_label_level); + + if (device.workarounds.avoid_compute_after_draw) { + // Reset the state of the workaround. + workarounds_state.draw_list_found = false; + } if (p_reorder_commands) { #if PRINT_RENDER_GRAPH @@ -1946,8 +1977,8 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo RecordedCommandSort *level_command_ptr = &commands_sorted[current_level_start]; uint32_t level_command_count = i - current_level_start; _boost_priority_for_render_commands(level_command_ptr, level_command_count, boosted_priority); - _group_barriers_for_render_commands(p_command_buffer, level_command_ptr, level_command_count, p_full_barriers); - _run_render_commands(p_command_buffer, current_level, level_command_ptr, level_command_count, current_label_index, current_label_level); + _group_barriers_for_render_commands(r_command_buffer, level_command_ptr, level_command_count, p_full_barriers); + _run_render_commands(current_level, level_command_ptr, level_command_count, r_command_buffer, r_command_buffer_pool, current_label_index, current_label_level); current_level = commands_sorted[i].level; current_level_start = i; } @@ -1956,20 +1987,20 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo RecordedCommandSort *level_command_ptr = &commands_sorted[current_level_start]; uint32_t level_command_count = command_count 
- current_level_start; _boost_priority_for_render_commands(level_command_ptr, level_command_count, boosted_priority); - _group_barriers_for_render_commands(p_command_buffer, level_command_ptr, level_command_count, p_full_barriers); - _run_render_commands(p_command_buffer, current_level, level_command_ptr, level_command_count, current_label_index, current_label_level); + _group_barriers_for_render_commands(r_command_buffer, level_command_ptr, level_command_count, p_full_barriers); + _run_render_commands(current_level, level_command_ptr, level_command_count, r_command_buffer, r_command_buffer_pool, current_label_index, current_label_level); #if PRINT_RENDER_GRAPH print_line("COMMANDS", command_count, "LEVELS", current_level + 1); #endif } else { for (uint32_t i = 0; i < command_count; i++) { - _group_barriers_for_render_commands(p_command_buffer, &commands_sorted[i], 1, p_full_barriers); - _run_render_commands(p_command_buffer, i, &commands_sorted[i], 1, current_label_index, current_label_level); + _group_barriers_for_render_commands(r_command_buffer, &commands_sorted[i], 1, p_full_barriers); + _run_render_commands(i, &commands_sorted[i], 1, r_command_buffer, r_command_buffer_pool, current_label_index, current_label_level); } } - _run_label_command_change(p_command_buffer, -1, -1, true, false, nullptr, 0, current_label_index, current_label_level); + _run_label_command_change(r_command_buffer, -1, -1, true, false, nullptr, 0, current_label_index, current_label_level); #if PRINT_COMMAND_RECORDING print_line(vformat("Recorded %d commands", command_count)); diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 3bc63bb2977..a96382e0cc4 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -184,6 +184,20 @@ public: } }; + struct CommandBufferPool { + // Provided by RenderingDevice. + RDD::CommandPoolID pool; + + // Created internally by RenderingDeviceGraph. 
+ LocalVector buffers; + LocalVector semaphores; + uint32_t buffers_used = 0; + }; + + struct WorkaroundsState { + bool draw_list_found = false; + }; + private: struct InstructionList { LocalVector data; @@ -560,6 +574,7 @@ private: }; RDD *driver = nullptr; + RenderingContextDriver::Device device; int64_t tracking_frame = 0; LocalVector command_data; LocalVector command_data_offsets; @@ -582,6 +597,7 @@ private: bool command_synchronization_pending = false; BarrierGroup barrier_group; bool driver_honors_barriers = false; + WorkaroundsState workarounds_state; TightLocalVector frames; uint32_t frame = 0; @@ -608,7 +624,7 @@ private: void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary); void _wait_for_secondary_command_buffer_tasks(); - void _run_render_commands(RDD::CommandBufferID p_command_buffer, int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, int32_t &r_current_label_index, int32_t &r_current_label_level); + void _run_render_commands(int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, RDD::CommandBufferID &r_command_buffer, CommandBufferPool &r_command_buffer_pool, int32_t &r_current_label_index, int32_t &r_current_label_level); void _run_label_command_change(RDD::CommandBufferID p_command_buffer, int32_t p_new_label_index, int32_t p_new_level, bool p_ignore_previous_value, bool p_use_label_for_empty, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, int32_t &r_current_label_index, int32_t &r_current_label_level); void _boost_priority_for_render_commands(RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, uint32_t &r_boosted_priority); void _group_barriers_for_render_commands(RDD::CommandBufferID p_command_buffer, const RecordedCommandSort 
*p_sorted_commands, uint32_t p_sorted_commands_count, bool p_full_memory_barrier); @@ -619,7 +635,7 @@ private: public: RenderingDeviceGraph(); ~RenderingDeviceGraph(); - void initialize(RDD *p_driver, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame); + void initialize(RDD *p_driver, RenderingContextDriver::Device p_device, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame); void finalize(); void begin(); void add_buffer_clear(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_offset, uint32_t p_size); @@ -664,7 +680,7 @@ public: void add_synchronization(); void begin_label(const String &p_label_name, const Color &p_color); void end_label(); - void end(RDD::CommandBufferID p_command_buffer, bool p_reorder_commands, bool p_full_barriers); + void end(bool p_reorder_commands, bool p_full_barriers, RDD::CommandBufferID &r_command_buffer, CommandBufferPool &r_command_buffer_pool); static ResourceTracker *resource_tracker_create(); static void resource_tracker_free(ResourceTracker *tracker); };