diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 2166d12d31f..2f01376a454 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -8115,10 +8115,67 @@ void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, } void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) { + uint32_t barrier_flags = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + uint32_t access_flags = VK_ACCESS_SHADER_READ_BIT; + _compute_list_add_barrier(BARRIER_MASK_COMPUTE, barrier_flags, access_flags); +} + +void RenderingDeviceVulkan::_compute_list_add_barrier(BitField p_post_barrier, uint32_t p_barrier_flags, uint32_t p_access_flags) { + ERR_FAIL_COND(!compute_list); + + VkImageMemoryBarrier *image_barriers = nullptr; + + uint32_t image_barrier_count = compute_list->state.textures_to_sampled_layout.size(); + + if (image_barrier_count) { + image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count); + } + + image_barrier_count = 0; // We'll count how many we end up issuing. + + for (Texture *E : compute_list->state.textures_to_sampled_layout) { + if (E->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + VkImageMemoryBarrier &image_memory_barrier = image_barriers[image_barrier_count++]; + image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_memory_barrier.pNext = nullptr; + image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = p_access_flags; + image_memory_barrier.oldLayout = E->layout; + image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_memory_barrier.image = E->image; + image_memory_barrier.subresourceRange.aspectMask = E->read_aspect_mask; + image_memory_barrier.subresourceRange.baseMipLevel = E->base_mipmap; + image_memory_barrier.subresourceRange.levelCount = E->mipmaps; + image_memory_barrier.subresourceRange.baseArrayLayer = E->base_layer; + image_memory_barrier.subresourceRange.layerCount = E->layers; + + E->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + + if (E->used_in_frame != frames_drawn) { + E->used_in_transfer = false; + E->used_in_raster = false; + E->used_in_compute = false; + E->used_in_frame = frames_drawn; + } + } + + if (p_barrier_flags) { + VkMemoryBarrier mem_barrier; + mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mem_barrier.pNext = nullptr; + mem_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + mem_barrier.dstAccessMask = p_access_flags; + vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, p_barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); + } else if (image_barrier_count) { + vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, image_barrier_count, image_barriers); + } + #ifdef FORCE_FULL_BARRIER _full_barrier(true); -#else - _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, true); #endif } @@ -8139,62 +8196,7 @@ void RenderingDeviceVulkan::compute_list_end(BitField p_post_barrie barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } - - if (barrier_flags == 0) { - barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - } - - VkImageMemoryBarrier *image_barriers = nullptr; - - uint32_t image_barrier_count = compute_list->state.textures_to_sampled_layout.size(); - - if (image_barrier_count) { - image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count); - } - - uint32_t barrier_idx = 0; - - for (Texture *E : compute_list->state.textures_to_sampled_layout) { - VkImageMemoryBarrier &image_memory_barrier = image_barriers[barrier_idx++]; - image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - image_memory_barrier.dstAccessMask = access_flags; - image_memory_barrier.oldLayout = E->layout; - image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - - image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_memory_barrier.image = E->image; - image_memory_barrier.subresourceRange.aspectMask = E->read_aspect_mask; - image_memory_barrier.subresourceRange.baseMipLevel = E->base_mipmap; - image_memory_barrier.subresourceRange.levelCount = E->mipmaps; - image_memory_barrier.subresourceRange.baseArrayLayer = E->base_layer; - image_memory_barrier.subresourceRange.layerCount = E->layers; - - E->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - - if (E->used_in_frame != frames_drawn) { - E->used_in_transfer = false; - E->used_in_raster = false; - E->used_in_compute = false; - E->used_in_frame = frames_drawn; - } - } - - VkMemoryBarrier mem_barrier; - mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - mem_barrier.pNext = nullptr; - mem_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - mem_barrier.dstAccessMask = access_flags; - - if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { - vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); - } - -#ifdef FORCE_FULL_BARRIER - _full_barrier(true); -#endif + _compute_list_add_barrier(p_post_barrier, barrier_flags, access_flags); memdelete(compute_list); compute_list = nullptr; diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index 91a09fa970b..3f01895745b 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -954,6 +954,8 @@ class RenderingDeviceVulkan : public RenderingDevice { ComputeList *compute_list = nullptr; + void _compute_list_add_barrier(BitField p_post_barrier, uint32_t p_barrier_flags, uint32_t p_access_flags); + /**************************/ /**** FRAME MANAGEMENT ****/ /**************************/