diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index b042d415244..fb278a4d56c 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -538,15 +538,6 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso #endif ResourceInfo::States *res_states = p_resource->states_ptr; - - if (p_new_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { - if (unlikely(!res_states->xfamily_fallback.subresources_dirty.is_empty())) { - uint32_t subres_qword = p_subresource / 64; - uint64_t subres_mask = (uint64_t(1) << (p_subresource % 64)); - res_states->xfamily_fallback.subresources_dirty[subres_qword] |= subres_mask; - } - } - D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource]; // Transitions can be considered redundant if the current state has all the bits of the new state. @@ -869,7 +860,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel // but also if you give a rounded size at that point because it will extend beyond the // memory of the resource. Therefore, it seems the only way is to create it with a // rounded size. - CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); + CD3DX12_RESOURCE_DESC1 resource_desc = CD3DX12_RESOURCE_DESC1::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); if (p_usage.has_flag(RDD::BUFFER_USAGE_STORAGE_BIT)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } else { @@ -878,7 +869,6 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel D3D12MA::ALLOCATION_DESC allocation_desc = {}; allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; switch (p_allocation_type) { case MEMORY_ALLOCATION_TYPE_CPU: { bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT); @@ -886,7 +876,6 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel if (is_src && !is_dst) { // Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM. allocation_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD; - initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; } if (is_dst && !is_src) { // Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads. @@ -904,13 +893,27 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel ComPtr buffer; ComPtr allocation; - HRESULT res = allocator->CreateResource( - &allocation_desc, - &resource_desc, - initial_state, - nullptr, - allocation.GetAddressOf(), - IID_PPV_ARGS(buffer.GetAddressOf())); + HRESULT res; + if (barrier_capabilities.enhanced_barriers_supported) { + res = allocator->CreateResource3( + &allocation_desc, + &resource_desc, + D3D12_BARRIER_LAYOUT_UNDEFINED, + nullptr, + 0, + nullptr, + allocation.GetAddressOf(), + IID_PPV_ARGS(buffer.GetAddressOf())); + } else { + res = allocator->CreateResource( + &allocation_desc, + reinterpret_cast(&resource_desc), + D3D12_RESOURCE_STATE_COMMON, + nullptr, + allocation.GetAddressOf(), + IID_PPV_ARGS(buffer.GetAddressOf())); + } + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), BufferID(), "Can't create buffer of size: " + itos(p_size) + ", error " + vformat("0x%08ux", (uint64_t)res) + "."); // Bookkeep. @@ -919,11 +922,10 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel buf_info->resource = buffer.Get(); buf_info->owner_info.resource = buffer; buf_info->owner_info.allocation = allocation; - buf_info->owner_info.states.subresource_states.push_back(initial_state); + buf_info->owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_COMMON); buf_info->states_ptr = &buf_info->owner_info.states; buf_info->size = p_size; buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); - buf_info->flags.is_for_upload = allocation_desc.HeapType == D3D12_HEAP_TYPE_UPLOAD; return BufferID(buf_info); } @@ -1052,8 +1054,7 @@ UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, BitFi if (p_aspect_bits.has_flag(TEXTURE_ASPECT_DEPTH_BIT)) { DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX); aspect = TEXTURE_ASPECT_DEPTH; - } - if (p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) { + } else if (p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) { DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX); aspect = TEXTURE_ASPECT_STENCIL; } @@ -1080,6 +1081,10 @@ UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, Textu } } +UINT RenderingDeviceDriverD3D12::_compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset) { + return D3D12CalcSubresource(p_layers.mipmap, p_layers.base_layer + p_layer_offset, _compute_plane_slice(p_texture->format, p_layers.aspect), p_texture->desc.MipLevels, p_texture->desc.ArraySize()); +} + void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info) { uint32_t planes = 1; if ((p_tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { @@ -1117,6 +1122,64 @@ void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo } } +bool RenderingDeviceDriverD3D12::_unordered_access_supported_by_format(DataFormat p_format) { + switch (p_format) { + case DATA_FORMAT_R4G4_UNORM_PACK8: + case DATA_FORMAT_R4G4B4A4_UNORM_PACK16: + case DATA_FORMAT_B4G4R4A4_UNORM_PACK16: + case DATA_FORMAT_R5G6B5_UNORM_PACK16: + case DATA_FORMAT_B5G6R5_UNORM_PACK16: + case DATA_FORMAT_R5G5B5A1_UNORM_PACK16: + case DATA_FORMAT_B5G5R5A1_UNORM_PACK16: + case DATA_FORMAT_A1R5G5B5_UNORM_PACK16: + case DATA_FORMAT_A8B8G8R8_UNORM_PACK32: + case DATA_FORMAT_A8B8G8R8_SNORM_PACK32: + case DATA_FORMAT_A8B8G8R8_USCALED_PACK32: + case DATA_FORMAT_A8B8G8R8_SSCALED_PACK32: + case DATA_FORMAT_A8B8G8R8_UINT_PACK32: + case DATA_FORMAT_A8B8G8R8_SINT_PACK32: + case DATA_FORMAT_A8B8G8R8_SRGB_PACK32: + case DATA_FORMAT_A2R10G10B10_UNORM_PACK32: + case DATA_FORMAT_A2R10G10B10_SNORM_PACK32: + case DATA_FORMAT_A2R10G10B10_USCALED_PACK32: + case DATA_FORMAT_A2R10G10B10_SSCALED_PACK32: + case DATA_FORMAT_A2R10G10B10_UINT_PACK32: + case DATA_FORMAT_A2R10G10B10_SINT_PACK32: + case DATA_FORMAT_A2B10G10R10_UNORM_PACK32: + case DATA_FORMAT_A2B10G10R10_SNORM_PACK32: + case DATA_FORMAT_A2B10G10R10_USCALED_PACK32: + case DATA_FORMAT_A2B10G10R10_SSCALED_PACK32: + case DATA_FORMAT_A2B10G10R10_UINT_PACK32: + case DATA_FORMAT_A2B10G10R10_SINT_PACK32: + case DATA_FORMAT_B10G11R11_UFLOAT_PACK32: + case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32: + case DATA_FORMAT_X8_D24_UNORM_PACK32: + case DATA_FORMAT_R10X6_UNORM_PACK16: + case DATA_FORMAT_R10X6G10X6_UNORM_2PACK16: + case DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16: + case DATA_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16: + case DATA_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16: + case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16: + case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: + case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16: + case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16: + case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16: + case DATA_FORMAT_R12X4_UNORM_PACK16: + case DATA_FORMAT_R12X4G12X4_UNORM_2PACK16: + case DATA_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16: + case DATA_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16: + case DATA_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16: + case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16: + case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16: + case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16: + case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16: + case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16: + return false; + default: + return true; + } +} + RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p_format, const TextureView &p_view) { // Using D3D12_RESOURCE_DESC1. Thanks to the layout, it's sliceable down to D3D12_RESOURCE_DESC if needed. CD3DX12_RESOURCE_DESC1 resource_desc = {}; @@ -1137,12 +1200,10 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p resource_desc.Format = RD_TO_D3D12_FORMAT[p_format.format].family; // If views of different families are wanted, special setup is needed for proper sharing among them. - // Two options here: - // 1. If the driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA). - // 2. Otherwise, fall back to an approach based on having multiple versions of the resource and copying as needed. [[CROSS_FAMILY_FALLBACK]] + // If the driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA). if (p_format.shareable_formats.size() && format_capabilities.relaxed_casting_supported) { relaxed_casting_available = true; - relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size()); + relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size() + 1); relaxed_casting_formats[0] = RD_TO_D3D12_FORMAT[p_format.format].general_format; relaxed_casting_format_count++; } @@ -1156,9 +1217,9 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p if (RD_TO_D3D12_FORMAT[curr_format].family != RD_TO_D3D12_FORMAT[p_format.format].family) { cross_family_sharing = true; - if (!relaxed_casting_available) { - break; - } + } + + if (relaxed_casting_available) { relaxed_casting_formats[relaxed_casting_format_count] = RD_TO_D3D12_FORMAT[curr_format].general_format; relaxed_casting_format_count++; } @@ -1185,7 +1246,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p if ((p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; } else { - if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT)) { + if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT) && _unordered_access_supported_by_format(p_format.format)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; // For clearing via UAV. } } @@ -1242,17 +1303,19 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p D3D12_CLEAR_VALUE *clear_value_ptr = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : nullptr; { HRESULT res = E_FAIL; - if (cross_family_sharing && relaxed_casting_available) { + if (barrier_capabilities.enhanced_barriers_supported || (cross_family_sharing && relaxed_casting_available)) { + // Create with undefined layout if enhanced barriers are supported. Leave as common otherwise for interop with legacy barriers. + D3D12_BARRIER_LAYOUT initial_layout = barrier_capabilities.enhanced_barriers_supported ? D3D12_BARRIER_LAYOUT_UNDEFINED : D3D12_BARRIER_LAYOUT_COMMON; res = allocator->CreateResource3( &allocation_desc, &resource_desc, - D3D12_BARRIER_LAYOUT_COMMON, // Needed for barrier interop. + initial_layout, clear_value_ptr, relaxed_casting_format_count, relaxed_casting_formats, allocation.GetAddressOf(), IID_PPV_ARGS(main_texture.GetAddressOf())); - initial_state = D3D12_RESOURCE_STATE_COMMON; // Needed for barrier interop. + initial_state = D3D12_RESOURCE_STATE_COMMON; } else { res = allocator->CreateResource( &allocation_desc, @@ -1353,7 +1416,10 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p tex_info->mipmaps = resource_desc.MipLevels; tex_info->view_descs.srv = srv_desc; tex_info->view_descs.uav = uav_desc; - if ((p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) { + + if (!barrier_capabilities.enhanced_barriers_supported && (p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) { + // Fallback to clear resources when they're first used in a uniform set. Not necessary if enhanced barriers + // are supported, as the discard flag will be used instead when transitioning from an undefined layout. textures_pending_clear.add(&tex_info->pending_clear); } @@ -1380,45 +1446,8 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex ComPtr new_texture; ComPtr new_allocation; - ID3D12Resource *resource = nullptr; + ID3D12Resource *resource = owner_tex_info->resource; CD3DX12_RESOURCE_DESC new_tex_resource_desc = owner_tex_info->desc; - bool cross_family = RD_TO_D3D12_FORMAT[p_view.format].family != RD_TO_D3D12_FORMAT[owner_tex_info->format].family; - if (cross_family && !format_capabilities.relaxed_casting_supported) { - // [[CROSS_FAMILY_FALLBACK]]. - // We have to create a new texture of the alternative format. - - D3D12MA::ALLOCATION_DESC allocation_desc = {}; - allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; - - if (p_slice_type != -1) { -#ifdef DEV_ENABLED - // Actual slicing is not contemplated. If ever needed, let's at least realize. - if (p_slice_type != -1) { - uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers; - uint32_t slice_subresorce_count = p_mipmaps * p_layers; - DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count); - } -#endif - new_tex_resource_desc.DepthOrArraySize = p_layers; - new_tex_resource_desc.MipLevels = p_mipmaps; - } - new_tex_resource_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].family; - new_tex_resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; // Alternative formats can only be used as SRVs. - - HRESULT res = allocator->CreateResource( - &allocation_desc, - &new_tex_resource_desc, - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - new_allocation.GetAddressOf(), - IID_PPV_ARGS(new_texture.GetAddressOf())); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), vformat("D3D12MA::CreateResource failed with error 0x%08ux.", (uint64_t)res)); - - resource = new_texture.Get(); - } else { - resource = owner_tex_info->resource; - } // Describe views. @@ -1528,58 +1557,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex TextureInfo *tex_info = VersatileResource::allocate(resources_allocator); tex_info->resource = resource; - if (new_texture.Get()) { - // [[CROSS_FAMILY_FALLBACK]]. - - DEV_ASSERT(cross_family && !format_capabilities.relaxed_casting_supported); - - uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers; -#ifdef DEV_ENABLED - // Actual slicing is not contemplated. If ever needed, let's at least realize. - if (p_slice_type != -1) { - uint32_t slice_subresorce_count = p_mipmaps * p_layers; - DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count); - } -#endif - - tex_info->owner_info.resource = new_texture; - tex_info->owner_info.allocation = new_allocation; - tex_info->owner_info.states.subresource_states.resize(new_texture_subresorce_count); - for (uint32_t i = 0; i < tex_info->owner_info.states.subresource_states.size(); i++) { - tex_info->owner_info.states.subresource_states[i] = D3D12_RESOURCE_STATE_COPY_DEST; - } - tex_info->states_ptr = &tex_info->owner_info.states; - - ResourceInfo::States::CrossFamillyFallback &xfamily = owner_tex_info->owner_info.states.xfamily_fallback; - if (xfamily.subresources_dirty.is_empty()) { - uint32_t items_required = STEPIFY(new_texture_subresorce_count, sizeof(uint64_t)) / sizeof(uint64_t); - xfamily.subresources_dirty.resize(items_required); - memset(xfamily.subresources_dirty.ptr(), 255, sizeof(uint64_t) * xfamily.subresources_dirty.size()); - - // Create buffer for non-direct copy if it's a format not supporting reinterpret-copy. - DEV_ASSERT(!xfamily.interim_buffer.Get()); - if (owner_tex_info->format == DATA_FORMAT_R16_UINT && p_view.format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) { - uint32_t row_pitch = STEPIFY(owner_tex_info->desc.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - uint32_t buffer_size = sizeof(uint16_t) * row_pitch * owner_tex_info->desc.Height * owner_tex_info->desc.Depth(); - CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(buffer_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); - resource_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - - D3D12MA::ALLOCATION_DESC allocation_desc = {}; - allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - - HRESULT res = allocator->CreateResource( - &allocation_desc, - &resource_desc, - D3D12_RESOURCE_STATE_COPY_SOURCE, // Makes the code that makes the copy easier. - nullptr, - xfamily.interim_buffer_alloc.GetAddressOf(), - IID_PPV_ARGS(xfamily.interim_buffer.GetAddressOf())); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - } - } - } else { - tex_info->states_ptr = owner_tex_info->states_ptr; - } + tex_info->states_ptr = owner_tex_info->states_ptr; tex_info->format = p_view.format; tex_info->desc = new_tex_resource_desc; if (p_slice_type == -1) { @@ -1710,6 +1688,28 @@ BitField RenderingDeviceDriverD3D12::texture_get_usages_s return supported; } +bool RenderingDeviceDriverD3D12::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) { + r_raw_reinterpretation = false; + + if (format_capabilities.relaxed_casting_supported) { + // Relaxed casting is supported, there should be no need to check for format family compatibility. + return true; + } else { + TextureInfo *tex_info = (TextureInfo *)p_texture.id; + if (tex_info->format == DATA_FORMAT_R16_UINT && p_format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) { + // Specific cases that require buffer reinterpretation. + r_raw_reinterpretation = true; + return false; + } else if (RD_TO_D3D12_FORMAT[tex_info->format].family != RD_TO_D3D12_FORMAT[p_format].family) { + // Format family is different but copying resources directly is possible. + return false; + } else { + // Format family is the same and the view can just cast the format. + return true; + } + } +} + /*****************/ /**** SAMPLER ****/ /*****************/ @@ -1842,20 +1842,328 @@ void RenderingDeviceDriverD3D12::vertex_format_free(VertexFormatID p_vertex_form /**** BARRIERS ****/ /******************/ -void RenderingDeviceDriverD3D12::command_pipeline_barrier( - CommandBufferID p_cmd_buffer, - BitField p_src_stages, - BitField p_dst_stages, +static D3D12_BARRIER_ACCESS _rd_texture_layout_access_mask(RDD::TextureLayout p_texture_layout) { + switch (p_texture_layout) { + case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL: + return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_ACCESS_RENDER_TARGET; + case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; + case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; + case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL: + return D3D12_BARRIER_ACCESS_COPY_SOURCE; + case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL: + return D3D12_BARRIER_ACCESS_COPY_DEST; + case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL: + return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; + case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL: + return D3D12_BARRIER_ACCESS_RESOLVE_DEST; + case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; + default: + return D3D12_BARRIER_ACCESS_NO_ACCESS; + } +} + +static void _rd_access_to_d3d12_and_mask(BitField p_access, RDD::TextureLayout p_texture_layout, D3D12_BARRIER_ACCESS &r_access, D3D12_BARRIER_SYNC &r_sync_mask) { + r_access = D3D12_BARRIER_ACCESS_COMMON; + r_sync_mask = D3D12_BARRIER_SYNC_NONE; + + if (p_access.has_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT; + r_sync_mask |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_INDEX_BUFFER; + r_sync_mask |= D3D12_BARRIER_SYNC_INDEX_INPUT | D3D12_BARRIER_SYNC_DRAW; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER; + r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_UNIFORM_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER; + r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | + D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET; + r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_COPY_SOURCE; + r_sync_mask |= D3D12_BARRIER_SYNC_COPY; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_COPY_DEST; + r_sync_mask |= D3D12_BARRIER_SYNC_COPY; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; + r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_RESOLVE_DEST; + r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; + r_sync_mask |= D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_ALL_SHADING; + } + + const D3D12_BARRIER_SYNC unordered_access_mask = D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | + D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW; + + if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + r_sync_mask |= unordered_access_mask; + } + + // These access bits only have compatibility with certain layouts unlike in Vulkan where they imply specific operations in the same layout. + if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + r_sync_mask |= unordered_access_mask; + } else if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT)) { + if (p_texture_layout == RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL) { + // Unordered access must be enforced if the texture is using the storage layout. + r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + r_sync_mask |= unordered_access_mask; + } else { + r_access |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING; + } + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET; + r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET; + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; + r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL; + } else if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT)) { + r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; + r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL; + } +} + +static void _rd_stages_to_d3d12(BitField p_stages, D3D12_BARRIER_SYNC &r_sync) { + if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT)) { + r_sync = D3D12_BARRIER_SYNC_ALL; + } else { + if (p_stages.has_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_INDEX_INPUT; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) { + // There's no granularity for tessellation or geometry stages. The specification defines it as part of vertex shading. + r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT)) { + // Covers both read and write operations for depth stencil. + r_sync |= D3D12_BARRIER_SYNC_DEPTH_STENCIL; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_RENDER_TARGET; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_COMPUTE_SHADING; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_COPY; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_RESOLVE; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW; + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT)) { + r_sync |= D3D12_BARRIER_SYNC_DRAW; + } + } +} + +static void _rd_stages_and_access_to_d3d12(BitField p_stages, RDD::TextureLayout p_texture_layout, BitField p_access, D3D12_BARRIER_SYNC &r_sync, D3D12_BARRIER_ACCESS &r_access) { + D3D12_BARRIER_SYNC sync_mask; + r_sync = D3D12_BARRIER_SYNC_NONE; + + if (p_texture_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) { + // Undefined texture layouts are a special case where no access bits or synchronization scopes are allowed. + r_access = D3D12_BARRIER_ACCESS_NO_ACCESS; + return; + } + + // Convert access bits to the D3D12 barrier access bits. + _rd_access_to_d3d12_and_mask(p_access, p_texture_layout, r_access, sync_mask); + + if (p_texture_layout != RDD::TEXTURE_LAYOUT_MAX) { + // Only allow the access bits compatible with the texture layout. + r_access &= _rd_texture_layout_access_mask(p_texture_layout); + } + + // Convert stage bits to the D3D12 synchronization scope bits. + _rd_stages_to_d3d12(p_stages, r_sync); + + // Only enable synchronization stages compatible with the access bits that were used. + r_sync &= sync_mask; + + if (r_sync == D3D12_BARRIER_SYNC_NONE) { + if (p_access.is_empty()) { + // No valid synchronization scope was defined and no access in particular is required. + r_access = D3D12_BARRIER_ACCESS_NO_ACCESS; + } else { + // Access is required but the synchronization scope wasn't compatible. We fall back to the global synchronization scope and access. + r_sync = D3D12_BARRIER_SYNC_ALL; + r_access = D3D12_BARRIER_ACCESS_COMMON; + } + } +} + +static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::TextureLayout p_texture_layout) { + switch (p_texture_layout) { + case RDD::TEXTURE_LAYOUT_UNDEFINED: + return D3D12_BARRIER_LAYOUT_UNDEFINED; + case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL: + return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS; + case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_LAYOUT_RENDER_TARGET; + case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; + case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ; + case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return D3D12_BARRIER_LAYOUT_SHADER_RESOURCE; + case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL: + return D3D12_BARRIER_LAYOUT_COPY_SOURCE; + case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL: + return D3D12_BARRIER_LAYOUT_COPY_DEST; + case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL: + return D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE; + case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL: + return D3D12_BARRIER_LAYOUT_RESOLVE_DEST; + case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL: + return D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE; + default: + DEV_ASSERT(false && "Unknown texture layout."); + return D3D12_BARRIER_LAYOUT_UNDEFINED; + } +} + +void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_buffer, + BitField p_src_stages, + BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, VectorView p_texture_barriers) { - if (p_src_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT) && p_dst_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT)) { - // Looks like the intent is a full barrier. - // In the resource barriers world, we can force a full barrier by discarding some resource, as per - // https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html#synchronous-copy-discard-and-resolve. - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; - cmd_buf_info->cmd_list->DiscardResource(frames[frame_idx].aux_resource->GetResource(), nullptr); + if (!barrier_capabilities.enhanced_barriers_supported) { + // Enhanced barriers are a requirement for this function. + return; } + + if (p_memory_barriers.size() == 0 && p_buffer_barriers.size() == 0 && p_texture_barriers.size() == 0) { + // At least one barrier must be present in the arguments. + return; + } + + // The command list must support the required interface. + const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffer.id); + ID3D12GraphicsCommandList7 *cmd_list_7 = nullptr; + HRESULT res = cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(&cmd_list_7)); + ERR_FAIL_COND(FAILED(res)); + + // Convert the RDD barriers to D3D12 enhanced barriers. + thread_local LocalVector global_barriers; + thread_local LocalVector buffer_barriers; + thread_local LocalVector texture_barriers; + global_barriers.clear(); + buffer_barriers.clear(); + texture_barriers.clear(); + + D3D12_GLOBAL_BARRIER global_barrier = {}; + for (uint32_t i = 0; i < p_memory_barriers.size(); i++) { + const MemoryBarrier &memory_barrier = p_memory_barriers[i]; + _rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.src_access, global_barrier.SyncBefore, global_barrier.AccessBefore); + _rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.dst_access, global_barrier.SyncAfter, global_barrier.AccessAfter); + global_barriers.push_back(global_barrier); + } + + D3D12_BUFFER_BARRIER buffer_barrier_d3d12 = {}; + buffer_barrier_d3d12.Offset = 0; + buffer_barrier_d3d12.Size = UINT64_MAX; // The specification says this must be the size of the buffer barrier. + for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) { + const BufferBarrier &buffer_barrier_rd = p_buffer_barriers[i]; + const BufferInfo *buffer_info = (const BufferInfo *)(buffer_barrier_rd.buffer.id); + _rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.src_access, buffer_barrier_d3d12.SyncBefore, buffer_barrier_d3d12.AccessBefore); + _rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.dst_access, buffer_barrier_d3d12.SyncAfter, buffer_barrier_d3d12.AccessAfter); + buffer_barrier_d3d12.pResource = buffer_info->resource; + buffer_barriers.push_back(buffer_barrier_d3d12); + } + + D3D12_TEXTURE_BARRIER texture_barrier_d3d12 = {}; + for (uint32_t i = 0; i < p_texture_barriers.size(); i++) { + const TextureBarrier &texture_barrier_rd = p_texture_barriers[i]; + const TextureInfo *texture_info = (const TextureInfo *)(texture_barrier_rd.texture.id); + _rd_stages_and_access_to_d3d12(p_src_stages, texture_barrier_rd.prev_layout, texture_barrier_rd.src_access, texture_barrier_d3d12.SyncBefore, texture_barrier_d3d12.AccessBefore); + _rd_stages_and_access_to_d3d12(p_dst_stages, texture_barrier_rd.next_layout, texture_barrier_rd.dst_access, texture_barrier_d3d12.SyncAfter, texture_barrier_d3d12.AccessAfter); + texture_barrier_d3d12.LayoutBefore = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.prev_layout); + texture_barrier_d3d12.LayoutAfter = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.next_layout); + texture_barrier_d3d12.pResource = texture_info->resource; + texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap; + texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count; + texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer; + texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count; + texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect); + texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format); + texture_barrier_d3d12.Flags = (texture_barrier_rd.prev_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) ? D3D12_TEXTURE_BARRIER_FLAG_DISCARD : D3D12_TEXTURE_BARRIER_FLAG_NONE; + texture_barriers.push_back(texture_barrier_d3d12); + } + + // Define the barrier groups and execute. + D3D12_BARRIER_GROUP barrier_groups[3] = {}; + barrier_groups[0].Type = D3D12_BARRIER_TYPE_GLOBAL; + barrier_groups[1].Type = D3D12_BARRIER_TYPE_BUFFER; + barrier_groups[2].Type = D3D12_BARRIER_TYPE_TEXTURE; + barrier_groups[0].NumBarriers = global_barriers.size(); + barrier_groups[1].NumBarriers = buffer_barriers.size(); + barrier_groups[2].NumBarriers = texture_barriers.size(); + barrier_groups[0].pGlobalBarriers = global_barriers.ptr(); + barrier_groups[1].pBufferBarriers = buffer_barriers.ptr(); + barrier_groups[2].pTextureBarriers = texture_barriers.ptr(); + cmd_list_7->Barrier(ARRAY_SIZE(barrier_groups), barrier_groups); } /****************/ @@ -3476,6 +3784,8 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vect zstd_size = STEPIFY(zstd_size, 4); read_offset += zstd_size; ERR_FAIL_COND_V(read_offset > binsize, ShaderID()); + + r_shader_desc.stages.push_back(ShaderStage(stage)); } const uint8_t *root_sig_data_ptr = binptr + read_offset; @@ -3807,6 +4117,10 @@ void RenderingDeviceDriverD3D12::uniform_set_free(UniformSetID p_uniform_set) { // ----- COMMANDS ----- void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + if (barrier_capabilities.enhanced_barriers_supported) { + return; + } + // Perform pending blackouts. { SelfList *E = textures_pending_clear.first(); @@ -3814,7 +4128,7 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff TextureSubresourceRange subresources; subresources.layer_count = E->self()->layers; subresources.mipmap_count = E->self()->mipmaps; - command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_GENERAL, Color(), subresources); + command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_UNDEFINED, Color(), subresources); SelfList *next = E->next(); E->remove_from_list(); @@ -3947,34 +4261,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff for (uint32_t i = 0; i < tex_info->layers; i++) { for (uint32_t j = 0; j < tex_info->mipmaps; j++) { uint32_t subresource = D3D12CalcSubresource(tex_info->base_mip + j, tex_info->base_layer + i, 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - - if ((wanted_state & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE)) { - // [[CROSS_FAMILY_FALLBACK]]. - if (tex_info->owner_info.resource && tex_info->main_texture && tex_info->main_texture != tex_info) { - uint32_t subres_qword = subresource / 64; - uint64_t subres_mask = (uint64_t(1) << (subresource % 64)); - if ((tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] & subres_mask)) { - // Prepare for copying the write-to texture to this one, if out-of-date. - _resource_transition_batch(tex_info->main_texture, subresource, planes, D3D12_RESOURCE_STATE_COPY_SOURCE); - _resource_transition_batch(tex_info, subresource, planes, D3D12_RESOURCE_STATE_COPY_DEST); - - CommandBufferInfo::FamilyFallbackCopy ffc; - ffc.texture = tex_info; - ffc.subresource = subresource; - ffc.mipmap = j; - ffc.dst_wanted_state = wanted_state; - - CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; - cmd_buf_info->family_fallback_copies.resize(cmd_buf_info->family_fallback_copies.size() + 1); - cmd_buf_info->family_fallback_copies[cmd_buf_info->family_fallback_copy_count] = ffc; - cmd_buf_info->family_fallback_copy_count++; - - tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] &= ~subres_mask; - } - continue; - } - } - _resource_transition_batch(tex_info, subresource, planes, wanted_state); } } @@ -3986,55 +4272,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff if (p_set_index == shader_info_in->sets.size() - 1) { CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); - - // [[CROSS_FAMILY_FALLBACK]]. - for (uint32_t i = 0; i < cmd_buf_info->family_fallback_copy_count; i++) { - const CommandBufferInfo::FamilyFallbackCopy &ffc = cmd_buf_info->family_fallback_copies[i]; - - D3D12_TEXTURE_COPY_LOCATION dst_tex = {}; - dst_tex.pResource = ffc.texture->resource; - dst_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst_tex.SubresourceIndex = ffc.subresource; - - D3D12_TEXTURE_COPY_LOCATION src_tex = {}; - src_tex.pResource = ffc.texture->main_texture->resource; - src_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src_tex.SubresourceIndex = ffc.subresource; - - const ResourceInfo::States::CrossFamillyFallback &xfamily = ffc.texture->main_texture->owner_info.states.xfamily_fallback; - if (xfamily.interim_buffer.Get()) { - // Must copy via a buffer due to reinterpret-copy known not to be available for these data types. - D3D12_TEXTURE_COPY_LOCATION buf_loc = {}; - buf_loc.pResource = xfamily.interim_buffer.Get(); - buf_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - buf_loc.PlacedFootprint.Offset = 0; - buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->main_texture->desc.Format; - buf_loc.PlacedFootprint.Footprint.Width = MAX(1u, ffc.texture->main_texture->desc.Width >> ffc.mipmap); - buf_loc.PlacedFootprint.Footprint.Height = MAX(1u, ffc.texture->main_texture->desc.Height >> ffc.mipmap); - buf_loc.PlacedFootprint.Footprint.Depth = MAX(1u, (uint32_t)ffc.texture->main_texture->desc.Depth() >> ffc.mipmap); - buf_loc.PlacedFootprint.Footprint.RowPitch = STEPIFY(buf_loc.PlacedFootprint.Footprint.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COPY_DEST); - cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier); - - cmd_buf_info->cmd_list->CopyTextureRegion(&buf_loc, 0, 0, 0, &src_tex, nullptr); - - barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE); - cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier); - - buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->desc.Format; - cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &buf_loc, nullptr); - } else { - // Direct copy is possible. - cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &src_tex, nullptr); - } - - // Set the specific SRV state we wanted from the beginning to the alternative version of the texture. - _resource_transition_batch(ffc.texture, ffc.subresource, 1, ffc.dst_wanted_state); - } - cmd_buf_info->family_fallback_copy_count = 0; - - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); } } @@ -4311,8 +4548,10 @@ void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buff } } - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; @@ -4352,9 +4591,11 @@ void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffe BufferInfo *src_buf_info = (BufferInfo *)p_src_buffer.id; BufferInfo *buf_loc_info = (BufferInfo *)p_buf_locfer.id; - _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - _resource_transition_batch(buf_loc_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(buf_loc_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } for (uint32_t i = 0; i < p_regions.size(); i++) { cmd_buf_info->cmd_list->CopyBufferRegion(buf_loc_info->resource, p_regions[i].dst_offset, src_buf_info->resource, p_regions[i].src_offset, p_regions[i].size); @@ -4366,43 +4607,37 @@ void RenderingDeviceDriverD3D12::command_copy_texture(CommandBufferID p_cmd_buff TextureInfo *src_tex_info = (TextureInfo *)p_src_texture.id; TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id; - for (uint32_t i = 0; i < p_regions.size(); i++) { - UINT src_subresource = D3D12CalcSubresource( - p_regions[i].src_subresources.mipmap, - p_regions[i].src_subresources.base_layer, - _compute_plane_slice(src_tex_info->format, p_regions[i].src_subresources.aspect), - src_tex_info->desc.MipLevels, - src_tex_info->desc.ArraySize()); - _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - - UINT dst_subresource = D3D12CalcSubresource( - p_regions[i].dst_subresources.mipmap, - p_regions[i].dst_subresources.base_layer, - _compute_plane_slice(dst_tex_info->format, p_regions[i].dst_subresources.aspect), - dst_tex_info->desc.MipLevels, - dst_tex_info->desc.ArraySize()); - _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + if (!barrier_capabilities.enhanced_barriers_supported) { + // Batch all barrier transitions for the textures before performing the copies. + for (uint32_t i = 0; i < p_regions.size(); i++) { + uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count); + for (uint32_t j = 0; j < layer_count; j++) { + UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j); + UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j); + _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + } + } _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } - CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource); - CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource); - - CD3DX12_BOX src_box( - p_regions[i].src_offset.x, - p_regions[i].src_offset.y, - p_regions[i].src_offset.z, - p_regions[i].src_offset.x + p_regions[i].size.x, - p_regions[i].src_offset.y + p_regions[i].size.y, - p_regions[i].src_offset.z + p_regions[i].size.z); - - cmd_buf_info->cmd_list->CopyTextureRegion( - &dst_location, - p_regions[i].dst_offset.x, - p_regions[i].dst_offset.y, - p_regions[i].dst_offset.z, - &src_location, - &src_box); + CD3DX12_BOX src_box; + for (uint32_t i = 0; i < p_regions.size(); i++) { + uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count); + for (uint32_t j = 0; j < layer_count; j++) { + UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j); + UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j); + CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource); + CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource); + src_box.left = p_regions[i].src_offset.x; + src_box.top = p_regions[i].src_offset.y; + src_box.front = p_regions[i].src_offset.z; + src_box.right = p_regions[i].src_offset.x + p_regions[i].size.x; + src_box.bottom = p_regions[i].src_offset.y + p_regions[i].size.y; + src_box.back = p_regions[i].src_offset.z + p_regions[i].size.z; + cmd_buf_info->cmd_list->CopyTextureRegion(&dst_location, p_regions[i].dst_offset.x, p_regions[i].dst_offset.y, p_regions[i].dst_offset.z, &src_location, &src_box); + } } } @@ -4412,12 +4647,12 @@ void RenderingDeviceDriverD3D12::command_resolve_texture(CommandBufferID p_cmd_b TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id; UINT src_subresource = D3D12CalcSubresource(p_src_mipmap, p_src_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize()); - _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); - UINT dst_subresource = D3D12CalcSubresource(p_dst_mipmap, p_dst_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize()); - _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST); - - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } cmd_buf_info->cmd_list->ResolveSubresource(dst_tex_info->resource, dst_subresource, src_tex_info->resource, src_subresource, RD_TO_D3D12_FORMAT[src_tex_info->format].general_format); } @@ -4458,7 +4693,9 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c } } - _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET); + if (!barrier_capabilities.enhanced_barriers_supported) { + _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET); + } for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) { D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false); @@ -4476,7 +4713,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c frames[frame_idx].desc_heap_walkers.rtv.advance(); } - } else { + } else if (tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) { // Clear via UAV. _command_check_descriptor_sets(p_cmd_buffer); @@ -4501,7 +4738,9 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c } } - _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + if (!barrier_capabilities.enhanced_barriers_supported) { + _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) { D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = _make_ranged_uav_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false); @@ -4522,6 +4761,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c (UINT)p_color.get_b8(), (UINT)p_color.get_a8(), }; + cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint( frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(), frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(), @@ -4533,6 +4773,8 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c frames[frame_idx].desc_heap_walkers.resources.advance(); frames[frame_idx].desc_heap_walkers.aux.advance(); } + } else { + ERR_FAIL_MSG("Cannot clear texture because its format does not support UAV writes. You'll need to update its contents through another method."); } } @@ -4540,8 +4782,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *buf_info = (BufferInfo *)p_src_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id; - - if (buf_info->flags.is_for_upload) { + if (!barrier_capabilities.enhanced_barriers_supported) { _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } @@ -4569,20 +4810,22 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID STEPIFY(p_regions[i].texture_region_size.y, block_h), p_regions[i].texture_region_size.z); - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { - UINT dst_subresource = D3D12CalcSubresource( - p_regions[i].texture_subresources.mipmap, - p_regions[i].texture_subresources.base_layer + j, - _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); - CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); + if (!barrier_capabilities.enhanced_barriers_supported) { + for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { + UINT dst_subresource = D3D12CalcSubresource( + p_regions[i].texture_subresources.mipmap, + p_regions[i].texture_subresources.base_layer + j, + _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), + tex_info->desc.MipLevels, + tex_info->desc.ArraySize()); + CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); - _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + } + + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { UINT dst_subresource = D3D12CalcSubresource( p_regions[i].texture_subresources.mipmap, @@ -4608,25 +4851,29 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID TextureInfo *tex_info = (TextureInfo *)p_src_texture.id; BufferInfo *buf_info = (BufferInfo *)p_buf_locfer.id; - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + } uint32_t block_w = 0, block_h = 0; get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h); for (uint32_t i = 0; i < p_regions.size(); i++) { - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { - UINT src_subresource = D3D12CalcSubresource( - p_regions[i].texture_subresources.mipmap, - p_regions[i].texture_subresources.base_layer + j, - _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); + if (!barrier_capabilities.enhanced_barriers_supported) { + for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { + UINT src_subresource = D3D12CalcSubresource( + p_regions[i].texture_subresources.mipmap, + p_regions[i].texture_subresources.base_layer + j, + _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), + tex_info->desc.MipLevels, + tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + } + + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { UINT src_subresource = D3D12CalcSubresource( p_regions[i].texture_subresources.mipmap, @@ -4775,22 +5022,25 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd } }; - for (uint32_t i = 0; i < fb_info->attachments.size(); i++) { - TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id; - if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { - _transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET); - } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { - _transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE); - } else { - DEV_ASSERT(false); + if (fb_info->is_screen || !barrier_capabilities.enhanced_barriers_supported) { + // Screen framebuffers must perform this transition even if enhanced barriers are supported. + for (uint32_t i = 0; i < fb_info->attachments.size(); i++) { + TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id; + if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + _transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET); + } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { + _transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE); + } else { + DEV_ASSERT(false); + } + } + if (fb_info->vrs_attachment) { + TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id; + _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE); } - } - if (fb_info->vrs_attachment) { - TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id; - _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE); - } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } cmd_buf_info->render_pass_state.region_rect = CD3DX12_RECT( p_rect.position.x, @@ -5152,8 +5402,11 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect(CommandBuf CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0); } @@ -5162,9 +5415,12 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect_count(Comm _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset); } @@ -5172,8 +5428,11 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect(CommandBufferID p_ CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0); } @@ -5182,9 +5441,12 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset); } @@ -5203,10 +5465,15 @@ void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBuffe cmd_buf_info->render_pass_state.vertex_buffer_views[i] = {}; cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offsets[i]; cmd_buf_info->render_pass_state.vertex_buffer_views[i].SizeInBytes = buffer_info->size - p_offsets[i]; - - _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + } } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->render_pass_state.vertex_buffer_count = p_binding_count; } @@ -5219,8 +5486,10 @@ void RenderingDeviceDriverD3D12::command_render_bind_index_buffer(CommandBufferI d3d12_ib_view.SizeInBytes = buffer_info->size - p_offset; d3d12_ib_view.Format = p_format == INDEX_BUFFER_FORMAT_UINT16 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; - _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } cmd_buf_info->cmd_list->IASetIndexBuffer(&d3d12_ib_view); } @@ -5616,15 +5885,21 @@ void RenderingDeviceDriverD3D12::command_bind_compute_uniform_set(CommandBufferI void RenderingDeviceDriverD3D12::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->Dispatch(p_x_groups, p_y_groups, p_z_groups); } void RenderingDeviceDriverD3D12::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + if (!barrier_capabilities.enhanced_barriers_supported) { + _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + } + cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch.Get(), 1, indirect_buf_info->resource, p_offset, nullptr, 0); } @@ -5939,11 +6214,7 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) { uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { switch (p_trait) { case API_TRAIT_HONORS_PIPELINE_BARRIERS: - // TODO: - // 1. Map fine/Vulkan/enhanced barriers to legacy barriers as closely as possible - // so there's still some advantage even without enhanced barriers available. - // 2. Implement enhanced barriers and return true where available. - return 0; + return barrier_capabilities.enhanced_barriers_supported; case API_TRAIT_SHADER_CHANGE_INVALIDATION: return (uint64_t)SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH; case API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT: @@ -5952,6 +6223,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { return D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; case API_TRAIT_SECONDARY_VIEWPORT_SCISSOR: return false; + case API_TRAIT_CLEARS_WITH_COPY_ENGINE: + return false; default: return RenderingDeviceDriver::api_trait_get(p_trait); } @@ -6101,6 +6374,8 @@ Error RenderingDeviceDriverD3D12::_initialize_device() { // These happen due to how D3D12MA manages buffers; seems benign. D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_HAS_NO_RESOURCE, D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS, + // Seemingly a false positive. + D3D12_MESSAGE_ID_DATA_STATIC_WHILE_SET_AT_EXECUTE_DESCRIPTOR_INVALID_DATA_CHANGE, }; D3D12_INFO_QUEUE_FILTER filter = {}; @@ -6250,6 +6525,7 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); if (SUCCEEDED(res)) { format_capabilities.relaxed_casting_supported = options12.RelaxedFormatCastingSupported; + barrier_capabilities.enhanced_barriers_supported = options12.EnhancedBarriersSupported; } if (vrs_capabilities.draw_call_supported || vrs_capabilities.primitive_supported || vrs_capabilities.ss_image_supported) { @@ -6282,7 +6558,7 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { #if 0 print_verbose("- Relaxed casting supported"); #else - // Certain configurations (Windows 11 with an updated Nvida driver) crash when using relaxed casting. + // Certain configurations (Windows 11 with an updated NVIDIA driver) crash when using relaxed casting. // Therefore, we disable it temporarily until we can assure that it's reliable. // There are fallbacks in place that work in every case, if less efficient. format_capabilities.relaxed_casting_supported = false; @@ -6382,10 +6658,6 @@ Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) { frames[i].desc_heap_walkers.samplers = frames[i].desc_heaps.samplers.make_walker(); frames[i].desc_heap_walkers.aux = frames[i].desc_heaps.aux.make_walker(); frames[i].desc_heap_walkers.rtv = frames[i].desc_heaps.rtv.make_walker(); - - ID3D12Resource *resource = nullptr; - HRESULT res = allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COMMON, nullptr, &frames[frame_idx].aux_resource, IID_PPV_ARGS(&resource)); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); } return OK; diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 3a9677485e5..1782819238f 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -122,6 +122,10 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { bool relaxed_casting_supported = false; }; + struct BarrierCapabilities { + bool enhanced_barriers_supported = false; + }; + RenderingContextDriverD3D12 *context_driver = nullptr; RenderingContextDriver::Device context_device; ComPtr adapter; @@ -136,6 +140,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; FormatCapabilities format_capabilities; + BarrierCapabilities barrier_capabilities; String pipeline_cache_id; class DescriptorsHeap { @@ -218,11 +223,6 @@ private: // As many subresources as mipmaps * layers; planes (for depth-stencil) are tracked together. TightLocalVector subresource_states; // Used only if not a view. uint32_t last_batch_with_uav_barrier = 0; - struct CrossFamillyFallback { - TightLocalVector subresources_dirty; - ComPtr interim_buffer; - ComPtr interim_buffer_alloc; - } xfamily_fallback; // [[CROSS_FAMILY_FALLBACK]]. }; ID3D12Resource *resource = nullptr; // Non-null even if not owned. @@ -275,7 +275,6 @@ private: uint64_t size = 0; struct { bool usable_as_uav : 1; - bool is_for_upload : 1; } flags = {}; }; @@ -317,10 +316,14 @@ private: UINT _compute_component_mapping(const TextureView &p_view); UINT _compute_plane_slice(DataFormat p_format, BitField p_aspect_bits); UINT _compute_plane_slice(DataFormat p_format, TextureAspect p_aspect); + UINT _compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset); struct CommandBufferInfo; void _discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info); +protected: + virtual bool _unordered_access_supported_by_format(DataFormat p_format); + public: virtual TextureID texture_create(const TextureFormat &p_format, const TextureView &p_view) override final; virtual TextureID texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil) override final; @@ -332,6 +335,7 @@ public: virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final; virtual void texture_unmap(TextureID p_texture) override final; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; + virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final; private: TextureID _texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps); @@ -367,8 +371,8 @@ public: virtual void command_pipeline_barrier( CommandBufferID p_cmd_buffer, - BitField p_src_stages, - BitField p_dst_stages, + BitField p_src_stages, + BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, VectorView p_texture_barriers) override final; @@ -465,16 +469,6 @@ private: RenderPassState render_pass_state; bool descriptor_heaps_set = false; - - // [[CROSS_FAMILY_FALLBACK]]. - struct FamilyFallbackCopy { - TextureInfo *texture = nullptr; - uint32_t subresource = 0; - uint32_t mipmap = 0; - D3D12_RESOURCE_STATES dst_wanted_state = {}; - }; - LocalVector family_fallback_copies; - uint32_t family_fallback_copy_count = 0; }; public: @@ -961,7 +955,6 @@ private: bool rtv = false; } desc_heaps_exhausted_reported; CD3DX12_CPU_DESCRIPTOR_HANDLE null_rtv_handle = {}; // For [[MANUAL_SUBPASSES]]. - ComPtr aux_resource; uint32_t segment_serial = 0; #ifdef DEV_ENABLED diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index b03a8418ed9..2a9e9ba2645 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -264,6 +264,63 @@ static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, }; +static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = { + VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED + VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL + VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, // TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL +}; + +static VkPipelineStageFlags _rd_to_vk_pipeline_stages(BitField p_stages) { + VkPipelineStageFlags vk_flags = 0; + if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) { + // Transfer has been split into copy and resolve bits. Clear them and merge them into one bit. + vk_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + p_stages.clear_flag(RDD::PIPELINE_STAGE_COPY_BIT); + p_stages.clear_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT); + } + + if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) { + // Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified. + // Therefore, storage is never cleared with an explicit command. + p_stages.clear_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT); + } + + // The rest of the flags have compatible numeric values with Vulkan. + return VkPipelineStageFlags(p_stages) | vk_flags; +} + +static VkAccessFlags _rd_to_vk_access_flags(BitField p_access) { + VkAccessFlags vk_flags = 0; + if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) { + vk_flags |= VK_ACCESS_TRANSFER_READ_BIT; + p_access.clear_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT); + p_access.clear_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT); + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) { + vk_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + p_access.clear_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT); + p_access.clear_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT); + } + + if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) { + // Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified. + // Therefore, storage is never cleared with an explicit command. + p_access.clear_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT); + } + + // The rest of the flags have compatible numeric values with Vulkan. + return VkAccessFlags(p_access) | vk_flags; +} + // RDD::CompareOperator == VkCompareOp. static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, VK_COMPARE_OP_NEVER)); static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, VK_COMPARE_OP_LESS)); @@ -1334,18 +1391,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_G, VK_COMPONENT_SWIZZLE_G) static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_B, VK_COMPONENT_SWIZZLE_B)); static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_A, VK_COMPONENT_SWIZZLE_A)); -// RDD::TextureLayout == VkImageLayout. -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_UNDEFINED)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_PREINITIALIZED, VK_IMAGE_LAYOUT_PREINITIALIZED)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR)); - // RDD::TextureAspectBits == VkImageAspectFlagBits. static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_DEPTH_BIT, VK_IMAGE_ASPECT_DEPTH_BIT)); @@ -1774,6 +1819,11 @@ BitField RenderingDeviceDriverVulkan::texture_get_usages_ return supported; } +bool RenderingDeviceDriverVulkan::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) { + r_raw_reinterpretation = false; + return true; +} + /*****************/ /**** SAMPLER ****/ /*****************/ @@ -1893,7 +1943,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, V static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); @@ -1910,8 +1959,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT, static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT)); -static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_READ_BIT, VK_ACCESS_HOST_READ_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_WRITE_BIT, VK_ACCESS_HOST_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_MEMORY_READ_BIT)); @@ -1929,8 +1976,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( for (uint32_t i = 0; i < p_memory_barriers.size(); i++) { vk_memory_barriers[i] = {}; vk_memory_barriers[i].sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - vk_memory_barriers[i].srcAccessMask = (VkPipelineStageFlags)p_memory_barriers[i].src_access; - vk_memory_barriers[i].dstAccessMask = (VkAccessFlags)p_memory_barriers[i].dst_access; + vk_memory_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].src_access); + vk_memory_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].dst_access); } VkBufferMemoryBarrier *vk_buffer_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, p_buffer_barriers.size()); @@ -1939,8 +1986,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( vk_buffer_barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; vk_buffer_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_buffer_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - vk_buffer_barriers[i].srcAccessMask = (VkAccessFlags)p_buffer_barriers[i].src_access; - vk_buffer_barriers[i].dstAccessMask = (VkAccessFlags)p_buffer_barriers[i].dst_access; + vk_buffer_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access); + vk_buffer_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access); vk_buffer_barriers[i].buffer = ((const BufferInfo *)p_buffer_barriers[i].buffer.id)->vk_buffer; vk_buffer_barriers[i].offset = p_buffer_barriers[i].offset; vk_buffer_barriers[i].size = p_buffer_barriers[i].size; @@ -1951,10 +1998,10 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( const TextureInfo *tex_info = (const TextureInfo *)p_texture_barriers[i].texture.id; vk_image_barriers[i] = {}; vk_image_barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - vk_image_barriers[i].srcAccessMask = (VkAccessFlags)p_texture_barriers[i].src_access; - vk_image_barriers[i].dstAccessMask = (VkAccessFlags)p_texture_barriers[i].dst_access; - vk_image_barriers[i].oldLayout = (VkImageLayout)p_texture_barriers[i].prev_layout; - vk_image_barriers[i].newLayout = (VkImageLayout)p_texture_barriers[i].next_layout; + vk_image_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_texture_barriers[i].src_access); + vk_image_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_texture_barriers[i].dst_access); + vk_image_barriers[i].oldLayout = RD_TO_VK_LAYOUT[p_texture_barriers[i].prev_layout]; + vk_image_barriers[i].newLayout = RD_TO_VK_LAYOUT[p_texture_barriers[i].next_layout]; vk_image_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_image_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_image_barriers[i].image = tex_info->vk_view_create_info.image; @@ -1984,8 +2031,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( vkCmdPipelineBarrier( (VkCommandBuffer)p_cmd_buffer.id, - (VkPipelineStageFlags)p_src_stages, - (VkPipelineStageFlags)p_dst_stages, + _rd_to_vk_pipeline_stages(p_src_stages), + _rd_to_vk_pipeline_stages(p_dst_stages), 0, p_memory_barriers.size(), vk_memory_barriers, p_buffer_barriers.size(), vk_buffer_barriers, @@ -3727,7 +3774,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buf const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id; const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id; - vkCmdCopyImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, dst_tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, p_regions.size(), vk_copy_regions); + vkCmdCopyImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); } void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { @@ -3747,7 +3794,7 @@ void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_ vk_resolve.extent.height = MAX(1u, src_tex_info->vk_create_info.extent.height >> p_src_mipmap); vk_resolve.extent.depth = MAX(1u, src_tex_info->vk_create_info.extent.depth >> p_src_mipmap); - vkCmdResolveImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, dst_tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, 1, &vk_resolve); + vkCmdResolveImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], 1, &vk_resolve); } void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { @@ -3758,7 +3805,7 @@ void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_ _texture_subresource_range_to_vk(p_subresources, &vk_subresources); const TextureInfo *tex_info = (const TextureInfo *)p_texture.id; - vkCmdClearColorImage((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, (VkImageLayout)p_texture_layout, &vk_color, 1, &vk_subresources); + vkCmdClearColorImage((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_color, 1, &vk_subresources); } void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { @@ -3769,7 +3816,7 @@ void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID const BufferInfo *buf_info = (const BufferInfo *)p_src_buffer.id; const TextureInfo *tex_info = (const TextureInfo *)p_dst_texture.id; - vkCmdCopyBufferToImage((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, p_regions.size(), vk_copy_regions); + vkCmdCopyBufferToImage((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); } void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView p_regions) { @@ -3780,7 +3827,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID const TextureInfo *tex_info = (const TextureInfo *)p_src_texture.id; const BufferInfo *buf_info = (const BufferInfo *)p_dst_buffer.id; - vkCmdCopyImageToBuffer((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, buf_info->vk_buffer, p_regions.size(), vk_copy_regions); + vkCmdCopyImageToBuffer((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], buf_info->vk_buffer, p_regions.size(), vk_copy_regions); } /******************/ @@ -3926,7 +3973,7 @@ static void _attachment_reference_to_vk(const RDD::AttachmentReference &p_attach *r_vk_attachment_reference = {}; r_vk_attachment_reference->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR; r_vk_attachment_reference->attachment = p_attachment_reference.attachment; - r_vk_attachment_reference->layout = (VkImageLayout)p_attachment_reference.layout; + r_vk_attachment_reference->layout = RD_TO_VK_LAYOUT[p_attachment_reference.layout]; r_vk_attachment_reference->aspectMask = (VkImageAspectFlags)p_attachment_reference.aspect; } @@ -3945,8 +3992,8 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; + virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final; /*****************/ /**** SAMPLER ****/ diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 0227472d0e6..f9883106c2b 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -824,8 +824,8 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture } if (texture.draw_tracker != nullptr) { - // Draw tracker can assume the texture will be in transfer destination. - texture.draw_tracker->usage = RDG::RESOURCE_USAGE_TRANSFER_TO; + // Draw tracker can assume the texture will be in copy destination. + texture.draw_tracker->usage = RDG::RESOURCE_USAGE_COPY_TO; } } @@ -847,8 +847,11 @@ RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with // Create view. Texture texture = *src_texture; + texture.shared_fallback = nullptr; RDD::TextureView tv; + bool create_shared = true; + bool raw_reintepretation = false; if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) { tv.format = texture.format; } else { @@ -857,13 +860,47 @@ RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with ERR_FAIL_COND_V_MSG(!texture.allowed_shared_formats.has(p_view.format_override), RID(), "Format override is not in the list of allowed shareable formats for original texture."); tv.format = p_view.format_override; + create_shared = driver->texture_can_make_shared_with_format(texture.driver_id, p_view.format_override, raw_reintepretation); } tv.swizzle_r = p_view.swizzle_r; tv.swizzle_g = p_view.swizzle_g; tv.swizzle_b = p_view.swizzle_b; tv.swizzle_a = p_view.swizzle_a; - texture.driver_id = driver->texture_create_shared(texture.driver_id, tv); + if (create_shared) { + texture.driver_id = driver->texture_create_shared(texture.driver_id, tv); + } else { + // The regular view will use the same format as the main texture. + RDD::TextureView regular_view = tv; + regular_view.format = src_texture->format; + texture.driver_id = driver->texture_create_shared(texture.driver_id, regular_view); + + // Create the independent texture for the alias. + RDD::TextureFormat alias_format = texture.texture_format(); + alias_format.format = tv.format; + alias_format.usage_bits = TEXTURE_USAGE_SAMPLING_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT; + + _texture_check_shared_fallback(src_texture); + _texture_check_shared_fallback(&texture); + + texture.shared_fallback->texture = driver->texture_create(alias_format, tv); + texture.shared_fallback->raw_reinterpretation = raw_reintepretation; + texture_memory += driver->texture_get_allocation_size(texture.shared_fallback->texture); + + RDG::ResourceTracker *tracker = RDG::resource_tracker_create(); + tracker->texture_driver_id = texture.shared_fallback->texture; + tracker->texture_subresources = texture.barrier_range(); + tracker->texture_usage = alias_format.usage_bits; + tracker->reference_count = 1; + texture.shared_fallback->texture_tracker = tracker; + texture.shared_fallback->revision = 0; + + if (raw_reintepretation && src_texture->shared_fallback->buffer.id == 0) { + // For shared textures of the same size, we create the buffer on the main texture if it doesn't have it already. + _texture_create_reinterpret_buffer(src_texture); + } + } + ERR_FAIL_COND_V(!texture.driver_id, RID()); texture.slice_trackers.clear(); @@ -965,6 +1002,7 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, } Texture texture = *src_texture; + texture.shared_fallback = nullptr; get_image_format_required_size(texture.format, texture.width, texture.height, texture.depth, p_mipmap + 1, &texture.width, &texture.height); texture.mipmaps = p_mipmaps; @@ -979,6 +1017,8 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, } RDD::TextureView tv; + bool create_shared = true; + bool raw_reintepretation = false; if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) { tv.format = texture.format; } else { @@ -987,7 +1027,9 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, ERR_FAIL_COND_V_MSG(!texture.allowed_shared_formats.has(p_view.format_override), RID(), "Format override is not in the list of allowed shareable formats for original texture."); tv.format = p_view.format_override; + create_shared = driver->texture_can_make_shared_with_format(texture.driver_id, p_view.format_override, raw_reintepretation); } + tv.swizzle_r = p_view.swizzle_r; tv.swizzle_g = p_view.swizzle_g; tv.swizzle_b = p_view.swizzle_b; @@ -1000,7 +1042,47 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, "Specified layer must be a multiple of 6."); } - texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, tv, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps); + if (create_shared) { + texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, tv, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps); + } else { + // The regular view will use the same format as the main texture. + RDD::TextureView regular_view = tv; + regular_view.format = src_texture->format; + texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, regular_view, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps); + + // Create the independent texture for the slice. + RDD::TextureSubresourceRange slice_range = texture.barrier_range(); + slice_range.base_mipmap = 0; + slice_range.base_layer = 0; + + RDD::TextureFormat slice_format = texture.texture_format(); + slice_format.width = MAX(texture.width >> p_mipmap, 1U); + slice_format.height = MAX(texture.height >> p_mipmap, 1U); + slice_format.depth = MAX(texture.depth >> p_mipmap, 1U); + slice_format.format = tv.format; + slice_format.usage_bits = TEXTURE_USAGE_SAMPLING_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT; + + _texture_check_shared_fallback(src_texture); + _texture_check_shared_fallback(&texture); + + texture.shared_fallback->texture = driver->texture_create(slice_format, tv); + texture.shared_fallback->raw_reinterpretation = raw_reintepretation; + texture_memory += driver->texture_get_allocation_size(texture.shared_fallback->texture); + + RDG::ResourceTracker *tracker = RDG::resource_tracker_create(); + tracker->texture_driver_id = texture.shared_fallback->texture; + tracker->texture_subresources = slice_range; + tracker->texture_usage = slice_format.usage_bits; + tracker->reference_count = 1; + texture.shared_fallback->texture_tracker = tracker; + texture.shared_fallback->revision = 0; + + if (raw_reintepretation && src_texture->shared_fallback->buffer.id == 0) { + // For shared texture slices, we create the buffer on the slice if the source texture has no reinterpretation buffer. + _texture_create_reinterpret_buffer(&texture); + } + } + ERR_FAIL_COND_V(!texture.driver_id, RID()); const Rect2i slice_rect(p_mipmap, p_layer, p_mipmaps, slice_layers); @@ -1093,15 +1175,18 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve // When using the setup queue directly, we transition the texture to the optimal layout. RDD::TextureBarrier tb; tb.texture = texture->driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; + tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED; - tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; + tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; tb.subresources.aspect = texture->barrier_aspect_flags; tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; tb.subresources.layer_count = 1; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); + driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_COPY_BIT, {}, {}, tb); + } else if (!p_use_setup_queue) { + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(p_texture, texture, true); } uint32_t mipmap_offset = 0; @@ -1199,7 +1284,7 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve copy_region.texture_region_size = Vector3i(region_logic_w, region_logic_h, 1); if (p_use_setup_queue) { - driver->command_copy_buffer_to_texture(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, copy_region); + driver->command_copy_buffer_to_texture(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region); } else { RDG::RecordedBufferToTextureCopy buffer_to_texture_copy; buffer_to_texture_copy.from_buffer = staging_buffer_blocks[staging_buffer_current].driver_id; @@ -1221,14 +1306,14 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve // If the texture does not have a tracker, it means it must be transitioned to the sampling state. RDD::TextureBarrier tb; tb.texture = texture->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; + tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; + tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; tb.subresources.aspect = texture->barrier_aspect_flags; tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; tb.subresources.layer_count = 1; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb); + driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb); } else if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty()) { if (_texture_make_mutable(texture, p_texture)) { // The texture must be mutable to be used as a copy destination. @@ -1241,6 +1326,186 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve return OK; } +void RenderingDevice::_texture_check_shared_fallback(Texture *p_texture) { + if (p_texture->shared_fallback == nullptr) { + p_texture->shared_fallback = memnew(Texture::SharedFallback); + } +} + +void RenderingDevice::_texture_update_shared_fallback(RID p_texture_rid, Texture *p_texture, bool p_for_writing) { + if (p_texture->shared_fallback == nullptr) { + // This texture does not use any of the shared texture fallbacks. + return; + } + + if (p_texture->owner.is_valid()) { + Texture *owner_texture = texture_owner.get_or_null(p_texture->owner); + ERR_FAIL_NULL(owner_texture); + if (p_for_writing) { + // Only the main texture is used for writing when using the shared fallback. + owner_texture->shared_fallback->revision++; + } else if (p_texture->shared_fallback->revision != owner_texture->shared_fallback->revision) { + // Copy the contents of the main texture into the shared texture fallback slice. Update the revision. + _texture_copy_shared(p_texture->owner, owner_texture, p_texture_rid, p_texture); + p_texture->shared_fallback->revision = owner_texture->shared_fallback->revision; + } + } else if (p_for_writing) { + // Increment the revision of the texture so shared texture fallback slices must be updated. + p_texture->shared_fallback->revision++; + } +} + +void RenderingDevice::_texture_free_shared_fallback(Texture *p_texture) { + if (p_texture->shared_fallback != nullptr) { + if (p_texture->shared_fallback->texture_tracker != nullptr) { + RDG::resource_tracker_free(p_texture->shared_fallback->texture_tracker); + } + + if (p_texture->shared_fallback->buffer_tracker != nullptr) { + RDG::resource_tracker_free(p_texture->shared_fallback->buffer_tracker); + } + + if (p_texture->shared_fallback->texture.id != 0) { + texture_memory -= driver->texture_get_allocation_size(p_texture->shared_fallback->texture); + driver->texture_free(p_texture->shared_fallback->texture); + } + + if (p_texture->shared_fallback->buffer.id != 0) { + buffer_memory -= driver->buffer_get_allocation_size(p_texture->shared_fallback->buffer); + driver->buffer_free(p_texture->shared_fallback->buffer); + } + + memdelete(p_texture->shared_fallback); + p_texture->shared_fallback = nullptr; + } +} + +void RenderingDevice::_texture_copy_shared(RID p_src_texture_rid, Texture *p_src_texture, RID p_dst_texture_rid, Texture *p_dst_texture) { + // The only type of copying allowed is from the main texture to the slice texture, as slice textures are not allowed to be used for writing when using this fallback. + DEV_ASSERT(p_src_texture != nullptr); + DEV_ASSERT(p_dst_texture != nullptr); + DEV_ASSERT(p_src_texture->owner.is_null()); + DEV_ASSERT(p_dst_texture->owner == p_src_texture_rid); + + bool src_made_mutable = _texture_make_mutable(p_src_texture, p_src_texture_rid); + bool dst_made_mutable = _texture_make_mutable(p_dst_texture, p_dst_texture_rid); + if (src_made_mutable || dst_made_mutable) { + draw_graph.add_synchronization(); + } + + if (p_dst_texture->shared_fallback->raw_reinterpretation) { + // If one of the textures is a main texture and they have a reinterpret buffer, we prefer using that as it's guaranteed to be big enough to hold + // anything and it's how the shared textures that don't use slices are created. + bool src_has_buffer = p_src_texture->shared_fallback->buffer.id != 0; + bool dst_has_buffer = p_dst_texture->shared_fallback->buffer.id != 0; + bool from_src = p_src_texture->owner.is_null() && src_has_buffer; + bool from_dst = p_dst_texture->owner.is_null() && dst_has_buffer; + if (!from_src && !from_dst) { + // If neither texture passed the condition, we just pick whichever texture has a reinterpretation buffer. + from_src = src_has_buffer; + from_dst = dst_has_buffer; + } + + // Pick the buffer and tracker to use from the right texture. + RDD::BufferID shared_buffer; + RDG::ResourceTracker *shared_buffer_tracker = nullptr; + if (from_src) { + shared_buffer = p_src_texture->shared_fallback->buffer; + shared_buffer_tracker = p_src_texture->shared_fallback->buffer_tracker; + } else if (from_dst) { + shared_buffer = p_dst_texture->shared_fallback->buffer; + shared_buffer_tracker = p_dst_texture->shared_fallback->buffer_tracker; + } else { + DEV_ASSERT(false && "This path should not be reachable."); + } + + // FIXME: When using reinterpretation buffers, the only texture aspect supported is color. Depth or stencil contents won't get copied. + RDD::BufferTextureCopyRegion get_data_region; + RDG::RecordedBufferToTextureCopy update_copy; + RDD::TextureCopyableLayout first_copyable_layout; + RDD::TextureCopyableLayout copyable_layout; + RDD::TextureSubresource texture_subresource; + texture_subresource.aspect = RDD::TEXTURE_ASPECT_COLOR; + texture_subresource.layer = 0; + texture_subresource.mipmap = 0; + driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, &first_copyable_layout); + + // Copying each mipmap from main texture to a buffer and then to the slice texture. + thread_local LocalVector get_data_vector; + thread_local LocalVector update_vector; + get_data_vector.clear(); + update_vector.clear(); + for (uint32_t i = 0; i < p_dst_texture->mipmaps; i++) { + driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, ©able_layout); + + uint32_t mipmap = p_dst_texture->base_mipmap + i; + get_data_region.buffer_offset = copyable_layout.offset - first_copyable_layout.offset; + get_data_region.texture_subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT; + get_data_region.texture_subresources.base_layer = p_dst_texture->base_layer; + get_data_region.texture_subresources.mipmap = mipmap; + get_data_region.texture_subresources.layer_count = p_dst_texture->layers; + get_data_region.texture_region_size.x = MAX(1U, p_src_texture->width >> mipmap); + get_data_region.texture_region_size.y = MAX(1U, p_src_texture->height >> mipmap); + get_data_region.texture_region_size.z = MAX(1U, p_src_texture->depth >> mipmap); + get_data_vector.push_back(get_data_region); + + update_copy.from_buffer = shared_buffer; + update_copy.region.buffer_offset = get_data_region.buffer_offset; + update_copy.region.texture_subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT; + update_copy.region.texture_subresources.base_layer = texture_subresource.layer; + update_copy.region.texture_subresources.mipmap = texture_subresource.mipmap; + update_copy.region.texture_subresources.layer_count = get_data_region.texture_subresources.layer_count; + update_copy.region.texture_region_size.x = get_data_region.texture_region_size.x; + update_copy.region.texture_region_size.y = get_data_region.texture_region_size.y; + update_copy.region.texture_region_size.z = get_data_region.texture_region_size.z; + update_vector.push_back(update_copy); + + texture_subresource.mipmap++; + } + + draw_graph.add_texture_get_data(p_src_texture->driver_id, p_src_texture->draw_tracker, shared_buffer, get_data_vector, shared_buffer_tracker); + draw_graph.add_texture_update(p_dst_texture->shared_fallback->texture, p_dst_texture->shared_fallback->texture_tracker, update_vector, shared_buffer_tracker); + } else { + // Raw reinterpretation is not required. Use a regular texture copy. + RDD::TextureCopyRegion copy_region; + copy_region.src_subresources.aspect = p_src_texture->read_aspect_flags; + copy_region.src_subresources.base_layer = p_dst_texture->base_layer; + copy_region.src_subresources.layer_count = p_dst_texture->layers; + copy_region.dst_subresources.aspect = p_dst_texture->read_aspect_flags; + copy_region.dst_subresources.base_layer = 0; + copy_region.dst_subresources.layer_count = copy_region.src_subresources.layer_count; + + // Copying each mipmap from main texture to to the slice texture. + thread_local LocalVector region_vector; + region_vector.clear(); + for (uint32_t i = 0; i < p_dst_texture->mipmaps; i++) { + uint32_t mipmap = p_dst_texture->base_mipmap + i; + copy_region.src_subresources.mipmap = mipmap; + copy_region.dst_subresources.mipmap = i; + copy_region.size.x = MAX(1U, p_src_texture->width >> mipmap); + copy_region.size.y = MAX(1U, p_src_texture->height >> mipmap); + copy_region.size.z = MAX(1U, p_src_texture->depth >> mipmap); + region_vector.push_back(copy_region); + } + + draw_graph.add_texture_copy(p_src_texture->driver_id, p_src_texture->draw_tracker, p_dst_texture->shared_fallback->texture, p_dst_texture->shared_fallback->texture_tracker, region_vector); + } +} + +void RenderingDevice::_texture_create_reinterpret_buffer(Texture *p_texture) { + uint64_t row_pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP); + uint64_t transfer_alignment = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT); + uint32_t pixel_bytes = get_image_format_pixel_size(p_texture->format); + uint32_t row_pitch = STEPIFY(p_texture->width * pixel_bytes, row_pitch_step); + uint64_t buffer_size = STEPIFY(pixel_bytes * row_pitch * p_texture->height * p_texture->depth, transfer_alignment); + p_texture->shared_fallback->buffer = driver->buffer_create(buffer_size, RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU); + buffer_memory += driver->buffer_get_allocation_size(p_texture->shared_fallback->buffer); + + RDG::ResourceTracker *tracker = RDG::resource_tracker_create(); + tracker->buffer_driver_id = p_texture->shared_fallback->buffer; + p_texture->shared_fallback->buffer_tracker = tracker; +} + Vector RenderingDevice::_texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d) { uint32_t width, height, depth; uint32_t tight_mip_size = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, tex->mipmaps, &width, &height, &depth); @@ -1535,6 +1800,9 @@ Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const copy_region.size = p_size; + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(p_to_texture, dst_tex, true); + // The textures must be mutable to be used in the copy operation. bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture); bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture); @@ -1578,6 +1846,9 @@ Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_ ERR_FAIL_COND_V_MSG(src_tex->read_aspect_flags != dst_tex->read_aspect_flags, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(p_to_texture, dst_tex, true); + // The textures must be mutable to be used in the resolve operation. bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture); bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture); @@ -1620,6 +1891,9 @@ Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32 range.base_layer = src_tex->base_layer + p_base_layer; range.layer_count = p_layers; + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(p_texture, src_tex, true); + if (_texture_make_mutable(src_tex, p_texture)) { // The texture must be mutable to be used as a clear destination. draw_graph.add_synchronization(); @@ -2526,6 +2800,14 @@ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vectorshared_textures_to_update) { + Texture *texture = texture_owner.get_or_null(shared.texture); + ERR_CONTINUE(texture == nullptr); + _texture_update_shared_fallback(shared.texture, texture, shared.writing); + } +} + RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p_shader, uint32_t p_shader_set) { _THREAD_SAFE_METHOD_ @@ -2554,6 +2836,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p Vector draw_trackers; Vector draw_trackers_usage; HashMap untracked_usage; + Vector shared_textures_to_update; for (uint32_t i = 0; i < set_uniform_count; i++) { const ShaderUniform &set_uniform = set_uniforms[i]; @@ -2619,8 +2902,16 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p attachable_textures.push_back(attachable_texture); } - if (texture->draw_tracker != nullptr) { - draw_trackers.push_back(texture->draw_tracker); + RDD::TextureID driver_id = texture->driver_id; + RDG::ResourceTracker *tracker = texture->draw_tracker; + if (texture->shared_fallback != nullptr && texture->shared_fallback->texture.id != 0) { + driver_id = texture->shared_fallback->texture; + tracker = texture->shared_fallback->texture_tracker; + shared_textures_to_update.push_back({ false, texture_id }); + } + + if (tracker != nullptr) { + draw_trackers.push_back(tracker); draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_SAMPLE); } else { untracked_usage[texture_id] = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE; @@ -2629,7 +2920,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); driver_uniform.ids.push_back(*sampler_driver_id); - driver_uniform.ids.push_back(texture->driver_id); + driver_uniform.ids.push_back(driver_id); } } break; case UNIFORM_TYPE_TEXTURE: { @@ -2656,8 +2947,16 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p attachable_textures.push_back(attachable_texture); } - if (texture->draw_tracker != nullptr) { - draw_trackers.push_back(texture->draw_tracker); + RDD::TextureID driver_id = texture->driver_id; + RDG::ResourceTracker *tracker = texture->draw_tracker; + if (texture->shared_fallback != nullptr && texture->shared_fallback->texture.id != 0) { + driver_id = texture->shared_fallback->texture; + tracker = texture->shared_fallback->texture_tracker; + shared_textures_to_update.push_back({ false, texture_id }); + } + + if (tracker != nullptr) { + draw_trackers.push_back(tracker); draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_SAMPLE); } else { untracked_usage[texture_id] = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE; @@ -2665,7 +2964,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); - driver_uniform.ids.push_back(texture->driver_id); + driver_uniform.ids.push_back(driver_id); } } break; case UNIFORM_TYPE_IMAGE: { @@ -2687,6 +2986,10 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(), "Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform."); + if (texture->owner.is_null() && texture->shared_fallback != nullptr) { + shared_textures_to_update.push_back({ true, texture_id }); + } + if (_texture_make_mutable(texture, texture_id)) { // The texture must be mutable as a layout transition will be required. draw_graph.add_synchronization(); @@ -2872,6 +3175,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p uniform_set.draw_trackers = draw_trackers; uniform_set.draw_trackers_usage = draw_trackers_usage; uniform_set.untracked_usage = untracked_usage; + uniform_set.shared_textures_to_update = shared_textures_to_update; uniform_set.shader_set = p_shader_set; uniform_set.shader_id = p_shader; @@ -3347,12 +3651,16 @@ Error RenderingDevice::_draw_list_render_pass_begin(Framebuffer *p_framebuffer, for (int i = 0; i < p_framebuffer->texture_ids.size(); i++) { RDD::RenderPassClearValue clear_value; - Texture *texture = texture_owner.get_or_null(p_framebuffer->texture_ids[i]); + RID texture_rid = p_framebuffer->texture_ids[i]; + Texture *texture = texture_owner.get_or_null(texture_rid); if (!texture) { color_index++; continue; } + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(texture_rid, texture, true); + if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { if (color_index < p_clear_colors.size()) { ERR_FAIL_INDEX_V(color_index, p_clear_colors.size(), ERR_BUG); // A bug. @@ -3816,6 +4124,8 @@ void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i); UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); dl->state.sets[i].bound = true; @@ -4222,6 +4532,8 @@ void RenderingDevice::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_g draw_graph.add_compute_list_bind_uniform_set(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i); UniformSet *uniform_set = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + draw_graph.add_compute_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); cl->state.sets[i].bound = true; @@ -4329,6 +4641,8 @@ void RenderingDevice::compute_list_dispatch_indirect(ComputeListID p_list, RID p draw_graph.add_compute_list_bind_uniform_set(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i); UniformSet *uniform_set = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + draw_graph.add_compute_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); cl->state.sets[i].bound = true; @@ -4420,6 +4734,7 @@ bool RenderingDevice::_texture_make_mutable(Texture *p_texture, RID p_texture_id draw_tracker->parent = owner_texture->draw_tracker; draw_tracker->texture_driver_id = p_texture->driver_id; draw_tracker->texture_subresources = p_texture->barrier_range(); + draw_tracker->texture_usage = p_texture->usage_flags; draw_tracker->texture_slice_or_dirty_rect = p_texture->slice_rect; owner_texture->slice_trackers[p_texture->slice_rect] = draw_tracker; } @@ -4441,6 +4756,7 @@ bool RenderingDevice::_texture_make_mutable(Texture *p_texture, RID p_texture_id p_texture->draw_tracker = RDG::resource_tracker_create(); p_texture->draw_tracker->texture_driver_id = p_texture->driver_id; p_texture->draw_tracker->texture_subresources = p_texture->barrier_range(); + p_texture->draw_tracker->texture_usage = p_texture->usage_flags; p_texture->draw_tracker->reference_count = 1; if (p_texture_id.is_valid()) { @@ -4837,6 +5153,8 @@ void RenderingDevice::_free_pending_resources(int p_frame) { WARN_PRINT("Deleted a texture while it was bound."); } + _texture_free_shared_fallback(texture); + texture_memory -= driver->texture_get_allocation_size(texture->driver_id); driver->texture_free(texture->driver_id); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index d0fa4ab1fa5..38ffd5d23de 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -207,6 +207,15 @@ public: // for a framebuffer to render into it. struct Texture { + struct SharedFallback { + uint32_t revision = 1; + RDD::TextureID texture; + RDG::ResourceTracker *texture_tracker = nullptr; + RDD::BufferID buffer; + RDG::ResourceTracker *buffer_tracker = nullptr; + bool raw_reinterpretation = false; + }; + RDD::TextureID driver_id; TextureType type = TEXTURE_TYPE_MAX; @@ -235,6 +244,7 @@ public: RDG::ResourceTracker *draw_tracker = nullptr; HashMap slice_trackers; + SharedFallback *shared_fallback = nullptr; RDD::TextureSubresourceRange barrier_range() const { RDD::TextureSubresourceRange r; @@ -245,6 +255,22 @@ public: r.layer_count = layers; return r; } + + TextureFormat texture_format() const { + TextureFormat tf; + tf.format = format; + tf.width = width; + tf.height = height; + tf.depth = depth; + tf.array_layers = layers; + tf.mipmaps = mipmaps; + tf.texture_type = type; + tf.samples = samples; + tf.usage_bits = usage_flags; + tf.shareable_formats = allowed_shared_formats; + tf.is_resolve_buffer = is_resolve_buffer; + return tf; + } }; RID_Owner texture_owner; @@ -252,6 +278,11 @@ public: Vector _texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d = false); Error _texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_use_setup_queue, bool p_validate_can_update); + void _texture_check_shared_fallback(Texture *p_texture); + void _texture_update_shared_fallback(RID p_texture_rid, Texture *p_texture, bool p_for_writing); + void _texture_free_shared_fallback(Texture *p_texture); + void _texture_copy_shared(RID p_src_texture_rid, Texture *p_src_texture, RID p_dst_texture_rid, Texture *p_dst_texture); + void _texture_create_reinterpret_buffer(Texture *p_texture); public: struct TextureView { @@ -916,16 +947,24 @@ private: RID texture; }; + struct SharedTexture { + uint32_t writing = 0; + RID texture; + }; + LocalVector attachable_textures; // Used for validation. Vector draw_trackers; Vector draw_trackers_usage; HashMap untracked_usage; + LocalVector shared_textures_to_update; InvalidationCallback invalidated_callback = nullptr; void *invalidated_callback_userdata = nullptr; }; RID_Owner uniform_set_owner; + void _uniform_set_update_shared(UniformSet *p_uniform_set); + public: RID uniform_set_create(const Vector &p_uniforms, RID p_shader, uint32_t p_shader_set); bool uniform_set_is_valid(RID p_uniform_set); diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp index be74467340a..3b8e3efeb8e 100644 --- a/servers/rendering/rendering_device_driver.cpp +++ b/servers/rendering/rendering_device_driver.cpp @@ -372,6 +372,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) { return 1; case API_TRAIT_SECONDARY_VIEWPORT_SCISSOR: return 1; + case API_TRAIT_CLEARS_WITH_COPY_ENGINE: + return true; default: ERR_FAIL_V(0); } diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index f9a861426ad..0b5fc51a1d8 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -220,15 +220,17 @@ public: enum TextureLayout { TEXTURE_LAYOUT_UNDEFINED, - TEXTURE_LAYOUT_GENERAL, + TEXTURE_LAYOUT_STORAGE_OPTIMAL, TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, - TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, - TEXTURE_LAYOUT_PREINITIALIZED, - TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL = 1000164003, + TEXTURE_LAYOUT_COPY_SRC_OPTIMAL, + TEXTURE_LAYOUT_COPY_DST_OPTIMAL, + TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL, + TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL, + TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL, + TEXTURE_LAYOUT_MAX }; enum TextureAspect { @@ -284,6 +286,7 @@ public: virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) = 0; virtual void texture_unmap(TextureID p_texture) = 0; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) = 0; + virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) = 0; /*****************/ /**** SAMPLER ****/ @@ -317,10 +320,12 @@ public: PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = (1 << 9), PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = (1 << 10), PIPELINE_STAGE_COMPUTE_SHADER_BIT = (1 << 11), - PIPELINE_STAGE_TRANSFER_BIT = (1 << 12), + PIPELINE_STAGE_COPY_BIT = (1 << 12), PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT = (1 << 13), + PIPELINE_STAGE_RESOLVE_BIT = (1 << 14), PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), + PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), }; enum BarrierAccessBits { @@ -335,13 +340,16 @@ public: BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = (1 << 8), BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = (1 << 9), BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = (1 << 10), - BARRIER_ACCESS_TRANSFER_READ_BIT = (1 << 11), - BARRIER_ACCESS_TRANSFER_WRITE_BIT = (1 << 12), + BARRIER_ACCESS_COPY_READ_BIT = (1 << 11), + BARRIER_ACCESS_COPY_WRITE_BIT = (1 << 12), BARRIER_ACCESS_HOST_READ_BIT = (1 << 13), BARRIER_ACCESS_HOST_WRITE_BIT = (1 << 14), BARRIER_ACCESS_MEMORY_READ_BIT = (1 << 15), BARRIER_ACCESS_MEMORY_WRITE_BIT = (1 << 16), BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT = (1 << 23), + BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 24), + BARRIER_ACCESS_RESOLVE_WRITE_BIT = (1 << 25), + BARRIER_ACCESS_STORAGE_CLEAR_BIT = (1 << 27), }; struct MemoryBarrier { @@ -735,7 +743,9 @@ public: API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT, API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP, API_TRAIT_SECONDARY_VIEWPORT_SCISSOR, + API_TRAIT_CLEARS_WITH_COPY_ENGINE, }; + enum ShaderChangeInvalidation { SHADER_CHANGE_INVALIDATION_ALL_BOUND_UNIFORM_SETS, // What Vulkan does. diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index c7de5c67cb3..221ec72e4a4 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -36,7 +36,8 @@ #define PRINT_COMMAND_RECORDING 0 RenderingDeviceGraph::RenderingDeviceGraph() { - // Default initialization. + driver_honors_barriers = false; + driver_clears_with_copy_engine = false; } RenderingDeviceGraph::~RenderingDeviceGraph() { @@ -44,7 +45,8 @@ RenderingDeviceGraph::~RenderingDeviceGraph() { bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { switch (p_usage) { - case RESOURCE_USAGE_TRANSFER_FROM: + case RESOURCE_USAGE_COPY_FROM: + case RESOURCE_USAGE_RESOLVE_FROM: case RESOURCE_USAGE_UNIFORM_BUFFER_READ: case RESOURCE_USAGE_INDIRECT_BUFFER_READ: case RESOURCE_USAGE_TEXTURE_BUFFER_READ: @@ -54,7 +56,8 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_TEXTURE_SAMPLE: case RESOURCE_USAGE_STORAGE_IMAGE_READ: return false; - case RESOURCE_USAGE_TRANSFER_TO: + case RESOURCE_USAGE_COPY_TO: + case RESOURCE_USAGE_RESOLVE_TO: case RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE: case RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE: case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: @@ -69,15 +72,19 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { RDD::TextureLayout RenderingDeviceGraph::_usage_to_image_layout(ResourceUsage p_usage) { switch (p_usage) { - case RESOURCE_USAGE_TRANSFER_FROM: - return RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; - case RESOURCE_USAGE_TRANSFER_TO: - return RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; + case RESOURCE_USAGE_COPY_FROM: + return RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL; + case RESOURCE_USAGE_COPY_TO: + return RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; + case RESOURCE_USAGE_RESOLVE_FROM: + return RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL; + case RESOURCE_USAGE_RESOLVE_TO: + return RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL; case RESOURCE_USAGE_TEXTURE_SAMPLE: return RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; case RESOURCE_USAGE_STORAGE_IMAGE_READ: case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: - return RDD::TEXTURE_LAYOUT_GENERAL; + return RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL; case RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE: return RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: @@ -97,10 +104,14 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage switch (p_usage) { case RESOURCE_USAGE_NONE: return RDD::BarrierAccessBits(0); - case RESOURCE_USAGE_TRANSFER_FROM: - return RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - case RESOURCE_USAGE_TRANSFER_TO: - return RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; + case RESOURCE_USAGE_COPY_FROM: + return RDD::BARRIER_ACCESS_COPY_READ_BIT; + case RESOURCE_USAGE_COPY_TO: + return RDD::BARRIER_ACCESS_COPY_WRITE_BIT; + case RESOURCE_USAGE_RESOLVE_FROM: + return RDD::BARRIER_ACCESS_RESOLVE_READ_BIT; + case RESOURCE_USAGE_RESOLVE_TO: + return RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT; case RESOURCE_USAGE_UNIFORM_BUFFER_READ: return RDD::BARRIER_ACCESS_UNIFORM_READ_BIT; case RESOURCE_USAGE_INDIRECT_BUFFER_READ: @@ -818,26 +829,27 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC } break; case RecordedCommand::TYPE_TEXTURE_CLEAR: { const RecordedTextureClearCommand *texture_clear_command = reinterpret_cast(command); - driver->command_clear_color_texture(r_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range); + driver->command_clear_color_texture(r_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range); } break; case RecordedCommand::TYPE_TEXTURE_COPY: { const RecordedTextureCopyCommand *texture_copy_command = reinterpret_cast(command); - driver->command_copy_texture(r_command_buffer, texture_copy_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_copy_command->region); + const VectorView command_texture_copy_regions_view(texture_copy_command->texture_copy_regions(), texture_copy_command->texture_copy_regions_count); + driver->command_copy_texture(r_command_buffer, texture_copy_command->from_texture, RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, command_texture_copy_regions_view); } break; case RecordedCommand::TYPE_TEXTURE_GET_DATA: { const RecordedTextureGetDataCommand *texture_get_data_command = reinterpret_cast(command); const VectorView command_buffer_texture_copy_regions_view(texture_get_data_command->buffer_texture_copy_regions(), texture_get_data_command->buffer_texture_copy_regions_count); - driver->command_copy_texture_to_buffer(r_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view); + driver->command_copy_texture_to_buffer(r_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view); } break; case RecordedCommand::TYPE_TEXTURE_RESOLVE: { const RecordedTextureResolveCommand *texture_resolve_command = reinterpret_cast(command); - driver->command_resolve_texture(r_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap); + driver->command_resolve_texture(r_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL, texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap); } break; case RecordedCommand::TYPE_TEXTURE_UPDATE: { const RecordedTextureUpdateCommand *texture_update_command = reinterpret_cast(command); const RecordedBufferToTextureCopy *command_buffer_to_texture_copies = texture_update_command->buffer_to_texture_copies(); for (uint32_t j = 0; j < texture_update_command->buffer_to_texture_copies_count; j++) { - driver->command_copy_buffer_to_texture(r_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, command_buffer_to_texture_copies[j].region); + driver->command_copy_buffer_to_texture(r_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, command_buffer_to_texture_copies[j].region); } } break; case RecordedCommand::TYPE_CAPTURE_TIMESTAMP: { @@ -1271,6 +1283,7 @@ void RenderingDeviceGraph::initialize(RDD *p_driver, RenderingContextDriver::Dev } driver_honors_barriers = driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS); + driver_clears_with_copy_engine = driver->api_trait_get(RDD::API_TRAIT_CLEARS_WITH_COPY_ENGINE); } void RenderingDeviceGraph::finalize() { @@ -1321,12 +1334,12 @@ void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker int32_t command_index; RecordedBufferClearCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferClearCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_CLEAR; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->buffer = p_dst; command->offset = p_offset; command->size = p_size; - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; + ResourceUsage usage = RESOURCE_USAGE_COPY_TO; _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); } @@ -1337,13 +1350,13 @@ void RenderingDeviceGraph::add_buffer_copy(RDD::BufferID p_src, ResourceTracker int32_t command_index; RecordedBufferCopyCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferCopyCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_COPY; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->source = p_src; command->destination = p_dst; command->region = p_region; ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; - ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM }; _add_command_to_graph(trackers, usages, p_src_tracker != nullptr ? 2 : 1, command_index, command); } @@ -1352,13 +1365,13 @@ void RenderingDeviceGraph::add_buffer_get_data(RDD::BufferID p_src, ResourceTrac int32_t command_index; RecordedBufferGetDataCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferGetDataCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_GET_DATA; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->source = p_src; command->destination = p_dst; command->region = p_region; if (p_src_tracker != nullptr) { - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_FROM; + ResourceUsage usage = RESOURCE_USAGE_COPY_FROM; _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command); } else { _add_command_to_graph(nullptr, nullptr, 0, command_index, command); @@ -1373,7 +1386,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke int32_t command_index; RecordedBufferUpdateCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_BUFFER_UPDATE; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->destination = p_dst; command->buffer_copies_count = p_buffer_copies.size(); @@ -1382,7 +1395,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke buffer_copies[i] = p_buffer_copies[i]; } - ResourceUsage buffer_usage = RESOURCE_USAGE_TRANSFER_TO; + ResourceUsage buffer_usage = RESOURCE_USAGE_COPY_TO; _add_command_to_graph(&p_dst_tracker, &buffer_usage, 1, command_index, command); } @@ -1710,40 +1723,60 @@ void RenderingDeviceGraph::add_texture_clear(RDD::TextureID p_dst, ResourceTrack int32_t command_index; RecordedTextureClearCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureClearCommand), command_index)); command->type = RecordedCommand::TYPE_TEXTURE_CLEAR; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->texture = p_dst; command->color = p_color; command->range = p_range; - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; + ResourceUsage usage; + if (driver_clears_with_copy_engine) { + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; + usage = RESOURCE_USAGE_COPY_TO; + } else { + // If the driver is uncapable of using the copy engine for clearing the image (e.g. D3D12), we must either transition the + // resource to a render target or a storage image as that's the only two ways it can perform the operation. + if (p_dst_tracker->texture_usage & RDD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + command->self_stages = RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + usage = RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE; + } else { + command->self_stages = RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT; + usage = RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE; + } + } + _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); } -void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, RDD::TextureCopyRegion p_region) { +void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_texture_copy_regions) { DEV_ASSERT(p_src_tracker != nullptr); DEV_ASSERT(p_dst_tracker != nullptr); int32_t command_index; - RecordedTextureCopyCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureCopyCommand), command_index)); + uint64_t command_size = sizeof(RecordedTextureCopyCommand) + p_texture_copy_regions.size() * sizeof(RDD::TextureCopyRegion); + RecordedTextureCopyCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_TEXTURE_COPY; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->from_texture = p_src; command->to_texture = p_dst; - command->region = p_region; + command->texture_copy_regions_count = p_texture_copy_regions.size(); + + RDD::TextureCopyRegion *texture_copy_regions = command->texture_copy_regions(); + for (uint32_t i = 0; i < command->texture_copy_regions_count; i++) { + texture_copy_regions[i] = p_texture_copy_regions[i]; + } ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; - ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM }; _add_command_to_graph(trackers, usages, 2, command_index, command); } -void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions) { +void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions, ResourceTracker *p_dst_tracker) { DEV_ASSERT(p_src_tracker != nullptr); int32_t command_index; uint64_t command_size = sizeof(RecordedTextureGetDataCommand) + p_buffer_texture_copy_regions.size() * sizeof(RDD::BufferTextureCopyRegion); RecordedTextureGetDataCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_TEXTURE_GET_DATA; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->from_texture = p_src; command->to_buffer = p_dst; command->buffer_texture_copy_regions_count = p_buffer_texture_copy_regions.size(); @@ -1753,8 +1786,15 @@ void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTr buffer_texture_copy_regions[i] = p_buffer_texture_copy_regions[i]; } - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_FROM; - _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command); + if (p_dst_tracker != nullptr) { + // Add the optional destination tracker if it was provided. + ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; + ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM }; + _add_command_to_graph(trackers, usages, 2, command_index, command); + } else { + ResourceUsage usage = RESOURCE_USAGE_COPY_FROM; + _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command); + } } void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_src_layer, uint32_t p_src_mipmap, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { @@ -1764,7 +1804,7 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra int32_t command_index; RecordedTextureResolveCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureResolveCommand), command_index)); command->type = RecordedCommand::TYPE_TEXTURE_RESOLVE; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_RESOLVE_BIT; command->from_texture = p_src; command->to_texture = p_dst; command->src_layer = p_src_layer; @@ -1773,18 +1813,18 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra command->dst_mipmap = p_dst_mipmap; ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; - ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + ResourceUsage usages[2] = { RESOURCE_USAGE_RESOLVE_TO, RESOURCE_USAGE_RESOLVE_FROM }; _add_command_to_graph(trackers, usages, 2, command_index, command); } -void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies) { +void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies, VectorView p_buffer_trackers) { DEV_ASSERT(p_dst_tracker != nullptr); int32_t command_index; uint64_t command_size = sizeof(RecordedTextureUpdateCommand) + p_buffer_copies.size() * sizeof(RecordedBufferToTextureCopy); RecordedTextureUpdateCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_TEXTURE_UPDATE; - command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT; command->to_texture = p_dst; command->buffer_to_texture_copies_count = p_buffer_copies.size(); @@ -1793,8 +1833,25 @@ void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTrac buffer_to_texture_copies[i] = p_buffer_copies[i]; } - ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; - _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); + if (p_buffer_trackers.size() > 0) { + // Add the optional buffer trackers if they were provided. + thread_local LocalVector trackers; + thread_local LocalVector usages; + trackers.clear(); + usages.clear(); + for (uint32_t i = 0; i < p_buffer_trackers.size(); i++) { + trackers.push_back(p_buffer_trackers[i]); + usages.push_back(RESOURCE_USAGE_COPY_FROM); + } + + trackers.push_back(p_dst_tracker); + usages.push_back(RESOURCE_USAGE_COPY_TO); + + _add_command_to_graph(trackers.ptr(), usages.ptr(), trackers.size(), command_index, command); + } else { + ResourceUsage usage = RESOURCE_USAGE_COPY_TO; + _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); + } } void RenderingDeviceGraph::add_capture_timestamp(RDD::QueryPoolID p_query_pool, uint32_t p_index) { diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index a96382e0cc4..baa15f63f61 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -129,8 +129,10 @@ public: enum ResourceUsage { RESOURCE_USAGE_NONE, - RESOURCE_USAGE_TRANSFER_FROM, - RESOURCE_USAGE_TRANSFER_TO, + RESOURCE_USAGE_COPY_FROM, + RESOURCE_USAGE_COPY_TO, + RESOURCE_USAGE_RESOLVE_FROM, + RESOURCE_USAGE_RESOLVE_TO, RESOURCE_USAGE_UNIFORM_BUFFER_READ, RESOURCE_USAGE_INDIRECT_BUFFER_READ, RESOURCE_USAGE_TEXTURE_BUFFER_READ, @@ -161,6 +163,7 @@ public: RDD::BufferID buffer_driver_id; RDD::TextureID texture_driver_id; RDD::TextureSubresourceRange texture_subresources; + uint32_t texture_usage = 0; int32_t texture_slice_command_index = -1; ResourceTracker *parent = nullptr; ResourceTracker *dirty_shared_list = nullptr; @@ -337,7 +340,15 @@ private: struct RecordedTextureCopyCommand : RecordedCommand { RDD::TextureID from_texture; RDD::TextureID to_texture; - RDD::TextureCopyRegion region; + uint32_t texture_copy_regions_count = 0; + + _FORCE_INLINE_ RDD::TextureCopyRegion *texture_copy_regions() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RDD::TextureCopyRegion *texture_copy_regions() const { + return reinterpret_cast(&this[1]); + } }; struct RecordedTextureGetDataCommand : RecordedCommand { @@ -596,7 +607,8 @@ private: int32_t command_synchronization_index = -1; bool command_synchronization_pending = false; BarrierGroup barrier_group; - bool driver_honors_barriers = false; + bool driver_honors_barriers : 1; + bool driver_clears_with_copy_engine : 1; WorkaroundsState workarounds_state; TightLocalVector frames; uint32_t frame = 0; @@ -672,10 +684,10 @@ public: void add_draw_list_usages(VectorView p_trackers, VectorView p_usages); void add_draw_list_end(); void add_texture_clear(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, const Color &p_color, const RDD::TextureSubresourceRange &p_range); - void add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, RDD::TextureCopyRegion p_region); - void add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions); + void add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_texture_copy_regions); + void add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions, ResourceTracker *p_dst_tracker = nullptr); void add_texture_resolve(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_src_layer, uint32_t p_src_mipmap, uint32_t p_dst_layer, uint32_t p_dst_mipmap); - void add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies); + void add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies, VectorView p_buffer_trackers = VectorView()); void add_capture_timestamp(RDD::QueryPoolID p_query_pool, uint32_t p_index); void add_synchronization(); void begin_label(const String &p_label_name, const Color &p_color);