From 3f530c7091cbf71412afbe26eaf4e5306b5326d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Fri, 16 Feb 2024 23:44:50 +0100 Subject: [PATCH] Direct3D 12: Implement proper fallback for format casting --- .../d3d12/rendering_device_driver_d3d12.cpp | 566 ++++++++++-------- drivers/d3d12/rendering_device_driver_d3d12.h | 35 +- 2 files changed, 339 insertions(+), 262 deletions(-) diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index cb4fec7cd8d..381d022a55a 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -530,7 +530,7 @@ static const D3D12_RESOURCE_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_RESOURCE_DIMENSIO D3D12_RESOURCE_DIMENSION_TEXTURE2D, }; -void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state, ID3D12Resource *p_resource_override) { +void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state) { DEV_ASSERT(p_subresource != UINT32_MAX); // We don't support an "all-resources" command here. #ifdef DEBUG_COUNT_BARRIERS @@ -538,9 +538,16 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso #endif ResourceInfo::States *res_states = p_resource->states_ptr; - D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource]; - ID3D12Resource *res_to_transition = p_resource_override ? 
p_resource_override : p_resource->resource; + if (p_new_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { + if (unlikely(!res_states->xfamily_fallback.subresources_dirty.is_empty())) { + uint32_t subres_qword = p_subresource / 64; + uint64_t subres_mask = (uint64_t(1) << (p_subresource % 64)); + res_states->xfamily_fallback.subresources_dirty[subres_qword] |= subres_mask; + } + } + + D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource]; // Transitions can be considered redundant if the current state has all the bits of the new state. // This check does not apply to the common state however, which must resort to checking if the state is the same (0). @@ -553,7 +560,7 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso if (res_barriers.size() < res_barriers_count + 1) { res_barriers.resize(res_barriers_count + 1); } - res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(res_to_transition); + res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(p_resource->resource); res_barriers_count++; res_states->last_batch_with_uav_barrier = res_barriers_batch; } @@ -563,7 +570,7 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso if (res_barriers_requests.has(res_states)) { BarrierRequest &br = res_barriers_requests.get(res_states); - DEV_ASSERT(br.dx_resource == res_to_transition); + DEV_ASSERT(br.dx_resource == p_resource->resource); DEV_ASSERT(br.subres_mask_qwords == STEPIFY(res_states->subresource_states.size(), 64) / 64); DEV_ASSERT(br.planes == p_num_planes); @@ -681,7 +688,7 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso } } else { BarrierRequest &br = res_barriers_requests[res_states]; - br.dx_resource = res_to_transition; + br.dx_resource = p_resource->resource; br.subres_mask_qwords = STEPIFY(p_resource->states_ptr->subresource_states.size(), 64) / 64; CRASH_COND(p_resource->states_ptr->subresource_states.size() > 
BarrierRequest::MAX_SUBRESOURCES); br.planes = p_num_planes; @@ -1131,19 +1138,13 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p // If views of different families are wanted, special setup is needed for proper sharing among them. // Two options here: - // 1. If ID3DDevice10 is present and driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA). - // 2. Otherwise, fall back to an approach based on abusing aliasing, hoping for the best. [[CROSS_FAMILY_ALIASING]] - if (p_format.shareable_formats.size()) { - if (format_capabilities.relaxed_casting_supported) { - ComPtr device_10; - device->QueryInterface(device_10.GetAddressOf()); - if (device_10) { - relaxed_casting_available = true; - relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size()); - relaxed_casting_formats[0] = RD_TO_D3D12_FORMAT[p_format.format].general_format; - relaxed_casting_format_count++; - } - } + // 1. If the driver reports relaxed casting is supported, leverage its new extended resource creation API (via D3D12MA). + // 2. Otherwise, fall back to an approach based on having multiple versions of the resource and copying as needed. [[CROSS_FAMILY_FALLBACK]] + if (p_format.shareable_formats.size() && format_capabilities.relaxed_casting_supported) { + relaxed_casting_available = true; + relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size()); + relaxed_casting_formats[0] = RD_TO_D3D12_FORMAT[p_format.format].general_format; + relaxed_casting_format_count++; } HashMap aliases_forbidden_flags; @@ -1164,9 +1165,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p } if (cross_family_sharing && !relaxed_casting_available) { - // At least guarantee the same layout among aliases. - resource_desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; - // Per https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_texture_layout.
if (p_format.texture_type == TEXTURE_TYPE_1D) { ERR_FAIL_V_MSG(TextureID(), "This texture's views require aliasing, but that's not supported for a 1D texture."); @@ -1217,9 +1215,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p // Create. D3D12MA::ALLOCATION_DESC allocation_desc = {}; - if (cross_family_sharing && !relaxed_casting_available) { - allocation_desc.Flags = D3D12MA::ALLOCATION_FLAG_CAN_ALIAS; - } allocation_desc.HeapType = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_DEFAULT; if ((resource_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) { allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES; @@ -1339,53 +1334,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = main_uav_desc; uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; - // Create aliases if needed. 
[[CROSS_FAMILY_ALIASING]] - - using AliasEntry = Pair; - AliasEntry *aliases = nullptr; - uint32_t alias_count = 0; - if (cross_family_sharing && !relaxed_casting_available) { - aliases = ALLOCA_ARRAY(AliasEntry, p_format.shareable_formats.size()); - - for (int i = 0; i < p_format.shareable_formats.size(); i++) { - DataFormat curr_format = p_format.shareable_formats[i]; - - DXGI_FORMAT format_family = RD_TO_D3D12_FORMAT[curr_format].family; - if (format_family == RD_TO_D3D12_FORMAT[p_format.format].family) { - continue; - } - - D3D12_RESOURCE_DESC alias_resource_desc = *(D3D12_RESOURCE_DESC *)&resource_desc; - alias_resource_desc.Format = format_family; - clear_value.Format = format_family; - if ((alias_resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)) { - if (!texture_get_usages_supported_by_format(curr_format, false).has_flag(TEXTURE_USAGE_STORAGE_BIT)) { - alias_resource_desc.Flags &= ~D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - } - } - ID3D12Resource *alias = nullptr; - HRESULT res = allocator->CreateAliasingResource( - allocation.Get(), - 0, - &alias_resource_desc, - initial_state, - (alias_resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? clear_value_ptr : nullptr, - IID_PPV_ARGS(&alias)); - if (!SUCCEEDED(res)) { - for (uint32_t j = 0; j < alias_count; j++) { - aliases[j].second->Release(); - } - ERR_FAIL_V_MSG(TextureID(), "CreateAliasingResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - } - aliases[alias_count] = AliasEntry(format_family, alias); - alias_count++; - - if (curr_format == p_view.format) { - texture = alias; - } - } - } - // Bookkeep. 
TextureInfo *tex_info = VersatileResource::allocate(resources_allocator); @@ -1405,13 +1353,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p tex_info->mipmaps = resource_desc.MipLevels; tex_info->view_descs.srv = srv_desc; tex_info->view_descs.uav = uav_desc; - tex_info->main_texture = main_texture.Get(); - tex_info->aliasing_hack.main_uav_desc = main_uav_desc; - if (alias_count) { - for (uint32_t i = 0; i < alias_count; i++) { - tex_info->aliasing_hack.owner_info.aliases.insert(aliases[i].first, aliases[i].second); - } - } if ((p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) { textures_pending_clear.add(&tex_info->pending_clear); } @@ -1424,75 +1365,59 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create_from_extension(uint64_ } RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) { - const TextureInfo *owner_tex_info = (const TextureInfo *)p_original_texture.id; -#ifdef DEBUG_ENABLED - ERR_FAIL_COND_V(!owner_tex_info->owner_info.allocation, TextureID()); -#endif - - ID3D12Resource *texture = nullptr; - if (owner_tex_info->aliasing_hack.owner_info.aliases.is_empty()) { - texture = owner_tex_info->resource; - } else { - texture = owner_tex_info->main_texture; - for (const KeyValue> &E : owner_tex_info->aliasing_hack.owner_info.aliases) { - if (E.key == RD_TO_D3D12_FORMAT[p_view.format].family) { - texture = E.value.Get(); - break; - } - } - } - - // Describe views. - - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = owner_tex_info->view_descs.srv; - { - srv_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; - srv_desc.Shader4ComponentMapping = _compute_component_mapping(p_view); - } - - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = owner_tex_info->view_descs.uav; - { - uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; - } - - // Bookkeep. 
- - TextureInfo *tex_info = VersatileResource::allocate(resources_allocator); - tex_info->resource = texture; - tex_info->states_ptr = owner_tex_info->states_ptr; - tex_info->format = p_view.format; - tex_info->desc = owner_tex_info->desc; - tex_info->base_layer = owner_tex_info->base_layer; - tex_info->layers = owner_tex_info->layers; - tex_info->base_mip = owner_tex_info->base_mip; - tex_info->mipmaps = owner_tex_info->mipmaps; - tex_info->view_descs.srv = srv_desc; - tex_info->view_descs.uav = uav_desc; - tex_info->main_texture = owner_tex_info->main_texture; - tex_info->aliasing_hack.main_uav_desc = owner_tex_info->aliasing_hack.main_uav_desc; - - return TextureID(tex_info); + return _texture_create_shared_from_slice(p_original_texture, p_view, (TextureSliceType)-1, 0, 0, 0, 0); } RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) { - const TextureInfo *owner_tex_info = (const TextureInfo *)p_original_texture.id; + return _texture_create_shared_from_slice(p_original_texture, p_view, p_slice_type, p_layer, p_layers, p_mipmap, p_mipmaps); +} + +RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) { + TextureInfo *owner_tex_info = (TextureInfo *)p_original_texture.id; #ifdef DEBUG_ENABLED ERR_FAIL_COND_V(!owner_tex_info->owner_info.allocation, TextureID()); #endif - // Find appropriate resource instance. 
+ ComPtr new_texture; + ComPtr new_allocation; + ID3D12Resource *resource = nullptr; + CD3DX12_RESOURCE_DESC new_tex_resource_desc = owner_tex_info->desc; + bool cross_family = RD_TO_D3D12_FORMAT[p_view.format].family != RD_TO_D3D12_FORMAT[owner_tex_info->format].family; + if (cross_family && !format_capabilities.relaxed_casting_supported) { + // [[CROSS_FAMILY_FALLBACK]]. + // We have to create a new texture of the alternative format. - ID3D12Resource *texture = nullptr; - if (owner_tex_info->aliasing_hack.owner_info.aliases.is_empty()) { - texture = owner_tex_info->resource; - } else { - texture = owner_tex_info->main_texture; - for (const KeyValue> &E : owner_tex_info->aliasing_hack.owner_info.aliases) { - if (E.key == RD_TO_D3D12_FORMAT[p_view.format].family) { - texture = E.value.Get(); - break; + D3D12MA::ALLOCATION_DESC allocation_desc = {}; + allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; + + if (p_slice_type != -1) { +#ifdef DEV_ENABLED + // Actual slicing is not contemplated. If ever needed, let's at least realize. + if (p_slice_type != -1) { + uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers; + uint32_t slice_subresorce_count = p_mipmaps * p_layers; + DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count); } +#endif + new_tex_resource_desc.DepthOrArraySize = p_layers; + new_tex_resource_desc.MipLevels = p_mipmaps; } + new_tex_resource_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].family; + new_tex_resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; // Alternative formats can only be used as SRVs. 
+ + HRESULT res = allocator->CreateResource( + &allocation_desc, + &new_tex_resource_desc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + new_allocation.GetAddressOf(), + IID_PPV_ARGS(new_texture.GetAddressOf())); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), vformat("D3D12MA::CreateResource failed with error 0x%08ux.", (uint64_t)res)); + + resource = new_texture.Get(); + } else { + resource = owner_tex_info->resource; } // Describe views. @@ -1508,110 +1433,169 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared_from_slice(Text uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; } - // Complete description with slicing. + if (p_slice_type != -1) { + // Complete description with slicing. - switch (p_slice_type) { - case TEXTURE_SLICE_2D: { - if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer == 0) { - srv_desc.Texture2D.MostDetailedMip = p_mipmap; - srv_desc.Texture2D.MipLevels = p_mipmaps; + switch (p_slice_type) { + case TEXTURE_SLICE_2D: { + if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer == 0) { + srv_desc.Texture2D.MostDetailedMip = p_mipmap; + srv_desc.Texture2D.MipLevels = p_mipmaps; - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2D); - uav_desc.Texture1D.MipSlice = p_mipmap; - } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer == 0) { - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_UNKNOWN); - } else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer)) || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2D); + uav_desc.Texture1D.MipSlice = p_mipmap; + } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer == 
0) { + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_UNKNOWN); + } else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer)) || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srv_desc.Texture2DArray.MostDetailedMip = p_mipmap; + srv_desc.Texture2DArray.MipLevels = p_mipmaps; + srv_desc.Texture2DArray.FirstArraySlice = p_layer; + srv_desc.Texture2DArray.ArraySize = 1; + srv_desc.Texture2DArray.PlaneSlice = 0; + srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f; + + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + uav_desc.Texture2DArray.MipSlice = p_mipmap; + uav_desc.Texture2DArray.FirstArraySlice = p_layer; + uav_desc.Texture2DArray.ArraySize = 1; + uav_desc.Texture2DArray.PlaneSlice = 0; + } else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer))) { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srv_desc.Texture2DMSArray.FirstArraySlice = p_layer; + srv_desc.Texture2DMSArray.ArraySize = 1; + + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_UNKNOWN; + } else { + DEV_ASSERT(false); + } + } break; + case TEXTURE_SLICE_CUBEMAP: { + if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || p_layer == 0) { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + srv_desc.TextureCube.MostDetailedMip = p_mipmap; + srv_desc.TextureCube.MipLevels = p_mipmaps; + + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); + uav_desc.Texture2DArray.MipSlice = p_mipmap; + uav_desc.Texture2DArray.FirstArraySlice = p_layer; + uav_desc.Texture2DArray.ArraySize = 6; + uav_desc.Texture2DArray.PlaneSlice = 0; + } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || p_layer != 
0) { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + srv_desc.TextureCubeArray.MostDetailedMip = p_mipmap; + srv_desc.TextureCubeArray.MipLevels = p_mipmaps; + srv_desc.TextureCubeArray.First2DArrayFace = p_layer; + srv_desc.TextureCubeArray.NumCubes = 1; + srv_desc.TextureCubeArray.ResourceMinLODClamp = 0.0f; + + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); + uav_desc.Texture2DArray.MipSlice = p_mipmap; + uav_desc.Texture2DArray.FirstArraySlice = p_layer; + uav_desc.Texture2DArray.ArraySize = 6; + uav_desc.Texture2DArray.PlaneSlice = 0; + } else { + DEV_ASSERT(false); + } + } break; + case TEXTURE_SLICE_3D: { + DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE3D); + srv_desc.Texture3D.MostDetailedMip = p_mipmap; + srv_desc.Texture3D.MipLevels = p_mipmaps; + + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE3D); + uav_desc.Texture3D.MipSlice = p_mipmap; + uav_desc.Texture3D.WSize = -1; + } break; + case TEXTURE_SLICE_2D_ARRAY: { + DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY); srv_desc.Texture2DArray.MostDetailedMip = p_mipmap; srv_desc.Texture2DArray.MipLevels = p_mipmaps; srv_desc.Texture2DArray.FirstArraySlice = p_layer; - srv_desc.Texture2DArray.ArraySize = 1; - srv_desc.Texture2DArray.PlaneSlice = 0; - srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f; - - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; - uav_desc.Texture2DArray.MipSlice = p_mipmap; - uav_desc.Texture2DArray.FirstArraySlice = p_layer; - uav_desc.Texture2DArray.ArraySize = 1; - uav_desc.Texture2DArray.PlaneSlice = 0; - } else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer))) { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - srv_desc.Texture2DMSArray.FirstArraySlice = p_layer; - srv_desc.Texture2DMSArray.ArraySize = 1; - - uav_desc.ViewDimension = 
D3D12_UAV_DIMENSION_UNKNOWN; - } else { - DEV_ASSERT(false); - } - } break; - case TEXTURE_SLICE_CUBEMAP: { - if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || p_layer == 0) { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - srv_desc.TextureCube.MostDetailedMip = p_mipmap; - srv_desc.TextureCube.MipLevels = p_mipmaps; + srv_desc.Texture2DArray.ArraySize = p_layers; DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); uav_desc.Texture2DArray.MipSlice = p_mipmap; uav_desc.Texture2DArray.FirstArraySlice = p_layer; - uav_desc.Texture2DArray.ArraySize = 6; - uav_desc.Texture2DArray.PlaneSlice = 0; - } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || p_layer != 0) { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; - srv_desc.TextureCubeArray.MostDetailedMip = p_mipmap; - srv_desc.TextureCubeArray.MipLevels = p_mipmaps; - srv_desc.TextureCubeArray.First2DArrayFace = p_layer; - srv_desc.TextureCubeArray.NumCubes = 1; - srv_desc.TextureCubeArray.ResourceMinLODClamp = 0.0f; - - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); - uav_desc.Texture2DArray.MipSlice = p_mipmap; - uav_desc.Texture2DArray.FirstArraySlice = p_layer; - uav_desc.Texture2DArray.ArraySize = 6; - uav_desc.Texture2DArray.PlaneSlice = 0; - } else { - DEV_ASSERT(false); - } - } break; - case TEXTURE_SLICE_3D: { - DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE3D); - srv_desc.Texture3D.MostDetailedMip = p_mipmap; - srv_desc.Texture3D.MipLevels = p_mipmaps; - - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE3D); - uav_desc.Texture3D.MipSlice = p_mipmap; - uav_desc.Texture3D.WSize = -1; - } break; - case TEXTURE_SLICE_2D_ARRAY: { - DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY); - srv_desc.Texture2DArray.MostDetailedMip = p_mipmap; - srv_desc.Texture2DArray.MipLevels = p_mipmaps; - srv_desc.Texture2DArray.FirstArraySlice = p_layer; - 
srv_desc.Texture2DArray.ArraySize = p_layers; - - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); - uav_desc.Texture2DArray.MipSlice = p_mipmap; - uav_desc.Texture2DArray.FirstArraySlice = p_layer; - uav_desc.Texture2DArray.ArraySize = p_layers; - } break; - default: - break; + uav_desc.Texture2DArray.ArraySize = p_layers; + } break; + default: + break; + } } // Bookkeep. TextureInfo *tex_info = VersatileResource::allocate(resources_allocator); - tex_info->resource = texture; - tex_info->states_ptr = owner_tex_info->states_ptr; + tex_info->resource = resource; + if (new_texture.Get()) { + // [[CROSS_FAMILY_FALLBACK]]. + + DEV_ASSERT(cross_family && !format_capabilities.relaxed_casting_supported); + + uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers; +#ifdef DEV_ENABLED + // Actual slicing is not contemplated. If ever needed, let's at least realize. + if (p_slice_type != -1) { + uint32_t slice_subresorce_count = p_mipmaps * p_layers; + DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count); + } +#endif + + tex_info->owner_info.resource = new_texture; + tex_info->owner_info.allocation = new_allocation; + tex_info->owner_info.states.subresource_states.resize(new_texture_subresorce_count); + for (uint32_t i = 0; i < tex_info->owner_info.states.subresource_states.size(); i++) { + tex_info->owner_info.states.subresource_states[i] = D3D12_RESOURCE_STATE_COPY_DEST; + } + tex_info->states_ptr = &tex_info->owner_info.states; + + ResourceInfo::States::CrossFamillyFallback &xfamily = owner_tex_info->owner_info.states.xfamily_fallback; + if (xfamily.subresources_dirty.is_empty()) { + uint32_t items_required = STEPIFY(new_texture_subresorce_count, 64) / 64; + xfamily.subresources_dirty.resize(items_required); + memset(xfamily.subresources_dirty.ptr(), 255, sizeof(uint64_t) * xfamily.subresources_dirty.size()); + + // Create buffer for non-direct copy if it's a format not
supporting reinterpret-copy. + DEV_ASSERT(!xfamily.interim_buffer.Get()); + if (owner_tex_info->format == DATA_FORMAT_R16_UINT && p_view.format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) { + uint32_t row_pitch = STEPIFY(owner_tex_info->desc.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + uint32_t buffer_size = row_pitch * owner_tex_info->desc.Height * owner_tex_info->desc.Depth(); + CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(buffer_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); + resource_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + + D3D12MA::ALLOCATION_DESC allocation_desc = {}; + allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + + HRESULT res = allocator->CreateResource( + &allocation_desc, + &resource_desc, + D3D12_RESOURCE_STATE_COPY_SOURCE, // Makes the code that makes the copy easier. + nullptr, + xfamily.interim_buffer_alloc.GetAddressOf(), + IID_PPV_ARGS(xfamily.interim_buffer.GetAddressOf())); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + } + } + } else { + tex_info->states_ptr = owner_tex_info->states_ptr; + } tex_info->format = p_view.format; - tex_info->desc = owner_tex_info->desc; - tex_info->base_layer = p_layer; - tex_info->layers = p_layers; - tex_info->base_mip = p_mipmap; - tex_info->mipmaps = p_mipmaps; + tex_info->desc = new_tex_resource_desc; + if (p_slice_type == -1) { + tex_info->base_layer = owner_tex_info->base_layer; + tex_info->layers = owner_tex_info->layers; + tex_info->base_mip = owner_tex_info->base_mip; + tex_info->mipmaps = owner_tex_info->mipmaps; + } else { + tex_info->base_layer = p_layer; + tex_info->layers = p_layers; + tex_info->base_mip = p_mipmap; + tex_info->mipmaps = p_mipmaps; + } tex_info->view_descs.srv = srv_desc; tex_info->view_descs.uav = uav_desc; - tex_info->main_texture = owner_tex_info->main_texture; -
tex_info->aliasing_hack.main_uav_desc = owner_tex_info->aliasing_hack.main_uav_desc; + tex_info->main_texture = owner_tex_info; return TextureID(tex_info); } @@ -2375,7 +2359,7 @@ D3D12_RENDER_TARGET_VIEW_DESC RenderingDeviceDriverD3D12::_make_rtv_for_texture( } D3D12_UNORDERED_ACCESS_VIEW_DESC RenderingDeviceDriverD3D12::_make_ranged_uav_for_texture(const TextureInfo *p_texture_info, uint32_t p_mipmap_offset, uint32_t p_layer_offset, uint32_t p_layers, bool p_add_bases) { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = p_texture_info->aliasing_hack.main_uav_desc; + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = p_texture_info->view_descs.uav; uint32_t mip = (p_add_bases ? p_texture_info->base_mip : 0) + p_mipmap_offset; switch (p_texture_info->view_descs.uav.ViewDimension) { @@ -3884,13 +3868,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff } DEV_ASSERT((wanted_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) == (bool)(wanted_state & D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); - - if (wanted_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS || wanted_state == D3D12_RESOURCE_STATE_RENDER_TARGET) { - if (!sr.is_buffer) { - TextureInfo *tex_info = (TextureInfo *)sr.resource; - CRASH_COND_MSG(tex_info->resource != tex_info->main_texture, "The texture format used for UAV or RTV must be the main one."); - } - } } } #endif @@ -3970,7 +3947,35 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff for (uint32_t i = 0; i < tex_info->layers; i++) { for (uint32_t j = 0; j < tex_info->mipmaps; j++) { uint32_t subresource = D3D12CalcSubresource(tex_info->base_mip + j, tex_info->base_layer + i, 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, subresource, planes, wanted_state, tex_info->main_texture); + + if ((wanted_state & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE)) { + // [[CROSS_FAMILY_FALLBACK]]. 
+ if (tex_info->owner_info.resource && tex_info->main_texture && tex_info->main_texture != tex_info) { + uint32_t subres_qword = subresource / 64; + uint64_t subres_mask = (uint64_t(1) << (subresource % 64)); + if ((tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] & subres_mask)) { + // Prepare for copying the write-to texture to this one, if out-of-date. + _resource_transition_batch(tex_info->main_texture, subresource, planes, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(tex_info, subresource, planes, D3D12_RESOURCE_STATE_COPY_DEST); + + CommandBufferInfo::FamilyFallbackCopy ffc; + ffc.texture = tex_info; + ffc.subresource = subresource; + ffc.mipmap = j; + ffc.dst_wanted_state = wanted_state; + + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; + cmd_buf_info->family_fallback_copies.resize(MAX(cmd_buf_info->family_fallback_copies.size(), cmd_buf_info->family_fallback_copy_count + 1)); + cmd_buf_info->family_fallback_copies[cmd_buf_info->family_fallback_copy_count] = ffc; + cmd_buf_info->family_fallback_copy_count++; + + tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] &= ~subres_mask; + } + continue; + } + } + + _resource_transition_batch(tex_info, subresource, planes, wanted_state); } } } @@ -3979,7 +3984,56 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff } if (p_set_index == shader_info_in->sets.size() - 1) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + + // [[CROSS_FAMILY_FALLBACK]].
+ for (uint32_t i = 0; i < cmd_buf_info->family_fallback_copy_count; i++) { + const CommandBufferInfo::FamilyFallbackCopy &ffc = cmd_buf_info->family_fallback_copies[i]; + + D3D12_TEXTURE_COPY_LOCATION dst_tex = {}; + dst_tex.pResource = ffc.texture->resource; + dst_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst_tex.SubresourceIndex = ffc.subresource; + + D3D12_TEXTURE_COPY_LOCATION src_tex = {}; + src_tex.pResource = ffc.texture->main_texture->resource; + src_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src_tex.SubresourceIndex = ffc.subresource; + + const ResourceInfo::States::CrossFamillyFallback &xfamily = ffc.texture->main_texture->owner_info.states.xfamily_fallback; + if (xfamily.interim_buffer.Get()) { + // Must copy via a buffer due to reinterpret-copy known not to be available for these data types. + D3D12_TEXTURE_COPY_LOCATION buf_loc = {}; + buf_loc.pResource = xfamily.interim_buffer.Get(); + buf_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + buf_loc.PlacedFootprint.Offset = 0; + buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->main_texture->desc.Format; + buf_loc.PlacedFootprint.Footprint.Width = MAX(1u, ffc.texture->main_texture->desc.Width >> ffc.mipmap); + buf_loc.PlacedFootprint.Footprint.Height = MAX(1u, ffc.texture->main_texture->desc.Height >> ffc.mipmap); + buf_loc.PlacedFootprint.Footprint.Depth = MAX(1u, (uint32_t)ffc.texture->main_texture->desc.Depth() >> ffc.mipmap); + buf_loc.PlacedFootprint.Footprint.RowPitch = STEPIFY(buf_loc.PlacedFootprint.Footprint.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + + D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COPY_DEST); + cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier); + + cmd_buf_info->cmd_list->CopyTextureRegion(&buf_loc, 0, 0, 0, &src_tex, nullptr); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), 
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE); + cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier); + + buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->desc.Format; + cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &buf_loc, nullptr); + } else { + // Direct copy is possible. + cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &src_tex, nullptr); + } + + // Set the specific SRV state we wanted from the beginning to the alternative version of the texture. + _resource_transition_batch(ffc.texture, ffc.subresource, 1, ffc.dst_wanted_state); + } + cmd_buf_info->family_fallback_copy_count = 0; + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); } }
@@ -4371,7 +4425,9 @@ void RenderingDeviceDriverD3D12::command_resolve_texture(CommandBufferID p_cmd_b void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_texture.id; - ID3D12Resource *res_to_clear = tex_info->main_texture ? tex_info->main_texture : tex_info->resource; + if (tex_info->main_texture) { + tex_info = tex_info->main_texture; + } auto _transition_subresources = [&](D3D12_RESOURCE_STATES p_new_state) { for (uint32_t i = 0; i < p_subresources.layer_count; i++) { @@ -4382,7 +4438,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, subresource, 1, p_new_state, tex_info->main_texture); + _resource_transition_batch(tex_info, subresource, 1, p_new_state); } } _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); @@ -4408,7 +4464,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false); rtv_desc.Format = tex_info->view_descs.uav.Format; device->CreateRenderTargetView( - res_to_clear, + tex_info->resource, &rtv_desc, frames[frame_idx].desc_heap_walkers.rtv.get_curr_cpu_handle()); @@ -4450,7 +4506,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) { D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc =
_make_ranged_uav_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false); device->CreateUnorderedAccessView( - res_to_clear, + tex_info->resource, nullptr, &uav_desc, frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle()); @@ -4469,7 +4525,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint( frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(), frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(), - res_to_clear, + tex_info->resource, values, 0, nullptr); @@ -4486,7 +4542,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id; if (buf_info->flags.is_for_upload) { - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE, nullptr); + _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } uint32_t pixel_size = get_image_format_pixel_size(tex_info->format); @@ -4522,7 +4578,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID tex_info->desc.ArraySize()); CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); - _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST, tex_info->main_texture); + _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); } _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); @@ -4547,12 +4603,12 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID } } -void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView p_regions) { +void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout 
p_src_texture_layout, BufferID p_buf_locfer, VectorView p_regions) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_src_texture.id; - BufferInfo *buf_info = (BufferInfo *)p_dst_buffer.id; + BufferInfo *buf_info = (BufferInfo *)p_buf_locfer.id; - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST, nullptr); + _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); uint32_t block_w = 0, block_h = 0; get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h); @@ -4566,7 +4622,7 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE, tex_info->main_texture); + _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); @@ -4714,7 +4770,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd 0, p_texture_info->desc.MipLevels, p_texture_info->desc.ArraySize()); - _resource_transition_batch(p_texture_info, subresource, planes, p_states, nullptr); + _resource_transition_batch(p_texture_info, subresource, planes, p_states); } } }; @@ -6182,7 +6238,15 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { } if (format_capabilities.relaxed_casting_supported) { +#if 0 print_verbose("- Relaxed casting supported"); +#else + // Certain configurations (Windows 11 with an updated NVIDIA driver) crash when using relaxed casting. + // Therefore, we disable it temporarily until we can ensure that it's reliable. + // There are fallbacks in place that work in every case, if less efficient.
+ format_capabilities.relaxed_casting_supported = false; + print_verbose("- Relaxed casting supported (but disabled for now)"); +#endif } else { print_verbose("- Relaxed casting not supported"); } diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 50346f2a024..852cb9db0e6 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -217,15 +217,20 @@ private: // As many subresources as mipmaps * layers; planes (for depth-stencil) are tracked together. TightLocalVector subresource_states; // Used only if not a view. uint32_t last_batch_with_uav_barrier = 0; + struct CrossFamillyFallback { + TightLocalVector subresources_dirty; + ComPtr interim_buffer; + ComPtr interim_buffer_alloc; + } xfamily_fallback; // [[CROSS_FAMILY_FALLBACK]]. }; - ID3D12Resource *resource = nullptr; // Non-null even if a view. + ID3D12Resource *resource = nullptr; // Non-null even if not owned. struct { ComPtr resource; ComPtr allocation; States states; - } owner_info; // All empty if a view. - States *states_ptr = nullptr; // Own or from another if a view. + } owner_info; // All empty if the resource is not owned. + States *states_ptr = nullptr; // Own or from another if it doesn't own the D3D12 resource. 
}; struct BarrierRequest { @@ -257,7 +262,7 @@ private: uint64_t frame_barriers_cpu_time = 0; #endif - void _resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state, ID3D12Resource *p_resource_override = nullptr); + void _resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state); void _resource_transitions_flush(ID3D12GraphicsCommandList *p_cmd_list); /*****************/ @@ -298,13 +303,7 @@ private: D3D12_UNORDERED_ACCESS_VIEW_DESC uav; } view_descs = {}; - ID3D12Resource *main_texture = nullptr; - struct { - D3D12_UNORDERED_ACCESS_VIEW_DESC main_uav_desc; - struct { - HashMap> aliases; // Key is the DXGI format family. - } owner_info = {}; - } aliasing_hack = {}; // [[CROSS_FAMILY_ALIASING]] + TextureInfo *main_texture = nullptr; UINT mapped_subresource = UINT_MAX; SelfList pending_clear{ this }; @@ -333,6 +332,10 @@ public: virtual void texture_unmap(TextureID p_texture) override final; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; +private: + TextureID _texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps); + +public: /*****************/ /**** SAMPLER ****/ /*****************/ @@ -461,6 +464,16 @@ private: RenderPassState render_pass_state; bool descriptor_heaps_set = false; + + // [[CROSS_FAMILY_FALLBACK]]. + struct FamilyFallbackCopy { + TextureInfo *texture = nullptr; + uint32_t subresource = 0; + uint32_t mipmap = 0; + D3D12_RESOURCE_STATES dst_wanted_state = {}; + }; + LocalVector family_fallback_copies; + uint32_t family_fallback_copy_count = 0; }; public: