godot/drivers/vulkan/rendering_device_vulkan.cpp
Pedro J. Estébanez 801741e787 vk_mem_alloc: Update to upstream + Adapt approach to small objects pooling
This updates VMA and instead of using the custom small pool approach from 4e6c9d3ae9, lazily creates pools for the relevant memory type indices, which doesn't require patching VMA.

Also, patches already merged upstream or not needed any longer are removed.
2022-02-24 14:30:55 +01:00

9481 lines
384 KiB
C++

/*************************************************************************/
/* rendering_device_vulkan.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "rendering_device_vulkan.h"
#include "core/config/project_settings.h"
#include "core/io/compression.h"
#include "core/io/file_access.h"
#include "core/io/marshalls.h"
#include "core/os/os.h"
#include "core/templates/hashfuncs.h"
#include "drivers/vulkan/vulkan_context.h"
#include "thirdparty/misc/smolv.h"
#include "thirdparty/spirv-reflect/spirv_reflect.h"
//#define FORCE_FULL_BARRIER
static const uint32_t SMALL_ALLOCATION_MAX_SIZE = 4096;
// Get the Vulkan object information and possible stage access types (bitwise OR'd with incoming values)
RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, uint32_t p_post_barrier) {
Buffer *buffer = nullptr;
if (vertex_buffer_owner.owns(p_buffer)) {
buffer = vertex_buffer_owner.get_or_null(p_buffer);
r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
r_access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
if (buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) {
if (p_post_barrier & BARRIER_MASK_RASTER) {
r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
}
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
}
}
} else if (index_buffer_owner.owns(p_buffer)) {
r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
r_access_mask |= VK_ACCESS_INDEX_READ_BIT;
buffer = index_buffer_owner.get_or_null(p_buffer);
} else if (uniform_buffer_owner.owns(p_buffer)) {
if (p_post_barrier & BARRIER_MASK_RASTER) {
r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
}
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
}
r_access_mask |= VK_ACCESS_UNIFORM_READ_BIT;
buffer = uniform_buffer_owner.get_or_null(p_buffer);
} else if (texture_buffer_owner.owns(p_buffer)) {
if (p_post_barrier & BARRIER_MASK_RASTER) {
r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
r_access_mask |= VK_ACCESS_SHADER_READ_BIT;
}
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
r_access_mask |= VK_ACCESS_SHADER_READ_BIT;
}
buffer = &texture_buffer_owner.get_or_null(p_buffer)->buffer;
} else if (storage_buffer_owner.owns(p_buffer)) {
buffer = storage_buffer_owner.get_or_null(p_buffer);
if (p_post_barrier & BARRIER_MASK_RASTER) {
r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (buffer->usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) {
r_stage_mask |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
r_access_mask |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
}
}
return buffer;
}
static void update_external_dependency_for_store(VkSubpassDependency &dependency, bool is_sampled, bool is_storage, bool is_depth) {
// Transitioning from write to read, protect the shaders that may use this next
// Allow for copies/image layout transitions
dependency.dstStageMask |= VK_PIPELINE_STAGE_TRANSFER_BIT;
dependency.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
if (is_sampled) {
dependency.dstStageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
dependency.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT;
} else if (is_storage) {
dependency.dstStageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
dependency.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
} else {
dependency.dstStageMask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
if (is_depth) {
// Depth resources have additional stages that may be interested in them
dependency.dstStageMask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
dependency.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
}
void RenderingDeviceVulkan::_add_dependency(RID p_id, RID p_depends_on) {
if (!dependency_map.has(p_depends_on)) {
dependency_map[p_depends_on] = Set<RID>();
}
dependency_map[p_depends_on].insert(p_id);
if (!reverse_dependency_map.has(p_id)) {
reverse_dependency_map[p_id] = Set<RID>();
}
reverse_dependency_map[p_id].insert(p_depends_on);
}
void RenderingDeviceVulkan::_free_dependencies(RID p_id) {
//direct dependencies must be freed
Map<RID, Set<RID>>::Element *E = dependency_map.find(p_id);
if (E) {
while (E->get().size()) {
free(E->get().front()->get());
}
dependency_map.erase(E);
}
//reverse dependencies must be unreferenced
E = reverse_dependency_map.find(p_id);
if (E) {
for (Set<RID>::Element *F = E->get().front(); F; F = F->next()) {
Map<RID, Set<RID>>::Element *G = dependency_map.find(F->get());
ERR_CONTINUE(!G);
ERR_CONTINUE(!G->get().has(p_id));
G->get().erase(p_id);
}
reverse_dependency_map.erase(E);
}
}
const VkFormat RenderingDeviceVulkan::vulkan_formats[RenderingDevice::DATA_FORMAT_MAX] = {
VK_FORMAT_R4G4_UNORM_PACK8,
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
VK_FORMAT_B4G4R4A4_UNORM_PACK16,
VK_FORMAT_R5G6B5_UNORM_PACK16,
VK_FORMAT_B5G6R5_UNORM_PACK16,
VK_FORMAT_R5G5B5A1_UNORM_PACK16,
VK_FORMAT_B5G5R5A1_UNORM_PACK16,
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
VK_FORMAT_R8_UNORM,
VK_FORMAT_R8_SNORM,
VK_FORMAT_R8_USCALED,
VK_FORMAT_R8_SSCALED,
VK_FORMAT_R8_UINT,
VK_FORMAT_R8_SINT,
VK_FORMAT_R8_SRGB,
VK_FORMAT_R8G8_UNORM,
VK_FORMAT_R8G8_SNORM,
VK_FORMAT_R8G8_USCALED,
VK_FORMAT_R8G8_SSCALED,
VK_FORMAT_R8G8_UINT,
VK_FORMAT_R8G8_SINT,
VK_FORMAT_R8G8_SRGB,
VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8_SNORM,
VK_FORMAT_R8G8B8_USCALED,
VK_FORMAT_R8G8B8_SSCALED,
VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8_SRGB,
VK_FORMAT_B8G8R8_UNORM,
VK_FORMAT_B8G8R8_SNORM,
VK_FORMAT_B8G8R8_USCALED,
VK_FORMAT_B8G8R8_SSCALED,
VK_FORMAT_B8G8R8_UINT,
VK_FORMAT_B8G8R8_SINT,
VK_FORMAT_B8G8R8_SRGB,
VK_FORMAT_R8G8B8A8_UNORM,
VK_FORMAT_R8G8B8A8_SNORM,
VK_FORMAT_R8G8B8A8_USCALED,
VK_FORMAT_R8G8B8A8_SSCALED,
VK_FORMAT_R8G8B8A8_UINT,
VK_FORMAT_R8G8B8A8_SINT,
VK_FORMAT_R8G8B8A8_SRGB,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SNORM,
VK_FORMAT_B8G8R8A8_USCALED,
VK_FORMAT_B8G8R8A8_SSCALED,
VK_FORMAT_B8G8R8A8_UINT,
VK_FORMAT_B8G8R8A8_SINT,
VK_FORMAT_B8G8R8A8_SRGB,
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
VK_FORMAT_A8B8G8R8_USCALED_PACK32,
VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
VK_FORMAT_A8B8G8R8_UINT_PACK32,
VK_FORMAT_A8B8G8R8_SINT_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
VK_FORMAT_A2R10G10B10_UNORM_PACK32,
VK_FORMAT_A2R10G10B10_SNORM_PACK32,
VK_FORMAT_A2R10G10B10_USCALED_PACK32,
VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
VK_FORMAT_A2R10G10B10_UINT_PACK32,
VK_FORMAT_A2R10G10B10_SINT_PACK32,
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
VK_FORMAT_A2B10G10R10_SNORM_PACK32,
VK_FORMAT_A2B10G10R10_USCALED_PACK32,
VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
VK_FORMAT_A2B10G10R10_UINT_PACK32,
VK_FORMAT_A2B10G10R10_SINT_PACK32,
VK_FORMAT_R16_UNORM,
VK_FORMAT_R16_SNORM,
VK_FORMAT_R16_USCALED,
VK_FORMAT_R16_SSCALED,
VK_FORMAT_R16_UINT,
VK_FORMAT_R16_SINT,
VK_FORMAT_R16_SFLOAT,
VK_FORMAT_R16G16_UNORM,
VK_FORMAT_R16G16_SNORM,
VK_FORMAT_R16G16_USCALED,
VK_FORMAT_R16G16_SSCALED,
VK_FORMAT_R16G16_UINT,
VK_FORMAT_R16G16_SINT,
VK_FORMAT_R16G16_SFLOAT,
VK_FORMAT_R16G16B16_UNORM,
VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16_USCALED,
VK_FORMAT_R16G16B16_SSCALED,
VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16_SFLOAT,
VK_FORMAT_R16G16B16A16_UNORM,
VK_FORMAT_R16G16B16A16_SNORM,
VK_FORMAT_R16G16B16A16_USCALED,
VK_FORMAT_R16G16B16A16_SSCALED,
VK_FORMAT_R16G16B16A16_UINT,
VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R32_UINT,
VK_FORMAT_R32_SINT,
VK_FORMAT_R32_SFLOAT,
VK_FORMAT_R32G32_UINT,
VK_FORMAT_R32G32_SINT,
VK_FORMAT_R32G32_SFLOAT,
VK_FORMAT_R32G32B32_UINT,
VK_FORMAT_R32G32B32_SINT,
VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_UINT,
VK_FORMAT_R32G32B32A32_SINT,
VK_FORMAT_R32G32B32A32_SFLOAT,
VK_FORMAT_R64_UINT,
VK_FORMAT_R64_SINT,
VK_FORMAT_R64_SFLOAT,
VK_FORMAT_R64G64_UINT,
VK_FORMAT_R64G64_SINT,
VK_FORMAT_R64G64_SFLOAT,
VK_FORMAT_R64G64B64_UINT,
VK_FORMAT_R64G64B64_SINT,
VK_FORMAT_R64G64B64_SFLOAT,
VK_FORMAT_R64G64B64A64_UINT,
VK_FORMAT_R64G64B64A64_SINT,
VK_FORMAT_R64G64B64A64_SFLOAT,
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
VK_FORMAT_D16_UNORM,
VK_FORMAT_X8_D24_UNORM_PACK32,
VK_FORMAT_D32_SFLOAT,
VK_FORMAT_S8_UINT,
VK_FORMAT_D16_UNORM_S8_UINT,
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_BC1_RGB_UNORM_BLOCK,
VK_FORMAT_BC1_RGB_SRGB_BLOCK,
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
VK_FORMAT_BC2_UNORM_BLOCK,
VK_FORMAT_BC2_SRGB_BLOCK,
VK_FORMAT_BC3_UNORM_BLOCK,
VK_FORMAT_BC3_SRGB_BLOCK,
VK_FORMAT_BC4_UNORM_BLOCK,
VK_FORMAT_BC4_SNORM_BLOCK,
VK_FORMAT_BC5_UNORM_BLOCK,
VK_FORMAT_BC5_SNORM_BLOCK,
VK_FORMAT_BC6H_UFLOAT_BLOCK,
VK_FORMAT_BC6H_SFLOAT_BLOCK,
VK_FORMAT_BC7_UNORM_BLOCK,
VK_FORMAT_BC7_SRGB_BLOCK,
VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
VK_FORMAT_EAC_R11_UNORM_BLOCK,
VK_FORMAT_EAC_R11_SNORM_BLOCK,
VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
VK_FORMAT_G8B8G8R8_422_UNORM,
VK_FORMAT_B8G8R8G8_422_UNORM,
VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,
VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
VK_FORMAT_R10X6_UNORM_PACK16,
VK_FORMAT_R10X6G10X6_UNORM_2PACK16,
VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16,
VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
VK_FORMAT_R12X4_UNORM_PACK16,
VK_FORMAT_R12X4G12X4_UNORM_2PACK16,
VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16,
VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
VK_FORMAT_G16B16G16R16_422_UNORM,
VK_FORMAT_B16G16R16G16_422_UNORM,
VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,
VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
};
const char *RenderingDeviceVulkan::named_formats[RenderingDevice::DATA_FORMAT_MAX] = {
"R4G4_Unorm_Pack8",
"R4G4B4A4_Unorm_Pack16",
"B4G4R4A4_Unorm_Pack16",
"R5G6B5_Unorm_Pack16",
"B5G6R5_Unorm_Pack16",
"R5G5B5A1_Unorm_Pack16",
"B5G5R5A1_Unorm_Pack16",
"A1R5G5B5_Unorm_Pack16",
"R8_Unorm",
"R8_Snorm",
"R8_Uscaled",
"R8_Sscaled",
"R8_Uint",
"R8_Sint",
"R8_Srgb",
"R8G8_Unorm",
"R8G8_Snorm",
"R8G8_Uscaled",
"R8G8_Sscaled",
"R8G8_Uint",
"R8G8_Sint",
"R8G8_Srgb",
"R8G8B8_Unorm",
"R8G8B8_Snorm",
"R8G8B8_Uscaled",
"R8G8B8_Sscaled",
"R8G8B8_Uint",
"R8G8B8_Sint",
"R8G8B8_Srgb",
"B8G8R8_Unorm",
"B8G8R8_Snorm",
"B8G8R8_Uscaled",
"B8G8R8_Sscaled",
"B8G8R8_Uint",
"B8G8R8_Sint",
"B8G8R8_Srgb",
"R8G8B8A8_Unorm",
"R8G8B8A8_Snorm",
"R8G8B8A8_Uscaled",
"R8G8B8A8_Sscaled",
"R8G8B8A8_Uint",
"R8G8B8A8_Sint",
"R8G8B8A8_Srgb",
"B8G8R8A8_Unorm",
"B8G8R8A8_Snorm",
"B8G8R8A8_Uscaled",
"B8G8R8A8_Sscaled",
"B8G8R8A8_Uint",
"B8G8R8A8_Sint",
"B8G8R8A8_Srgb",
"A8B8G8R8_Unorm_Pack32",
"A8B8G8R8_Snorm_Pack32",
"A8B8G8R8_Uscaled_Pack32",
"A8B8G8R8_Sscaled_Pack32",
"A8B8G8R8_Uint_Pack32",
"A8B8G8R8_Sint_Pack32",
"A8B8G8R8_Srgb_Pack32",
"A2R10G10B10_Unorm_Pack32",
"A2R10G10B10_Snorm_Pack32",
"A2R10G10B10_Uscaled_Pack32",
"A2R10G10B10_Sscaled_Pack32",
"A2R10G10B10_Uint_Pack32",
"A2R10G10B10_Sint_Pack32",
"A2B10G10R10_Unorm_Pack32",
"A2B10G10R10_Snorm_Pack32",
"A2B10G10R10_Uscaled_Pack32",
"A2B10G10R10_Sscaled_Pack32",
"A2B10G10R10_Uint_Pack32",
"A2B10G10R10_Sint_Pack32",
"R16_Unorm",
"R16_Snorm",
"R16_Uscaled",
"R16_Sscaled",
"R16_Uint",
"R16_Sint",
"R16_Sfloat",
"R16G16_Unorm",
"R16G16_Snorm",
"R16G16_Uscaled",
"R16G16_Sscaled",
"R16G16_Uint",
"R16G16_Sint",
"R16G16_Sfloat",
"R16G16B16_Unorm",
"R16G16B16_Snorm",
"R16G16B16_Uscaled",
"R16G16B16_Sscaled",
"R16G16B16_Uint",
"R16G16B16_Sint",
"R16G16B16_Sfloat",
"R16G16B16A16_Unorm",
"R16G16B16A16_Snorm",
"R16G16B16A16_Uscaled",
"R16G16B16A16_Sscaled",
"R16G16B16A16_Uint",
"R16G16B16A16_Sint",
"R16G16B16A16_Sfloat",
"R32_Uint",
"R32_Sint",
"R32_Sfloat",
"R32G32_Uint",
"R32G32_Sint",
"R32G32_Sfloat",
"R32G32B32_Uint",
"R32G32B32_Sint",
"R32G32B32_Sfloat",
"R32G32B32A32_Uint",
"R32G32B32A32_Sint",
"R32G32B32A32_Sfloat",
"R64_Uint",
"R64_Sint",
"R64_Sfloat",
"R64G64_Uint",
"R64G64_Sint",
"R64G64_Sfloat",
"R64G64B64_Uint",
"R64G64B64_Sint",
"R64G64B64_Sfloat",
"R64G64B64A64_Uint",
"R64G64B64A64_Sint",
"R64G64B64A64_Sfloat",
"B10G11R11_Ufloat_Pack32",
"E5B9G9R9_Ufloat_Pack32",
"D16_Unorm",
"X8_D24_Unorm_Pack32",
"D32_Sfloat",
"S8_Uint",
"D16_Unorm_S8_Uint",
"D24_Unorm_S8_Uint",
"D32_Sfloat_S8_Uint",
"Bc1_Rgb_Unorm_Block",
"Bc1_Rgb_Srgb_Block",
"Bc1_Rgba_Unorm_Block",
"Bc1_Rgba_Srgb_Block",
"Bc2_Unorm_Block",
"Bc2_Srgb_Block",
"Bc3_Unorm_Block",
"Bc3_Srgb_Block",
"Bc4_Unorm_Block",
"Bc4_Snorm_Block",
"Bc5_Unorm_Block",
"Bc5_Snorm_Block",
"Bc6H_Ufloat_Block",
"Bc6H_Sfloat_Block",
"Bc7_Unorm_Block",
"Bc7_Srgb_Block",
"Etc2_R8G8B8_Unorm_Block",
"Etc2_R8G8B8_Srgb_Block",
"Etc2_R8G8B8A1_Unorm_Block",
"Etc2_R8G8B8A1_Srgb_Block",
"Etc2_R8G8B8A8_Unorm_Block",
"Etc2_R8G8B8A8_Srgb_Block",
"Eac_R11_Unorm_Block",
"Eac_R11_Snorm_Block",
"Eac_R11G11_Unorm_Block",
"Eac_R11G11_Snorm_Block",
"Astc_4X4_Unorm_Block",
"Astc_4X4_Srgb_Block",
"Astc_5X4_Unorm_Block",
"Astc_5X4_Srgb_Block",
"Astc_5X5_Unorm_Block",
"Astc_5X5_Srgb_Block",
"Astc_6X5_Unorm_Block",
"Astc_6X5_Srgb_Block",
"Astc_6X6_Unorm_Block",
"Astc_6X6_Srgb_Block",
"Astc_8X5_Unorm_Block",
"Astc_8X5_Srgb_Block",
"Astc_8X6_Unorm_Block",
"Astc_8X6_Srgb_Block",
"Astc_8X8_Unorm_Block",
"Astc_8X8_Srgb_Block",
"Astc_10X5_Unorm_Block",
"Astc_10X5_Srgb_Block",
"Astc_10X6_Unorm_Block",
"Astc_10X6_Srgb_Block",
"Astc_10X8_Unorm_Block",
"Astc_10X8_Srgb_Block",
"Astc_10X10_Unorm_Block",
"Astc_10X10_Srgb_Block",
"Astc_12X10_Unorm_Block",
"Astc_12X10_Srgb_Block",
"Astc_12X12_Unorm_Block",
"Astc_12X12_Srgb_Block",
"G8B8G8R8_422_Unorm",
"B8G8R8G8_422_Unorm",
"G8_B8_R8_3Plane_420_Unorm",
"G8_B8R8_2Plane_420_Unorm",
"G8_B8_R8_3Plane_422_Unorm",
"G8_B8R8_2Plane_422_Unorm",
"G8_B8_R8_3Plane_444_Unorm",
"R10X6_Unorm_Pack16",
"R10X6G10X6_Unorm_2Pack16",
"R10X6G10X6B10X6A10X6_Unorm_4Pack16",
"G10X6B10X6G10X6R10X6_422_Unorm_4Pack16",
"B10X6G10X6R10X6G10X6_422_Unorm_4Pack16",
"G10X6_B10X6_R10X6_3Plane_420_Unorm_3Pack16",
"G10X6_B10X6R10X6_2Plane_420_Unorm_3Pack16",
"G10X6_B10X6_R10X6_3Plane_422_Unorm_3Pack16",
"G10X6_B10X6R10X6_2Plane_422_Unorm_3Pack16",
"G10X6_B10X6_R10X6_3Plane_444_Unorm_3Pack16",
"R12X4_Unorm_Pack16",
"R12X4G12X4_Unorm_2Pack16",
"R12X4G12X4B12X4A12X4_Unorm_4Pack16",
"G12X4B12X4G12X4R12X4_422_Unorm_4Pack16",
"B12X4G12X4R12X4G12X4_422_Unorm_4Pack16",
"G12X4_B12X4_R12X4_3Plane_420_Unorm_3Pack16",
"G12X4_B12X4R12X4_2Plane_420_Unorm_3Pack16",
"G12X4_B12X4_R12X4_3Plane_422_Unorm_3Pack16",
"G12X4_B12X4R12X4_2Plane_422_Unorm_3Pack16",
"G12X4_B12X4_R12X4_3Plane_444_Unorm_3Pack16",
"G16B16G16R16_422_Unorm",
"B16G16R16G16_422_Unorm",
"G16_B16_R16_3Plane_420_Unorm",
"G16_B16R16_2Plane_420_Unorm",
"G16_B16_R16_3Plane_422_Unorm",
"G16_B16R16_2Plane_422_Unorm",
"G16_B16_R16_3Plane_444_Unorm",
};
int RenderingDeviceVulkan::get_format_vertex_size(DataFormat p_format) {
switch (p_format) {
case DATA_FORMAT_R8_UNORM:
case DATA_FORMAT_R8_SNORM:
case DATA_FORMAT_R8_UINT:
case DATA_FORMAT_R8_SINT:
case DATA_FORMAT_R8G8_UNORM:
case DATA_FORMAT_R8G8_SNORM:
case DATA_FORMAT_R8G8_UINT:
case DATA_FORMAT_R8G8_SINT:
case DATA_FORMAT_R8G8B8_UNORM:
case DATA_FORMAT_R8G8B8_SNORM:
case DATA_FORMAT_R8G8B8_UINT:
case DATA_FORMAT_R8G8B8_SINT:
case DATA_FORMAT_B8G8R8_UNORM:
case DATA_FORMAT_B8G8R8_SNORM:
case DATA_FORMAT_B8G8R8_UINT:
case DATA_FORMAT_B8G8R8_SINT:
case DATA_FORMAT_R8G8B8A8_UNORM:
case DATA_FORMAT_R8G8B8A8_SNORM:
case DATA_FORMAT_R8G8B8A8_UINT:
case DATA_FORMAT_R8G8B8A8_SINT:
case DATA_FORMAT_B8G8R8A8_UNORM:
case DATA_FORMAT_B8G8R8A8_SNORM:
case DATA_FORMAT_B8G8R8A8_UINT:
case DATA_FORMAT_B8G8R8A8_SINT:
case DATA_FORMAT_A2B10G10R10_UNORM_PACK32:
return 4;
case DATA_FORMAT_R16_UNORM:
case DATA_FORMAT_R16_SNORM:
case DATA_FORMAT_R16_UINT:
case DATA_FORMAT_R16_SINT:
case DATA_FORMAT_R16_SFLOAT:
return 4;
case DATA_FORMAT_R16G16_UNORM:
case DATA_FORMAT_R16G16_SNORM:
case DATA_FORMAT_R16G16_UINT:
case DATA_FORMAT_R16G16_SINT:
case DATA_FORMAT_R16G16_SFLOAT:
return 4;
case DATA_FORMAT_R16G16B16_UNORM:
case DATA_FORMAT_R16G16B16_SNORM:
case DATA_FORMAT_R16G16B16_UINT:
case DATA_FORMAT_R16G16B16_SINT:
case DATA_FORMAT_R16G16B16_SFLOAT:
return 8;
case DATA_FORMAT_R16G16B16A16_UNORM:
case DATA_FORMAT_R16G16B16A16_SNORM:
case DATA_FORMAT_R16G16B16A16_UINT:
case DATA_FORMAT_R16G16B16A16_SINT:
case DATA_FORMAT_R16G16B16A16_SFLOAT:
return 8;
case DATA_FORMAT_R32_UINT:
case DATA_FORMAT_R32_SINT:
case DATA_FORMAT_R32_SFLOAT:
return 4;
case DATA_FORMAT_R32G32_UINT:
case DATA_FORMAT_R32G32_SINT:
case DATA_FORMAT_R32G32_SFLOAT:
return 8;
case DATA_FORMAT_R32G32B32_UINT:
case DATA_FORMAT_R32G32B32_SINT:
case DATA_FORMAT_R32G32B32_SFLOAT:
return 12;
case DATA_FORMAT_R32G32B32A32_UINT:
case DATA_FORMAT_R32G32B32A32_SINT:
case DATA_FORMAT_R32G32B32A32_SFLOAT:
return 16;
case DATA_FORMAT_R64_UINT:
case DATA_FORMAT_R64_SINT:
case DATA_FORMAT_R64_SFLOAT:
return 8;
case DATA_FORMAT_R64G64_UINT:
case DATA_FORMAT_R64G64_SINT:
case DATA_FORMAT_R64G64_SFLOAT:
return 16;
case DATA_FORMAT_R64G64B64_UINT:
case DATA_FORMAT_R64G64B64_SINT:
case DATA_FORMAT_R64G64B64_SFLOAT:
return 24;
case DATA_FORMAT_R64G64B64A64_UINT:
case DATA_FORMAT_R64G64B64A64_SINT:
case DATA_FORMAT_R64G64B64A64_SFLOAT:
return 32;
default:
return 0;
}
}
uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) {
switch (p_format) {
case DATA_FORMAT_R4G4_UNORM_PACK8:
return 1;
case DATA_FORMAT_R4G4B4A4_UNORM_PACK16:
case DATA_FORMAT_B4G4R4A4_UNORM_PACK16:
case DATA_FORMAT_R5G6B5_UNORM_PACK16:
case DATA_FORMAT_B5G6R5_UNORM_PACK16:
case DATA_FORMAT_R5G5B5A1_UNORM_PACK16:
case DATA_FORMAT_B5G5R5A1_UNORM_PACK16:
case DATA_FORMAT_A1R5G5B5_UNORM_PACK16:
return 2;
case DATA_FORMAT_R8_UNORM:
case DATA_FORMAT_R8_SNORM:
case DATA_FORMAT_R8_USCALED:
case DATA_FORMAT_R8_SSCALED:
case DATA_FORMAT_R8_UINT:
case DATA_FORMAT_R8_SINT:
case DATA_FORMAT_R8_SRGB:
return 1;
case DATA_FORMAT_R8G8_UNORM:
case DATA_FORMAT_R8G8_SNORM:
case DATA_FORMAT_R8G8_USCALED:
case DATA_FORMAT_R8G8_SSCALED:
case DATA_FORMAT_R8G8_UINT:
case DATA_FORMAT_R8G8_SINT:
case DATA_FORMAT_R8G8_SRGB:
return 2;
case DATA_FORMAT_R8G8B8_UNORM:
case DATA_FORMAT_R8G8B8_SNORM:
case DATA_FORMAT_R8G8B8_USCALED:
case DATA_FORMAT_R8G8B8_SSCALED:
case DATA_FORMAT_R8G8B8_UINT:
case DATA_FORMAT_R8G8B8_SINT:
case DATA_FORMAT_R8G8B8_SRGB:
case DATA_FORMAT_B8G8R8_UNORM:
case DATA_FORMAT_B8G8R8_SNORM:
case DATA_FORMAT_B8G8R8_USCALED:
case DATA_FORMAT_B8G8R8_SSCALED:
case DATA_FORMAT_B8G8R8_UINT:
case DATA_FORMAT_B8G8R8_SINT:
case DATA_FORMAT_B8G8R8_SRGB:
return 3;
case DATA_FORMAT_R8G8B8A8_UNORM:
case DATA_FORMAT_R8G8B8A8_SNORM:
case DATA_FORMAT_R8G8B8A8_USCALED:
case DATA_FORMAT_R8G8B8A8_SSCALED:
case DATA_FORMAT_R8G8B8A8_UINT:
case DATA_FORMAT_R8G8B8A8_SINT:
case DATA_FORMAT_R8G8B8A8_SRGB:
case DATA_FORMAT_B8G8R8A8_UNORM:
case DATA_FORMAT_B8G8R8A8_SNORM:
case DATA_FORMAT_B8G8R8A8_USCALED:
case DATA_FORMAT_B8G8R8A8_SSCALED:
case DATA_FORMAT_B8G8R8A8_UINT:
case DATA_FORMAT_B8G8R8A8_SINT:
case DATA_FORMAT_B8G8R8A8_SRGB:
return 4;
case DATA_FORMAT_A8B8G8R8_UNORM_PACK32:
case DATA_FORMAT_A8B8G8R8_SNORM_PACK32:
case DATA_FORMAT_A8B8G8R8_USCALED_PACK32:
case DATA_FORMAT_A8B8G8R8_SSCALED_PACK32:
case DATA_FORMAT_A8B8G8R8_UINT_PACK32:
case DATA_FORMAT_A8B8G8R8_SINT_PACK32:
case DATA_FORMAT_A8B8G8R8_SRGB_PACK32:
case DATA_FORMAT_A2R10G10B10_UNORM_PACK32:
case DATA_FORMAT_A2R10G10B10_SNORM_PACK32:
case DATA_FORMAT_A2R10G10B10_USCALED_PACK32:
case DATA_FORMAT_A2R10G10B10_SSCALED_PACK32:
case DATA_FORMAT_A2R10G10B10_UINT_PACK32:
case DATA_FORMAT_A2R10G10B10_SINT_PACK32:
case DATA_FORMAT_A2B10G10R10_UNORM_PACK32:
case DATA_FORMAT_A2B10G10R10_SNORM_PACK32:
case DATA_FORMAT_A2B10G10R10_USCALED_PACK32:
case DATA_FORMAT_A2B10G10R10_SSCALED_PACK32:
case DATA_FORMAT_A2B10G10R10_UINT_PACK32:
case DATA_FORMAT_A2B10G10R10_SINT_PACK32:
return 4;
case DATA_FORMAT_R16_UNORM:
case DATA_FORMAT_R16_SNORM:
case DATA_FORMAT_R16_USCALED:
case DATA_FORMAT_R16_SSCALED:
case DATA_FORMAT_R16_UINT:
case DATA_FORMAT_R16_SINT:
case DATA_FORMAT_R16_SFLOAT:
return 2;
case DATA_FORMAT_R16G16_UNORM:
case DATA_FORMAT_R16G16_SNORM:
case DATA_FORMAT_R16G16_USCALED:
case DATA_FORMAT_R16G16_SSCALED:
case DATA_FORMAT_R16G16_UINT:
case DATA_FORMAT_R16G16_SINT:
case DATA_FORMAT_R16G16_SFLOAT:
return 4;
case DATA_FORMAT_R16G16B16_UNORM:
case DATA_FORMAT_R16G16B16_SNORM:
case DATA_FORMAT_R16G16B16_USCALED:
case DATA_FORMAT_R16G16B16_SSCALED:
case DATA_FORMAT_R16G16B16_UINT:
case DATA_FORMAT_R16G16B16_SINT:
case DATA_FORMAT_R16G16B16_SFLOAT:
return 6;
case DATA_FORMAT_R16G16B16A16_UNORM:
case DATA_FORMAT_R16G16B16A16_SNORM:
case DATA_FORMAT_R16G16B16A16_USCALED:
case DATA_FORMAT_R16G16B16A16_SSCALED:
case DATA_FORMAT_R16G16B16A16_UINT:
case DATA_FORMAT_R16G16B16A16_SINT:
case DATA_FORMAT_R16G16B16A16_SFLOAT:
return 8;
case DATA_FORMAT_R32_UINT:
case DATA_FORMAT_R32_SINT:
case DATA_FORMAT_R32_SFLOAT:
return 4;
case DATA_FORMAT_R32G32_UINT:
case DATA_FORMAT_R32G32_SINT:
case DATA_FORMAT_R32G32_SFLOAT:
return 8;
case DATA_FORMAT_R32G32B32_UINT:
case DATA_FORMAT_R32G32B32_SINT:
case DATA_FORMAT_R32G32B32_SFLOAT:
return 12;
case DATA_FORMAT_R32G32B32A32_UINT:
case DATA_FORMAT_R32G32B32A32_SINT:
case DATA_FORMAT_R32G32B32A32_SFLOAT:
return 16;
case DATA_FORMAT_R64_UINT:
case DATA_FORMAT_R64_SINT:
case DATA_FORMAT_R64_SFLOAT:
return 8;
case DATA_FORMAT_R64G64_UINT:
case DATA_FORMAT_R64G64_SINT:
case DATA_FORMAT_R64G64_SFLOAT:
return 16;
case DATA_FORMAT_R64G64B64_UINT:
case DATA_FORMAT_R64G64B64_SINT:
case DATA_FORMAT_R64G64B64_SFLOAT:
return 24;
case DATA_FORMAT_R64G64B64A64_UINT:
case DATA_FORMAT_R64G64B64A64_SINT:
case DATA_FORMAT_R64G64B64A64_SFLOAT:
return 32;
case DATA_FORMAT_B10G11R11_UFLOAT_PACK32:
case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32:
return 4;
case DATA_FORMAT_D16_UNORM:
return 2;
case DATA_FORMAT_X8_D24_UNORM_PACK32:
return 4;
case DATA_FORMAT_D32_SFLOAT:
return 4;
case DATA_FORMAT_S8_UINT:
return 1;
case DATA_FORMAT_D16_UNORM_S8_UINT:
return 4;
case DATA_FORMAT_D24_UNORM_S8_UINT:
return 4;
case DATA_FORMAT_D32_SFLOAT_S8_UINT:
return 5; //?
case DATA_FORMAT_BC1_RGB_UNORM_BLOCK:
case DATA_FORMAT_BC1_RGB_SRGB_BLOCK:
case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK:
case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK:
case DATA_FORMAT_BC2_UNORM_BLOCK:
case DATA_FORMAT_BC2_SRGB_BLOCK:
case DATA_FORMAT_BC3_UNORM_BLOCK:
case DATA_FORMAT_BC3_SRGB_BLOCK:
case DATA_FORMAT_BC4_UNORM_BLOCK:
case DATA_FORMAT_BC4_SNORM_BLOCK:
case DATA_FORMAT_BC5_UNORM_BLOCK:
case DATA_FORMAT_BC5_SNORM_BLOCK:
case DATA_FORMAT_BC6H_UFLOAT_BLOCK:
case DATA_FORMAT_BC6H_SFLOAT_BLOCK:
case DATA_FORMAT_BC7_UNORM_BLOCK:
case DATA_FORMAT_BC7_SRGB_BLOCK:
return 1;
case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
return 1;
case DATA_FORMAT_EAC_R11_UNORM_BLOCK:
case DATA_FORMAT_EAC_R11_SNORM_BLOCK:
case DATA_FORMAT_EAC_R11G11_UNORM_BLOCK:
case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK:
return 1;
case DATA_FORMAT_ASTC_4x4_UNORM_BLOCK:
case DATA_FORMAT_ASTC_4x4_SRGB_BLOCK:
case DATA_FORMAT_ASTC_5x4_UNORM_BLOCK:
case DATA_FORMAT_ASTC_5x4_SRGB_BLOCK:
case DATA_FORMAT_ASTC_5x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_5x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_6x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_6x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_6x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_6x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x8_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x8_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x8_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x8_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x10_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x10_SRGB_BLOCK:
case DATA_FORMAT_ASTC_12x10_UNORM_BLOCK:
case DATA_FORMAT_ASTC_12x10_SRGB_BLOCK:
case DATA_FORMAT_ASTC_12x12_UNORM_BLOCK:
case DATA_FORMAT_ASTC_12x12_SRGB_BLOCK:
return 1;
case DATA_FORMAT_G8B8G8R8_422_UNORM:
case DATA_FORMAT_B8G8R8G8_422_UNORM:
return 4;
case DATA_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
case DATA_FORMAT_G8_B8R8_2PLANE_420_UNORM:
case DATA_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
case DATA_FORMAT_G8_B8R8_2PLANE_422_UNORM:
case DATA_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
return 4;
case DATA_FORMAT_R10X6_UNORM_PACK16:
case DATA_FORMAT_R10X6G10X6_UNORM_2PACK16:
case DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
case DATA_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
case DATA_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
case DATA_FORMAT_R12X4_UNORM_PACK16:
case DATA_FORMAT_R12X4G12X4_UNORM_2PACK16:
case DATA_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
case DATA_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
case DATA_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
return 2;
case DATA_FORMAT_G16B16G16R16_422_UNORM:
case DATA_FORMAT_B16G16R16G16_422_UNORM:
case DATA_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
case DATA_FORMAT_G16_B16R16_2PLANE_420_UNORM:
case DATA_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
case DATA_FORMAT_G16_B16R16_2PLANE_422_UNORM:
case DATA_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
return 8;
default: {
ERR_PRINT("Format not handled, bug");
}
}
return 1;
}
// https://www.khronos.org/registry/DataFormat/specs/1.1/dataformat.1.1.pdf
void RenderingDeviceVulkan::get_compressed_image_format_block_dimensions(DataFormat p_format, uint32_t &r_w, uint32_t &r_h) {
switch (p_format) {
case DATA_FORMAT_BC1_RGB_UNORM_BLOCK:
case DATA_FORMAT_BC1_RGB_SRGB_BLOCK:
case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK:
case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK:
case DATA_FORMAT_BC2_UNORM_BLOCK:
case DATA_FORMAT_BC2_SRGB_BLOCK:
case DATA_FORMAT_BC3_UNORM_BLOCK:
case DATA_FORMAT_BC3_SRGB_BLOCK:
case DATA_FORMAT_BC4_UNORM_BLOCK:
case DATA_FORMAT_BC4_SNORM_BLOCK:
case DATA_FORMAT_BC5_UNORM_BLOCK:
case DATA_FORMAT_BC5_SNORM_BLOCK:
case DATA_FORMAT_BC6H_UFLOAT_BLOCK:
case DATA_FORMAT_BC6H_SFLOAT_BLOCK:
case DATA_FORMAT_BC7_UNORM_BLOCK:
case DATA_FORMAT_BC7_SRGB_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
case DATA_FORMAT_EAC_R11_UNORM_BLOCK:
case DATA_FORMAT_EAC_R11_SNORM_BLOCK:
case DATA_FORMAT_EAC_R11G11_UNORM_BLOCK:
case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK:
case DATA_FORMAT_ASTC_4x4_UNORM_BLOCK: //again, not sure about astc
case DATA_FORMAT_ASTC_4x4_SRGB_BLOCK:
case DATA_FORMAT_ASTC_5x4_UNORM_BLOCK:
case DATA_FORMAT_ASTC_5x4_SRGB_BLOCK:
case DATA_FORMAT_ASTC_5x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_5x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_6x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_6x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_6x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_6x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x8_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x8_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x8_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x8_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x10_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x10_SRGB_BLOCK:
case DATA_FORMAT_ASTC_12x10_UNORM_BLOCK:
case DATA_FORMAT_ASTC_12x10_SRGB_BLOCK:
case DATA_FORMAT_ASTC_12x12_UNORM_BLOCK:
case DATA_FORMAT_ASTC_12x12_SRGB_BLOCK:
r_w = 4;
r_h = 4;
return;
default: {
r_w = 1;
r_h = 1;
}
}
}
uint32_t RenderingDeviceVulkan::get_compressed_image_format_block_byte_size(DataFormat p_format) {
switch (p_format) {
case DATA_FORMAT_BC1_RGB_UNORM_BLOCK:
case DATA_FORMAT_BC1_RGB_SRGB_BLOCK:
case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK:
case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK:
return 8;
case DATA_FORMAT_BC2_UNORM_BLOCK:
case DATA_FORMAT_BC2_SRGB_BLOCK:
return 16;
case DATA_FORMAT_BC3_UNORM_BLOCK:
case DATA_FORMAT_BC3_SRGB_BLOCK:
return 16;
case DATA_FORMAT_BC4_UNORM_BLOCK:
case DATA_FORMAT_BC4_SNORM_BLOCK:
return 8;
case DATA_FORMAT_BC5_UNORM_BLOCK:
case DATA_FORMAT_BC5_SNORM_BLOCK:
return 16;
case DATA_FORMAT_BC6H_UFLOAT_BLOCK:
case DATA_FORMAT_BC6H_SFLOAT_BLOCK:
return 16;
case DATA_FORMAT_BC7_UNORM_BLOCK:
case DATA_FORMAT_BC7_SRGB_BLOCK:
return 16;
case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
return 8;
case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
return 8;
case DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
return 16;
case DATA_FORMAT_EAC_R11_UNORM_BLOCK:
case DATA_FORMAT_EAC_R11_SNORM_BLOCK:
return 8;
case DATA_FORMAT_EAC_R11G11_UNORM_BLOCK:
case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK:
return 16;
case DATA_FORMAT_ASTC_4x4_UNORM_BLOCK: //again, not sure about astc
case DATA_FORMAT_ASTC_4x4_SRGB_BLOCK:
case DATA_FORMAT_ASTC_5x4_UNORM_BLOCK:
case DATA_FORMAT_ASTC_5x4_SRGB_BLOCK:
case DATA_FORMAT_ASTC_5x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_5x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_6x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_6x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_6x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_6x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_8x8_UNORM_BLOCK:
case DATA_FORMAT_ASTC_8x8_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x5_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x5_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x6_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x6_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x8_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x8_SRGB_BLOCK:
case DATA_FORMAT_ASTC_10x10_UNORM_BLOCK:
case DATA_FORMAT_ASTC_10x10_SRGB_BLOCK:
case DATA_FORMAT_ASTC_12x10_UNORM_BLOCK:
case DATA_FORMAT_ASTC_12x10_SRGB_BLOCK:
case DATA_FORMAT_ASTC_12x12_UNORM_BLOCK:
case DATA_FORMAT_ASTC_12x12_SRGB_BLOCK:
return 8; //wrong
default: {
}
}
return 1;
}
uint32_t RenderingDeviceVulkan::get_compressed_image_format_pixel_rshift(DataFormat p_format) {
switch (p_format) {
case DATA_FORMAT_BC1_RGB_UNORM_BLOCK: //these formats are half byte size, so rshift is 1
case DATA_FORMAT_BC1_RGB_SRGB_BLOCK:
case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK:
case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK:
case DATA_FORMAT_BC4_UNORM_BLOCK:
case DATA_FORMAT_BC4_SNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
case DATA_FORMAT_EAC_R11_UNORM_BLOCK:
case DATA_FORMAT_EAC_R11_SNORM_BLOCK:
return 1;
default: {
}
}
return 0;
}
bool RenderingDeviceVulkan::format_has_stencil(DataFormat p_format) {
switch (p_format) {
case DATA_FORMAT_S8_UINT:
case DATA_FORMAT_D16_UNORM_S8_UINT:
case DATA_FORMAT_D24_UNORM_S8_UINT:
case DATA_FORMAT_D32_SFLOAT_S8_UINT: {
return true;
}
default: {
}
}
return false;
}
uint32_t RenderingDeviceVulkan::get_image_format_required_size(DataFormat p_format, uint32_t p_width, uint32_t p_height, uint32_t p_depth, uint32_t p_mipmaps, uint32_t *r_blockw, uint32_t *r_blockh, uint32_t *r_depth) {
ERR_FAIL_COND_V(p_mipmaps == 0, 0);
uint32_t w = p_width;
uint32_t h = p_height;
uint32_t d = p_depth;
uint32_t size = 0;
uint32_t pixel_size = get_image_format_pixel_size(p_format);
uint32_t pixel_rshift = get_compressed_image_format_pixel_rshift(p_format);
uint32_t blockw, blockh;
get_compressed_image_format_block_dimensions(p_format, blockw, blockh);
for (uint32_t i = 0; i < p_mipmaps; i++) {
uint32_t bw = w % blockw != 0 ? w + (blockw - w % blockw) : w;
uint32_t bh = h % blockh != 0 ? h + (blockh - h % blockh) : h;
uint32_t s = bw * bh;
s *= pixel_size;
s >>= pixel_rshift;
size += s * d;
if (r_blockw) {
*r_blockw = bw;
}
if (r_blockh) {
*r_blockh = bh;
}
if (r_depth) {
*r_depth = d;
}
w = MAX(blockw, w >> 1);
h = MAX(blockh, h >> 1);
d = MAX(1, d >> 1);
}
return size;
}
uint32_t RenderingDeviceVulkan::get_image_required_mipmaps(uint32_t p_width, uint32_t p_height, uint32_t p_depth) {
//formats and block size don't really matter here since they can all go down to 1px (even if block is larger)
int w = p_width;
int h = p_height;
int d = p_depth;
int mipmaps = 1;
while (true) {
if (w == 1 && h == 1 && d == 1) {
break;
}
w = MAX(1, w >> 1);
h = MAX(1, h >> 1);
d = MAX(1, d >> 1);
mipmaps++;
}
return mipmaps;
}
///////////////////////
const VkCompareOp RenderingDeviceVulkan::compare_operators[RenderingDevice::COMPARE_OP_MAX] = {
VK_COMPARE_OP_NEVER,
VK_COMPARE_OP_LESS,
VK_COMPARE_OP_EQUAL,
VK_COMPARE_OP_LESS_OR_EQUAL,
VK_COMPARE_OP_GREATER,
VK_COMPARE_OP_NOT_EQUAL,
VK_COMPARE_OP_GREATER_OR_EQUAL,
VK_COMPARE_OP_ALWAYS
};
const VkStencilOp RenderingDeviceVulkan::stencil_operations[RenderingDevice::STENCIL_OP_MAX] = {
VK_STENCIL_OP_KEEP,
VK_STENCIL_OP_ZERO,
VK_STENCIL_OP_REPLACE,
VK_STENCIL_OP_INCREMENT_AND_CLAMP,
VK_STENCIL_OP_DECREMENT_AND_CLAMP,
VK_STENCIL_OP_INVERT,
VK_STENCIL_OP_INCREMENT_AND_WRAP,
VK_STENCIL_OP_DECREMENT_AND_WRAP
};
const VkSampleCountFlagBits RenderingDeviceVulkan::rasterization_sample_count[RenderingDevice::TEXTURE_SAMPLES_MAX] = {
VK_SAMPLE_COUNT_1_BIT,
VK_SAMPLE_COUNT_2_BIT,
VK_SAMPLE_COUNT_4_BIT,
VK_SAMPLE_COUNT_8_BIT,
VK_SAMPLE_COUNT_16_BIT,
VK_SAMPLE_COUNT_32_BIT,
VK_SAMPLE_COUNT_64_BIT,
};
const VkLogicOp RenderingDeviceVulkan::logic_operations[RenderingDevice::LOGIC_OP_MAX] = {
VK_LOGIC_OP_CLEAR,
VK_LOGIC_OP_AND,
VK_LOGIC_OP_AND_REVERSE,
VK_LOGIC_OP_COPY,
VK_LOGIC_OP_AND_INVERTED,
VK_LOGIC_OP_NO_OP,
VK_LOGIC_OP_XOR,
VK_LOGIC_OP_OR,
VK_LOGIC_OP_NOR,
VK_LOGIC_OP_EQUIVALENT,
VK_LOGIC_OP_INVERT,
VK_LOGIC_OP_OR_REVERSE,
VK_LOGIC_OP_COPY_INVERTED,
VK_LOGIC_OP_OR_INVERTED,
VK_LOGIC_OP_NAND,
VK_LOGIC_OP_SET
};
const VkBlendFactor RenderingDeviceVulkan::blend_factors[RenderingDevice::BLEND_FACTOR_MAX] = {
VK_BLEND_FACTOR_ZERO,
VK_BLEND_FACTOR_ONE,
VK_BLEND_FACTOR_SRC_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
VK_BLEND_FACTOR_DST_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
VK_BLEND_FACTOR_SRC_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
VK_BLEND_FACTOR_CONSTANT_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
VK_BLEND_FACTOR_CONSTANT_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
VK_BLEND_FACTOR_SRC_ALPHA_SATURATE,
VK_BLEND_FACTOR_SRC1_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
VK_BLEND_FACTOR_SRC1_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
};
const VkBlendOp RenderingDeviceVulkan::blend_operations[RenderingDevice::BLEND_OP_MAX] = {
VK_BLEND_OP_ADD,
VK_BLEND_OP_SUBTRACT,
VK_BLEND_OP_REVERSE_SUBTRACT,
VK_BLEND_OP_MIN,
VK_BLEND_OP_MAX
};
const VkSamplerAddressMode RenderingDeviceVulkan::address_modes[RenderingDevice::SAMPLER_REPEAT_MODE_MAX] = {
VK_SAMPLER_ADDRESS_MODE_REPEAT,
VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE
};
const VkBorderColor RenderingDeviceVulkan::sampler_border_colors[RenderingDevice::SAMPLER_BORDER_COLOR_MAX] = {
VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
VK_BORDER_COLOR_INT_TRANSPARENT_BLACK,
VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
VK_BORDER_COLOR_INT_OPAQUE_BLACK,
VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
VK_BORDER_COLOR_INT_OPAQUE_WHITE
};
const VkImageType RenderingDeviceVulkan::vulkan_image_type[RenderingDevice::TEXTURE_TYPE_MAX] = {
VK_IMAGE_TYPE_1D,
VK_IMAGE_TYPE_2D,
VK_IMAGE_TYPE_3D,
VK_IMAGE_TYPE_2D,
VK_IMAGE_TYPE_1D,
VK_IMAGE_TYPE_2D,
VK_IMAGE_TYPE_2D
};
/***************************/
/**** BUFFER MANAGEMENT ****/
/***************************/
Error RenderingDeviceVulkan::_buffer_allocate(Buffer *p_buffer, uint32_t p_size, uint32_t p_usage, VmaMemoryUsage p_mapping) {
VkBufferCreateInfo bufferInfo;
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.pNext = nullptr;
bufferInfo.flags = 0;
bufferInfo.size = p_size;
bufferInfo.usage = p_usage;
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
bufferInfo.queueFamilyIndexCount = 0;
bufferInfo.pQueueFamilyIndices = nullptr;
VmaAllocationCreateInfo allocInfo;
allocInfo.flags = 0;
allocInfo.usage = p_mapping;
allocInfo.requiredFlags = 0;
allocInfo.preferredFlags = 0;
allocInfo.memoryTypeBits = 0;
allocInfo.pool = nullptr;
allocInfo.pUserData = nullptr;
if (p_size <= SMALL_ALLOCATION_MAX_SIZE) {
uint32_t mem_type_index = 0;
vmaFindMemoryTypeIndexForBufferInfo(allocator, &bufferInfo, &allocInfo, &mem_type_index);
allocInfo.pool = _find_or_create_small_allocs_pool(mem_type_index);
}
VkResult err = vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &p_buffer->buffer, &p_buffer->allocation, nullptr);
ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "Can't create buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");
p_buffer->size = p_size;
p_buffer->buffer_info.buffer = p_buffer->buffer;
p_buffer->buffer_info.offset = 0;
p_buffer->buffer_info.range = p_size;
p_buffer->usage = p_usage;
buffer_memory += p_size;
return OK;
}
Error RenderingDeviceVulkan::_buffer_free(Buffer *p_buffer) {
ERR_FAIL_COND_V(p_buffer->size == 0, ERR_INVALID_PARAMETER);
buffer_memory -= p_buffer->size;
vmaDestroyBuffer(allocator, p_buffer->buffer, p_buffer->allocation);
p_buffer->buffer = VK_NULL_HANDLE;
p_buffer->allocation = nullptr;
p_buffer->size = 0;
return OK;
}
Error RenderingDeviceVulkan::_insert_staging_block() {
VkBufferCreateInfo bufferInfo;
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.pNext = nullptr;
bufferInfo.flags = 0;
bufferInfo.size = staging_buffer_block_size;
bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
bufferInfo.queueFamilyIndexCount = 0;
bufferInfo.pQueueFamilyIndices = nullptr;
VmaAllocationCreateInfo allocInfo;
allocInfo.flags = 0;
allocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
allocInfo.requiredFlags = 0;
allocInfo.preferredFlags = 0;
allocInfo.memoryTypeBits = 0;
allocInfo.pool = nullptr;
allocInfo.pUserData = nullptr;
StagingBufferBlock block;
VkResult err = vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &block.buffer, &block.allocation, nullptr);
ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "vmaCreateBuffer failed with error " + itos(err) + ".");
block.frame_used = 0;
block.fill_amount = 0;
staging_buffer_blocks.insert(staging_buffer_current, block);
return OK;
}
Error RenderingDeviceVulkan::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, bool p_can_segment, bool p_on_draw_command_buffer) {
//determine a block to use
r_alloc_size = p_amount;
while (true) {
r_alloc_offset = 0;
//see if we can use current block
if (staging_buffer_blocks[staging_buffer_current].frame_used == frames_drawn) {
//we used this block this frame, let's see if there is still room
uint32_t write_from = staging_buffer_blocks[staging_buffer_current].fill_amount;
{
uint32_t align_remainder = write_from % p_required_align;
if (align_remainder != 0) {
write_from += p_required_align - align_remainder;
}
}
int32_t available_bytes = int32_t(staging_buffer_block_size) - int32_t(write_from);
if ((int32_t)p_amount < available_bytes) {
//all is good, we should be ok, all will fit
r_alloc_offset = write_from;
} else if (p_can_segment && available_bytes >= (int32_t)p_required_align) {
//ok all won't fit but at least we can fit a chunkie
//all is good, update what needs to be written to
r_alloc_offset = write_from;
r_alloc_size = available_bytes - (available_bytes % p_required_align);
} else {
//can't fit it into this buffer.
//will need to try next buffer
staging_buffer_current = (staging_buffer_current + 1) % staging_buffer_blocks.size();
// before doing anything, though, let's check that we didn't manage to fill all blocks
// possible in a single frame
if (staging_buffer_blocks[staging_buffer_current].frame_used == frames_drawn) {
//guess we did.. ok, let's see if we can insert a new block..
if ((uint64_t)staging_buffer_blocks.size() * staging_buffer_block_size < staging_buffer_max_size) {
//we can, so we are safe
Error err = _insert_staging_block();
if (err) {
return err;
}
//claim for this frame
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
} else {
// Ok, worst case scenario, all the staging buffers belong to this frame
// and this frame is not even done.
// If this is the main thread, it means the user is likely loading a lot of resources at once,
// otherwise, the thread should just be blocked until the next frame (currently unimplemented)
if (false) { //separate thread from render
//block_until_next_frame()
continue;
} else {
//flush EVERYTHING including setup commands. IF not immediate, also need to flush the draw commands
_flush(true);
//clear the whole staging buffer
for (int i = 0; i < staging_buffer_blocks.size(); i++) {
staging_buffer_blocks.write[i].frame_used = 0;
staging_buffer_blocks.write[i].fill_amount = 0;
}
//claim current
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
}
}
} else {
//not from current frame, so continue and try again
continue;
}
}
} else if (staging_buffer_blocks[staging_buffer_current].frame_used <= frames_drawn - frame_count) {
//this is an old block, which was already processed, let's reuse
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
staging_buffer_blocks.write[staging_buffer_current].fill_amount = 0;
} else if (staging_buffer_blocks[staging_buffer_current].frame_used > frames_drawn - frame_count) {
//this block may still be in use, let's not touch it unless we have to, so.. can we create a new one?
if ((uint64_t)staging_buffer_blocks.size() * staging_buffer_block_size < staging_buffer_max_size) {
//we are still allowed to create a new block, so let's do that and insert it for current pos
Error err = _insert_staging_block();
if (err) {
return err;
}
//claim for this frame
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
} else {
// oops, we are out of room and we can't create more.
// let's flush older frames.
// The logic here is that if a game is loading a lot of data from the main thread, it will need to be stalled anyway.
// If loading from a separate thread, we can block that thread until next frame when more room is made (not currently implemented, though).
if (false) {
//separate thread from render
//block_until_next_frame()
continue; //and try again
} else {
_flush(false);
for (int i = 0; i < staging_buffer_blocks.size(); i++) {
//clear all blocks but the ones from this frame
int block_idx = (i + staging_buffer_current) % staging_buffer_blocks.size();
if (staging_buffer_blocks[block_idx].frame_used == frames_drawn) {
break; //ok, we reached something from this frame, abort
}
staging_buffer_blocks.write[block_idx].frame_used = 0;
staging_buffer_blocks.write[block_idx].fill_amount = 0;
}
//claim for current frame
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
}
}
}
//all was good, break
break;
}
staging_buffer_used = true;
return OK;
}
Error RenderingDeviceVulkan::_buffer_update(Buffer *p_buffer, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_command_buffer, uint32_t p_required_align) {
//submitting may get chunked for various reasons, so convert this to a task
size_t to_submit = p_data_size;
size_t submit_from = 0;
while (to_submit > 0) {
uint32_t block_write_offset;
uint32_t block_write_amount;
Error err = _staging_buffer_allocate(MIN(to_submit, staging_buffer_block_size), p_required_align, block_write_offset, block_write_amount, p_use_draw_command_buffer);
if (err) {
return err;
}
//map staging buffer (It's CPU and coherent)
void *data_ptr = nullptr;
{
VkResult vkerr = vmaMapMemory(allocator, staging_buffer_blocks[staging_buffer_current].allocation, &data_ptr);
ERR_FAIL_COND_V_MSG(vkerr, ERR_CANT_CREATE, "vmaMapMemory failed with error " + itos(vkerr) + ".");
}
//copy to staging buffer
memcpy(((uint8_t *)data_ptr) + block_write_offset, p_data + submit_from, block_write_amount);
//unmap
vmaUnmapMemory(allocator, staging_buffer_blocks[staging_buffer_current].allocation);
//insert a command to copy this
VkBufferCopy region;
region.srcOffset = block_write_offset;
region.dstOffset = submit_from + p_offset;
region.size = block_write_amount;
vkCmdCopyBuffer(p_use_draw_command_buffer ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].buffer, p_buffer->buffer, 1, &region);
staging_buffer_blocks.write[staging_buffer_current].fill_amount = block_write_offset + block_write_amount;
to_submit -= block_write_amount;
submit_from += block_write_amount;
}
return OK;
}
void RenderingDeviceVulkan::_memory_barrier(VkPipelineStageFlags p_src_stage_mask, VkPipelineStageFlags p_dst_stage_mask, VkAccessFlags p_src_access, VkAccessFlags p_dst_sccess, bool p_sync_with_draw) {
VkMemoryBarrier mem_barrier;
mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
mem_barrier.pNext = nullptr;
mem_barrier.srcAccessMask = p_src_access;
mem_barrier.dstAccessMask = p_dst_sccess;
if (p_src_stage_mask == 0 || p_dst_stage_mask == 0) {
return; //no barrier, since this is invalid
}
vkCmdPipelineBarrier(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 1, &mem_barrier, 0, nullptr, 0, nullptr);
}
void RenderingDeviceVulkan::_full_barrier(bool p_sync_with_draw) {
//used for debug
_memory_barrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_READ_BIT |
VK_ACCESS_HOST_WRITE_BIT,
VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_READ_BIT |
VK_ACCESS_HOST_WRITE_BIT,
p_sync_with_draw);
}
void RenderingDeviceVulkan::_buffer_memory_barrier(VkBuffer buffer, uint64_t p_from, uint64_t p_size, VkPipelineStageFlags p_src_stage_mask, VkPipelineStageFlags p_dst_stage_mask, VkAccessFlags p_src_access, VkAccessFlags p_dst_sccess, bool p_sync_with_draw) {
VkBufferMemoryBarrier buffer_mem_barrier;
buffer_mem_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
buffer_mem_barrier.pNext = nullptr;
buffer_mem_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_mem_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_mem_barrier.srcAccessMask = p_src_access;
buffer_mem_barrier.dstAccessMask = p_dst_sccess;
buffer_mem_barrier.buffer = buffer;
buffer_mem_barrier.offset = p_from;
buffer_mem_barrier.size = p_size;
vkCmdPipelineBarrier(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 0, nullptr, 1, &buffer_mem_barrier, 0, nullptr);
}
/*****************/
/**** TEXTURE ****/
/*****************/
RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const TextureView &p_view, const Vector<Vector<uint8_t>> &p_data) {
_THREAD_SAFE_METHOD_
VkImageCreateInfo image_create_info;
image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
image_create_info.pNext = nullptr;
image_create_info.flags = 0;
#ifndef _MSC_VER
#warning TODO check for support via RenderingDevice to enable on mobile when possible
#endif
#ifndef ANDROID_ENABLED
// vkCreateImage fails with format list on Android (VK_ERROR_OUT_OF_HOST_MEMORY)
VkImageFormatListCreateInfoKHR format_list_create_info; //keep out of the if, needed for creation
Vector<VkFormat> allowed_formats; //keep out of the if, needed for creation
#endif
if (p_format.shareable_formats.size()) {
image_create_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
#ifndef ANDROID_ENABLED
for (int i = 0; i < p_format.shareable_formats.size(); i++) {
allowed_formats.push_back(vulkan_formats[p_format.shareable_formats[i]]);
}
format_list_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
format_list_create_info.pNext = nullptr;
format_list_create_info.viewFormatCount = allowed_formats.size();
format_list_create_info.pViewFormats = allowed_formats.ptr();
image_create_info.pNext = &format_list_create_info;
ERR_FAIL_COND_V_MSG(p_format.shareable_formats.find(p_format.format) == -1, RID(),
"If supplied a list of shareable formats, the current format must be present in the list");
ERR_FAIL_COND_V_MSG(p_view.format_override != DATA_FORMAT_MAX && p_format.shareable_formats.find(p_view.format_override) == -1, RID(),
"If supplied a list of shareable formats, the current view format override must be present in the list");
#endif
}
if (p_format.texture_type == TEXTURE_TYPE_CUBE || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) {
image_create_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
}
/*if (p_format.type == TEXTURE_TYPE_2D || p_format.type == TEXTURE_TYPE_2D_ARRAY) {
image_create_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
}*/
ERR_FAIL_INDEX_V(p_format.texture_type, TEXTURE_TYPE_MAX, RID());
image_create_info.imageType = vulkan_image_type[p_format.texture_type];
ERR_FAIL_COND_V_MSG(p_format.width < 1, RID(), "Width must be equal or greater than 1 for all textures");
image_create_info.format = vulkan_formats[p_format.format];
image_create_info.extent.width = p_format.width;
if (image_create_info.imageType == VK_IMAGE_TYPE_3D || image_create_info.imageType == VK_IMAGE_TYPE_2D) {
ERR_FAIL_COND_V_MSG(p_format.height < 1, RID(), "Height must be equal or greater than 1 for 2D and 3D textures");
image_create_info.extent.height = p_format.height;
} else {
image_create_info.extent.height = 1;
}
if (image_create_info.imageType == VK_IMAGE_TYPE_3D) {
ERR_FAIL_COND_V_MSG(p_format.depth < 1, RID(), "Depth must be equal or greater than 1 for 3D textures");
image_create_info.extent.depth = p_format.depth;
} else {
image_create_info.extent.depth = 1;
}
ERR_FAIL_COND_V(p_format.mipmaps < 1, RID());
image_create_info.mipLevels = p_format.mipmaps;
if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE) {
ERR_FAIL_COND_V_MSG(p_format.array_layers < 1, RID(),
"Amount of layers must be equal or greater than 1 for arrays and cubemaps.");
ERR_FAIL_COND_V_MSG((p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE) && (p_format.array_layers % 6) != 0, RID(),
"Cubemap and cubemap array textures must provide a layer number that is multiple of 6");
image_create_info.arrayLayers = p_format.array_layers;
} else {
image_create_info.arrayLayers = 1;
}
ERR_FAIL_INDEX_V(p_format.samples, TEXTURE_SAMPLES_MAX, RID());
image_create_info.samples = rasterization_sample_count[p_format.samples];
image_create_info.tiling = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
//usage
image_create_info.usage = 0;
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_FROM_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
}
image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_create_info.queueFamilyIndexCount = 0;
image_create_info.pQueueFamilyIndices = nullptr;
image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
uint32_t required_mipmaps = get_image_required_mipmaps(image_create_info.extent.width, image_create_info.extent.height, image_create_info.extent.depth);
ERR_FAIL_COND_V_MSG(required_mipmaps < image_create_info.mipLevels, RID(),
"Too many mipmaps requested for texture format and dimensions (" + itos(image_create_info.mipLevels) + "), maximum allowed: (" + itos(required_mipmaps) + ").");
if (p_data.size()) {
ERR_FAIL_COND_V_MSG(!(p_format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT), RID(),
"Texture needs the TEXTURE_USAGE_CAN_UPDATE_BIT usage flag in order to be updated at initialization or later");
int expected_images = image_create_info.arrayLayers;
ERR_FAIL_COND_V_MSG(p_data.size() != expected_images, RID(),
"Default supplied data for image format is of invalid length (" + itos(p_data.size()) + "), should be (" + itos(expected_images) + ").");
for (uint32_t i = 0; i < image_create_info.arrayLayers; i++) {
uint32_t required_size = get_image_format_required_size(p_format.format, image_create_info.extent.width, image_create_info.extent.height, image_create_info.extent.depth, image_create_info.mipLevels);
ERR_FAIL_COND_V_MSG((uint32_t)p_data[i].size() != required_size, RID(),
"Data for slice index " + itos(i) + " (mapped to layer " + itos(i) + ") differs in size (supplied: " + itos(p_data[i].size()) + ") than what is required by the format (" + itos(required_size) + ").");
}
}
{
//validate that this image is supported for the intended use
VkFormatProperties properties;
vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), image_create_info.format, &properties);
VkFormatFeatureFlags flags;
String format_text = "'" + String(named_formats[p_format.format]) + "'";
if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) {
flags = properties.linearTilingFeatures;
format_text += " (with CPU read bit)";
} else {
flags = properties.optimalTilingFeatures;
}
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT && !(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as sampling texture.");
}
if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as color attachment.");
}
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
printf("vkformat: %x\n", image_create_info.format);
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as depth-stencil attachment.");
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as storage image.");
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as atomic storage image.");
}
}
//some view validation
if (p_view.format_override != DATA_FORMAT_MAX) {
ERR_FAIL_INDEX_V(p_view.format_override, DATA_FORMAT_MAX, RID());
}
ERR_FAIL_INDEX_V(p_view.swizzle_r, TEXTURE_SWIZZLE_MAX, RID());
ERR_FAIL_INDEX_V(p_view.swizzle_g, TEXTURE_SWIZZLE_MAX, RID());
ERR_FAIL_INDEX_V(p_view.swizzle_b, TEXTURE_SWIZZLE_MAX, RID());
ERR_FAIL_INDEX_V(p_view.swizzle_a, TEXTURE_SWIZZLE_MAX, RID());
//allocate memory
uint32_t width, height;
uint32_t image_size = get_image_format_required_size(p_format.format, p_format.width, p_format.height, p_format.depth, p_format.mipmaps, &width, &height);
VmaAllocationCreateInfo allocInfo;
allocInfo.flags = 0;
allocInfo.pool = nullptr;
allocInfo.usage = p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT ? VMA_MEMORY_USAGE_CPU_ONLY : VMA_MEMORY_USAGE_GPU_ONLY;
allocInfo.requiredFlags = 0;
allocInfo.preferredFlags = 0;
allocInfo.memoryTypeBits = 0;
allocInfo.pUserData = nullptr;
if (image_size <= SMALL_ALLOCATION_MAX_SIZE) {
uint32_t mem_type_index = 0;
vmaFindMemoryTypeIndexForImageInfo(allocator, &image_create_info, &allocInfo, &mem_type_index);
allocInfo.pool = _find_or_create_small_allocs_pool(mem_type_index);
}
Texture texture;
VkResult err = vmaCreateImage(allocator, &image_create_info, &allocInfo, &texture.image, &texture.allocation, &texture.allocation_info);
ERR_FAIL_COND_V_MSG(err, RID(), "vmaCreateImage failed with error " + itos(err) + ".");
image_memory += texture.allocation_info.size;
texture.type = p_format.texture_type;
texture.format = p_format.format;
texture.width = image_create_info.extent.width;
texture.height = image_create_info.extent.height;
texture.depth = image_create_info.extent.depth;
texture.layers = image_create_info.arrayLayers;
texture.mipmaps = image_create_info.mipLevels;
texture.base_mipmap = 0;
texture.base_layer = 0;
texture.usage_flags = p_format.usage_bits;
texture.samples = p_format.samples;
texture.allowed_shared_formats = p_format.shareable_formats;
//set base layout based on usage priority
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
//first priority, readable
texture.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
//second priority, storage
texture.layout = VK_IMAGE_LAYOUT_GENERAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
//third priority, color or depth
texture.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
texture.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
} else {
texture.layout = VK_IMAGE_LAYOUT_GENERAL;
}
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
texture.barrier_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
if (format_has_stencil(p_format.format)) {
texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
} else {
texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
texture.barrier_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
}
texture.bound = false;
//create view
VkImageViewCreateInfo image_view_create_info;
image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
image_view_create_info.pNext = nullptr;
image_view_create_info.flags = 0;
image_view_create_info.image = texture.image;
static const VkImageViewType view_types[TEXTURE_TYPE_MAX] = {
VK_IMAGE_VIEW_TYPE_1D,
VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_VIEW_TYPE_3D,
VK_IMAGE_VIEW_TYPE_CUBE,
VK_IMAGE_VIEW_TYPE_1D_ARRAY,
VK_IMAGE_VIEW_TYPE_2D_ARRAY,
VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
};
image_view_create_info.viewType = view_types[p_format.texture_type];
if (p_view.format_override == DATA_FORMAT_MAX) {
image_view_create_info.format = image_create_info.format;
} else {
image_view_create_info.format = vulkan_formats[p_view.format_override];
}
static const VkComponentSwizzle component_swizzles[TEXTURE_SWIZZLE_MAX] = {
VK_COMPONENT_SWIZZLE_IDENTITY,
VK_COMPONENT_SWIZZLE_ZERO,
VK_COMPONENT_SWIZZLE_ONE,
VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_G,
VK_COMPONENT_SWIZZLE_B,
VK_COMPONENT_SWIZZLE_A
};
image_view_create_info.components.r = component_swizzles[p_view.swizzle_r];
image_view_create_info.components.g = component_swizzles[p_view.swizzle_g];
image_view_create_info.components.b = component_swizzles[p_view.swizzle_b];
image_view_create_info.components.a = component_swizzles[p_view.swizzle_a];
image_view_create_info.subresourceRange.baseMipLevel = 0;
image_view_create_info.subresourceRange.levelCount = image_create_info.mipLevels;
image_view_create_info.subresourceRange.baseArrayLayer = 0;
image_view_create_info.subresourceRange.layerCount = image_create_info.arrayLayers;
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
} else {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);
if (err) {
vmaDestroyImage(allocator, texture.image, texture.allocation);
ERR_FAIL_V_MSG(RID(), "vkCreateImageView failed with error " + itos(err) + ".");
}
//barrier to set layout
{
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_memory_barrier.newLayout = texture.layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture.image;
image_memory_barrier.subresourceRange.aspectMask = texture.barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = 0;
image_memory_barrier.subresourceRange.levelCount = image_create_info.mipLevels;
image_memory_barrier.subresourceRange.baseArrayLayer = 0;
image_memory_barrier.subresourceRange.layerCount = image_create_info.arrayLayers;
vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
RID id = texture_owner.make_rid(texture);
if (p_data.size()) {
for (uint32_t i = 0; i < image_create_info.arrayLayers; i++) {
_texture_update(id, i, p_data[i], RD::BARRIER_MASK_ALL, true);
}
}
return id;
}
RID RenderingDeviceVulkan::texture_create_shared(const TextureView &p_view, RID p_with_texture) {
_THREAD_SAFE_METHOD_
Texture *src_texture = texture_owner.get_or_null(p_with_texture);
ERR_FAIL_COND_V(!src_texture, RID());
if (src_texture->owner.is_valid()) { //ahh this is a share
p_with_texture = src_texture->owner;
src_texture = texture_owner.get_or_null(src_texture->owner);
ERR_FAIL_COND_V(!src_texture, RID()); //this is a bug
}
//create view
Texture texture = *src_texture;
VkImageViewCreateInfo image_view_create_info;
image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
image_view_create_info.pNext = nullptr;
image_view_create_info.flags = 0;
image_view_create_info.image = texture.image;
static const VkImageViewType view_types[TEXTURE_TYPE_MAX] = {
VK_IMAGE_VIEW_TYPE_1D,
VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_VIEW_TYPE_3D,
VK_IMAGE_VIEW_TYPE_CUBE,
VK_IMAGE_VIEW_TYPE_1D_ARRAY,
VK_IMAGE_VIEW_TYPE_2D_ARRAY,
VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
};
image_view_create_info.viewType = view_types[texture.type];
if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) {
image_view_create_info.format = vulkan_formats[texture.format];
} else {
ERR_FAIL_INDEX_V(p_view.format_override, DATA_FORMAT_MAX, RID());
ERR_FAIL_COND_V_MSG(texture.allowed_shared_formats.find(p_view.format_override) == -1, RID(),
"Format override is not in the list of allowed shareable formats for original texture.");
image_view_create_info.format = vulkan_formats[p_view.format_override];
}
static const VkComponentSwizzle component_swizzles[TEXTURE_SWIZZLE_MAX] = {
VK_COMPONENT_SWIZZLE_IDENTITY,
VK_COMPONENT_SWIZZLE_ZERO,
VK_COMPONENT_SWIZZLE_ONE,
VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_G,
VK_COMPONENT_SWIZZLE_B,
VK_COMPONENT_SWIZZLE_A
};
image_view_create_info.components.r = component_swizzles[p_view.swizzle_r];
image_view_create_info.components.g = component_swizzles[p_view.swizzle_g];
image_view_create_info.components.b = component_swizzles[p_view.swizzle_b];
image_view_create_info.components.a = component_swizzles[p_view.swizzle_a];
image_view_create_info.subresourceRange.baseMipLevel = 0;
image_view_create_info.subresourceRange.levelCount = texture.mipmaps;
image_view_create_info.subresourceRange.layerCount = texture.layers;
image_view_create_info.subresourceRange.baseArrayLayer = 0;
if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
} else {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
VkImageViewUsageCreateInfo usage_info;
usage_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
usage_info.pNext = nullptr;
if (p_view.format_override != DATA_FORMAT_MAX) {
//need to validate usage with vulkan
usage_info.usage = 0;
if (texture.usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
usage_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
}
if (texture.usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_STORAGE_BIT)) {
usage_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
}
}
if (texture.usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) {
usage_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}
}
if (texture.usage_flags & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) {
usage_info.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
}
if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
usage_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
if (texture.usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT) {
usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
}
if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT) {
usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
}
if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT) {
usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
}
image_view_create_info.pNext = &usage_info;
}
VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);
ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateImageView failed with error " + itos(err) + ".");
texture.owner = p_with_texture;
RID id = texture_owner.make_rid(texture);
_add_dependency(id, p_with_texture);
return id;
}
RID RenderingDeviceVulkan::texture_create_from_extension(TextureType p_type, DataFormat p_format, TextureSamples p_samples, uint64_t p_flags, uint64_t p_image, uint64_t p_width, uint64_t p_height, uint64_t p_depth, uint64_t p_layers) {
_THREAD_SAFE_METHOD_
// This method creates a texture object using a VkImage created by an extension, module or other external source (OpenXR uses this).
VkImage image = (VkImage)p_image;
Texture texture;
texture.image = image;
// if we leave texture.allocation as a nullptr, would that be enough to detect we don't "own" the image?
// also leave texture.allocation_info alone
// we'll set texture.view later on
texture.type = p_type;
texture.format = p_format;
texture.samples = p_samples;
texture.width = p_width;
texture.height = p_height;
texture.depth = p_depth;
texture.layers = p_layers;
texture.mipmaps = 0; // maybe make this settable too?
texture.usage_flags = p_flags;
texture.base_mipmap = 0;
texture.base_layer = 0;
texture.allowed_shared_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_UNORM);
texture.allowed_shared_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB);
// Do we need to do something with texture.layout ?
if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
texture.barrier_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
// if (format_has_stencil(p_format.format)) {
// texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
// }
} else {
texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
texture.barrier_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
}
// Create a view for us to use
VkImageViewCreateInfo image_view_create_info;
image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
image_view_create_info.pNext = nullptr;
image_view_create_info.flags = 0;
image_view_create_info.image = texture.image;
static const VkImageViewType view_types[TEXTURE_TYPE_MAX] = {
VK_IMAGE_VIEW_TYPE_1D,
VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_VIEW_TYPE_3D,
VK_IMAGE_VIEW_TYPE_CUBE,
VK_IMAGE_VIEW_TYPE_1D_ARRAY,
VK_IMAGE_VIEW_TYPE_2D_ARRAY,
VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
};
image_view_create_info.viewType = view_types[texture.type];
image_view_create_info.format = vulkan_formats[texture.format];
static const VkComponentSwizzle component_swizzles[TEXTURE_SWIZZLE_MAX] = {
VK_COMPONENT_SWIZZLE_IDENTITY,
VK_COMPONENT_SWIZZLE_ZERO,
VK_COMPONENT_SWIZZLE_ONE,
VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_G,
VK_COMPONENT_SWIZZLE_B,
VK_COMPONENT_SWIZZLE_A
};
// hardcode for now, maybe make this settable from outside..
image_view_create_info.components.r = component_swizzles[TEXTURE_SWIZZLE_R];
image_view_create_info.components.g = component_swizzles[TEXTURE_SWIZZLE_G];
image_view_create_info.components.b = component_swizzles[TEXTURE_SWIZZLE_B];
image_view_create_info.components.a = component_swizzles[TEXTURE_SWIZZLE_A];
image_view_create_info.subresourceRange.baseMipLevel = 0;
image_view_create_info.subresourceRange.levelCount = texture.mipmaps;
image_view_create_info.subresourceRange.baseArrayLayer = 0;
image_view_create_info.subresourceRange.layerCount = texture.layers;
if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
} else {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);
if (err) {
// vmaDestroyImage(allocator, texture.image, texture.allocation);
ERR_FAIL_V_MSG(RID(), "vkCreateImageView failed with error " + itos(err) + ".");
}
//barrier to set layout
{
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_memory_barrier.newLayout = texture.layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture.image;
image_memory_barrier.subresourceRange.aspectMask = texture.barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = 0;
image_memory_barrier.subresourceRange.levelCount = texture.mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = 0;
image_memory_barrier.subresourceRange.layerCount = texture.layers;
vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
RID id = texture_owner.make_rid(texture);
return id;
}
RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, uint32_t p_mipmaps, TextureSliceType p_slice_type) {
_THREAD_SAFE_METHOD_
Texture *src_texture = texture_owner.get_or_null(p_with_texture);
ERR_FAIL_COND_V(!src_texture, RID());
if (src_texture->owner.is_valid()) { //ahh this is a share
p_with_texture = src_texture->owner;
src_texture = texture_owner.get_or_null(src_texture->owner);
ERR_FAIL_COND_V(!src_texture, RID()); //this is a bug
}
ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_CUBEMAP && (src_texture->type != TEXTURE_TYPE_CUBE && src_texture->type != TEXTURE_TYPE_CUBE_ARRAY), RID(),
"Can only create a cubemap slice from a cubemap or cubemap array mipmap");
ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_3D && src_texture->type != TEXTURE_TYPE_3D, RID(),
"Can only create a 3D slice from a 3D texture");
ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_2D_ARRAY && (src_texture->type != TEXTURE_TYPE_2D_ARRAY), RID(),
"Can only create an array slice from a 2D array mipmap");
//create view
ERR_FAIL_UNSIGNED_INDEX_V(p_mipmap, src_texture->mipmaps, RID());
ERR_FAIL_COND_V(p_mipmap + p_mipmaps > src_texture->mipmaps, RID());
ERR_FAIL_UNSIGNED_INDEX_V(p_layer, src_texture->layers, RID());
int slice_layers = 1;
if (p_slice_type == TEXTURE_SLICE_2D_ARRAY) {
ERR_FAIL_COND_V_MSG(p_layer != 0, RID(), "layer must be 0 when obtaining a 2D array mipmap slice");
slice_layers = src_texture->layers;
} else if (p_slice_type == TEXTURE_SLICE_CUBEMAP) {
slice_layers = 6;
}
Texture texture = *src_texture;
get_image_format_required_size(texture.format, texture.width, texture.height, texture.depth, p_mipmap + 1, &texture.width, &texture.height);
texture.mipmaps = p_mipmaps;
texture.layers = slice_layers;
texture.base_mipmap = p_mipmap;
texture.base_layer = p_layer;
VkImageViewCreateInfo image_view_create_info;
image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
image_view_create_info.pNext = nullptr;
image_view_create_info.flags = 0;
image_view_create_info.image = texture.image;
static const VkImageViewType view_types[TEXTURE_TYPE_MAX] = {
VK_IMAGE_VIEW_TYPE_1D,
VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_VIEW_TYPE_1D,
VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_VIEW_TYPE_2D,
};
image_view_create_info.viewType = view_types[texture.type];
if (p_slice_type == TEXTURE_SLICE_CUBEMAP) {
image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
} else if (p_slice_type == TEXTURE_SLICE_3D) {
image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
} else if (p_slice_type == TEXTURE_SLICE_2D_ARRAY) {
image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
}
if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) {
image_view_create_info.format = vulkan_formats[texture.format];
} else {
ERR_FAIL_INDEX_V(p_view.format_override, DATA_FORMAT_MAX, RID());
ERR_FAIL_COND_V_MSG(texture.allowed_shared_formats.find(p_view.format_override) == -1, RID(),
"Format override is not in the list of allowed shareable formats for original texture.");
image_view_create_info.format = vulkan_formats[p_view.format_override];
}
static const VkComponentSwizzle component_swizzles[TEXTURE_SWIZZLE_MAX] = {
VK_COMPONENT_SWIZZLE_IDENTITY,
VK_COMPONENT_SWIZZLE_ZERO,
VK_COMPONENT_SWIZZLE_ONE,
VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_G,
VK_COMPONENT_SWIZZLE_B,
VK_COMPONENT_SWIZZLE_A
};
image_view_create_info.components.r = component_swizzles[p_view.swizzle_r];
image_view_create_info.components.g = component_swizzles[p_view.swizzle_g];
image_view_create_info.components.b = component_swizzles[p_view.swizzle_b];
image_view_create_info.components.a = component_swizzles[p_view.swizzle_a];
if (p_slice_type == TEXTURE_SLICE_CUBEMAP) {
ERR_FAIL_COND_V_MSG(p_layer >= src_texture->layers, RID(),
"Specified layer is invalid for cubemap");
ERR_FAIL_COND_V_MSG((p_layer % 6) != 0, RID(),
"Specified layer must be a multiple of 6.");
}
image_view_create_info.subresourceRange.baseMipLevel = p_mipmap;
image_view_create_info.subresourceRange.levelCount = p_mipmaps;
image_view_create_info.subresourceRange.layerCount = slice_layers;
image_view_create_info.subresourceRange.baseArrayLayer = p_layer;
if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
} else {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);
ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateImageView failed with error " + itos(err) + ".");
texture.owner = p_with_texture;
RID id = texture_owner.make_rid(texture);
_add_dependency(id, p_with_texture);
return id;
}
Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier) {
return _texture_update(p_texture, p_layer, p_data, p_post_barrier, false);
}
Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier, bool p_use_setup_queue) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG((draw_list || compute_list) && !p_use_setup_queue, ERR_INVALID_PARAMETER,
"Updating textures is forbidden during creation of a draw or compute list");
Texture *texture = texture_owner.get_or_null(p_texture);
ERR_FAIL_COND_V(!texture, ERR_INVALID_PARAMETER);
if (texture->owner != RID()) {
p_texture = texture->owner;
texture = texture_owner.get_or_null(texture->owner);
ERR_FAIL_COND_V(!texture, ERR_BUG); //this is a bug
}
ERR_FAIL_COND_V_MSG(texture->bound, ERR_CANT_ACQUIRE_RESOURCE,
"Texture can't be updated while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT), ERR_INVALID_PARAMETER,
"Texture requires the TEXTURE_USAGE_CAN_UPDATE_BIT in order to be updatable.");
uint32_t layer_count = texture->layers;
if (texture->type == TEXTURE_TYPE_CUBE || texture->type == TEXTURE_TYPE_CUBE_ARRAY) {
layer_count *= 6;
}
ERR_FAIL_COND_V(p_layer >= layer_count, ERR_INVALID_PARAMETER);
uint32_t width, height;
uint32_t image_size = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, texture->mipmaps, &width, &height);
uint32_t required_size = image_size;
uint32_t required_align = get_compressed_image_format_block_byte_size(texture->format);
if (required_align == 1) {
required_align = get_image_format_pixel_size(texture->format);
}
if ((required_align % 4) != 0) { //alignment rules are really strange
required_align *= 4;
}
ERR_FAIL_COND_V_MSG(required_size != (uint32_t)p_data.size(), ERR_INVALID_PARAMETER,
"Required size for texture update (" + itos(required_size) + ") does not match data supplied size (" + itos(p_data.size()) + ").");
uint32_t region_size = texture_upload_region_size_px;
const uint8_t *r = p_data.ptr();
VkCommandBuffer command_buffer = p_use_setup_queue ? frames[frame].setup_command_buffer : frames[frame].draw_command_buffer;
//barrier to transfer
{
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.oldLayout = texture->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture->image;
image_memory_barrier.subresourceRange.aspectMask = texture->barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = 0;
image_memory_barrier.subresourceRange.levelCount = texture->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
uint32_t mipmap_offset = 0;
uint32_t logic_width = texture->width;
uint32_t logic_height = texture->height;
for (uint32_t mm_i = 0; mm_i < texture->mipmaps; mm_i++) {
uint32_t depth;
uint32_t image_total = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, mm_i + 1, &width, &height, &depth);
const uint8_t *read_ptr_mipmap = r + mipmap_offset;
image_size = image_total - mipmap_offset;
for (uint32_t z = 0; z < depth; z++) { //for 3D textures, depth may be > 0
const uint8_t *read_ptr = read_ptr_mipmap + image_size * z / depth;
for (uint32_t x = 0; x < width; x += region_size) {
for (uint32_t y = 0; y < height; y += region_size) {
uint32_t region_w = MIN(region_size, width - x);
uint32_t region_h = MIN(region_size, height - y);
uint32_t region_logic_w = MIN(region_size, logic_width - x);
uint32_t region_logic_h = MIN(region_size, logic_height - y);
uint32_t pixel_size = get_image_format_pixel_size(texture->format);
uint32_t to_allocate = region_w * region_h * pixel_size;
to_allocate >>= get_compressed_image_format_pixel_rshift(texture->format);
uint32_t alloc_offset, alloc_size;
Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, !p_use_setup_queue);
ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
uint8_t *write_ptr;
{ //map
void *data_ptr = nullptr;
VkResult vkerr = vmaMapMemory(allocator, staging_buffer_blocks[staging_buffer_current].allocation, &data_ptr);
ERR_FAIL_COND_V_MSG(vkerr, ERR_CANT_CREATE, "vmaMapMemory failed with error " + itos(vkerr) + ".");
write_ptr = (uint8_t *)data_ptr;
write_ptr += alloc_offset;
}
uint32_t block_w, block_h;
get_compressed_image_format_block_dimensions(texture->format, block_w, block_h);
ERR_FAIL_COND_V(region_w % block_w, ERR_BUG);
ERR_FAIL_COND_V(region_h % block_h, ERR_BUG);
if (block_w != 1 || block_h != 1) {
//compressed image (blocks)
//must copy a block region
uint32_t block_size = get_compressed_image_format_block_byte_size(texture->format);
//re-create current variables in blocky format
uint32_t xb = x / block_w;
uint32_t yb = y / block_h;
uint32_t wb = width / block_w;
//uint32_t hb = height / block_h;
uint32_t region_wb = region_w / block_w;
uint32_t region_hb = region_h / block_h;
for (uint32_t xr = 0; xr < region_wb; xr++) {
for (uint32_t yr = 0; yr < region_hb; yr++) {
uint32_t src_offset = ((yr + yb) * wb + xr + xb) * block_size;
uint32_t dst_offset = (yr * region_wb + xr) * block_size;
//copy block
for (uint32_t i = 0; i < block_size; i++) {
write_ptr[dst_offset + i] = read_ptr[src_offset + i];
}
}
}
} else {
//regular image (pixels)
//must copy a pixel region
for (uint32_t xr = 0; xr < region_w; xr++) {
for (uint32_t yr = 0; yr < region_h; yr++) {
uint32_t src_offset = ((yr + y) * width + xr + x) * pixel_size;
uint32_t dst_offset = (yr * region_w + xr) * pixel_size;
//copy block
for (uint32_t i = 0; i < pixel_size; i++) {
write_ptr[dst_offset + i] = read_ptr[src_offset + i];
}
}
}
}
{ //unmap
vmaUnmapMemory(allocator, staging_buffer_blocks[staging_buffer_current].allocation);
}
VkBufferImageCopy buffer_image_copy;
buffer_image_copy.bufferOffset = alloc_offset;
buffer_image_copy.bufferRowLength = 0; //tightly packed
buffer_image_copy.bufferImageHeight = 0; //tightly packed
buffer_image_copy.imageSubresource.aspectMask = texture->read_aspect_mask;
buffer_image_copy.imageSubresource.mipLevel = mm_i;
buffer_image_copy.imageSubresource.baseArrayLayer = p_layer;
buffer_image_copy.imageSubresource.layerCount = 1;
buffer_image_copy.imageOffset.x = x;
buffer_image_copy.imageOffset.y = y;
buffer_image_copy.imageOffset.z = z;
buffer_image_copy.imageExtent.width = region_logic_w;
buffer_image_copy.imageExtent.height = region_logic_h;
buffer_image_copy.imageExtent.depth = 1;
vkCmdCopyBufferToImage(command_buffer, staging_buffer_blocks[staging_buffer_current].buffer, texture->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy);
staging_buffer_blocks.write[staging_buffer_current].fill_amount += alloc_size;
}
}
}
mipmap_offset = image_total;
logic_width = MAX(1, logic_width >> 1);
logic_height = MAX(1, logic_height >> 1);
}
//barrier to restore layout
{
uint32_t barrier_flags = 0;
uint32_t access_flags = 0;
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT;
}
if (p_post_barrier & BARRIER_MASK_RASTER) {
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT;
}
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
if (barrier_flags == 0) {
barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.newLayout = texture->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture->image;
image_memory_barrier.subresourceRange.aspectMask = texture->barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = 0;
image_memory_barrier.subresourceRange.levelCount = texture->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
if (texture->used_in_frame != frames_drawn) {
texture->used_in_raster = false;
texture->used_in_compute = false;
texture->used_in_frame = frames_drawn;
}
texture->used_in_transfer = true;
return OK;
}
Vector<uint8_t> RenderingDeviceVulkan::_texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer, bool p_2d) {
uint32_t width, height, depth;
uint32_t image_size = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, tex->mipmaps, &width, &height, &depth);
Vector<uint8_t> image_data;
image_data.resize(image_size);
void *img_mem;
vmaMapMemory(allocator, p_allocation, &img_mem);
uint32_t blockw, blockh;
get_compressed_image_format_block_dimensions(tex->format, blockw, blockh);
uint32_t block_size = get_compressed_image_format_block_byte_size(tex->format);
uint32_t pixel_size = get_image_format_pixel_size(tex->format);
{
uint8_t *w = image_data.ptrw();
uint32_t mipmap_offset = 0;
for (uint32_t mm_i = 0; mm_i < tex->mipmaps; mm_i++) {
uint32_t image_total = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, mm_i + 1, &width, &height, &depth);
uint8_t *write_ptr_mipmap = w + mipmap_offset;
image_size = image_total - mipmap_offset;
VkImageSubresource image_sub_resorce;
image_sub_resorce.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
image_sub_resorce.arrayLayer = p_layer;
image_sub_resorce.mipLevel = mm_i;
VkSubresourceLayout layout;
vkGetImageSubresourceLayout(device, p_image, &image_sub_resorce, &layout);
for (uint32_t z = 0; z < depth; z++) {
uint8_t *write_ptr = write_ptr_mipmap + z * image_size / depth;
const uint8_t *slice_read_ptr = ((uint8_t *)img_mem) + layout.offset + z * layout.depthPitch;
if (block_size > 1) {
//compressed
uint32_t line_width = (block_size * (width / blockw));
for (uint32_t y = 0; y < height / blockh; y++) {
const uint8_t *rptr = slice_read_ptr + y * layout.rowPitch;
uint8_t *wptr = write_ptr + y * line_width;
memcpy(wptr, rptr, line_width);
}
} else {
//uncompressed
for (uint32_t y = 0; y < height; y++) {
const uint8_t *rptr = slice_read_ptr + y * layout.rowPitch;
uint8_t *wptr = write_ptr + y * pixel_size * width;
memcpy(wptr, rptr, (uint64_t)pixel_size * width);
}
}
}
mipmap_offset = image_total;
}
}
vmaUnmapMemory(allocator, p_allocation);
return image_data;
}
Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t p_layer) {
_THREAD_SAFE_METHOD_
Texture *tex = texture_owner.get_or_null(p_texture);
ERR_FAIL_COND_V(!tex, Vector<uint8_t>());
ERR_FAIL_COND_V_MSG(tex->bound, Vector<uint8_t>(),
"Texture can't be retrieved while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
ERR_FAIL_COND_V_MSG(!(tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), Vector<uint8_t>(),
"Texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved.");
uint32_t layer_count = tex->layers;
if (tex->type == TEXTURE_TYPE_CUBE || tex->type == TEXTURE_TYPE_CUBE_ARRAY) {
layer_count *= 6;
}
ERR_FAIL_COND_V(p_layer >= layer_count, Vector<uint8_t>());
if (tex->usage_flags & TEXTURE_USAGE_CPU_READ_BIT) {
//does not need anything fancy, map and read.
return _texture_get_data_from_image(tex, tex->image, tex->allocation, p_layer);
} else {
//compute total image size
uint32_t width, height, depth;
uint32_t buffer_size = get_image_format_required_size(tex->format, tex->width, tex->height, tex->depth, tex->mipmaps, &width, &height, &depth);
//allocate buffer
VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; //makes more sense to retrieve
Buffer tmp_buffer;
_buffer_allocate(&tmp_buffer, buffer_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_CPU_ONLY);
{ //Source image barrier
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.oldLayout = tex->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = tex->image;
image_memory_barrier.subresourceRange.aspectMask = tex->barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = 0;
image_memory_barrier.subresourceRange.levelCount = tex->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
uint32_t computed_w = tex->width;
uint32_t computed_h = tex->height;
uint32_t computed_d = tex->depth;
uint32_t prev_size = 0;
uint32_t offset = 0;
for (uint32_t i = 0; i < tex->mipmaps; i++) {
VkBufferImageCopy buffer_image_copy;
uint32_t image_size = get_image_format_required_size(tex->format, tex->width, tex->height, tex->depth, i + 1);
uint32_t size = image_size - prev_size;
prev_size = image_size;
buffer_image_copy.bufferOffset = offset;
buffer_image_copy.bufferImageHeight = 0;
buffer_image_copy.bufferRowLength = 0;
buffer_image_copy.imageSubresource.aspectMask = tex->read_aspect_mask;
buffer_image_copy.imageSubresource.baseArrayLayer = p_layer;
buffer_image_copy.imageSubresource.layerCount = 1;
buffer_image_copy.imageSubresource.mipLevel = i;
buffer_image_copy.imageOffset.x = 0;
buffer_image_copy.imageOffset.y = 0;
buffer_image_copy.imageOffset.z = 0;
buffer_image_copy.imageExtent.width = computed_w;
buffer_image_copy.imageExtent.height = computed_h;
buffer_image_copy.imageExtent.depth = computed_d;
vkCmdCopyImageToBuffer(command_buffer, tex->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tmp_buffer.buffer, 1, &buffer_image_copy);
computed_w = MAX(1, computed_w >> 1);
computed_h = MAX(1, computed_h >> 1);
computed_d = MAX(1, computed_d >> 1);
offset += size;
}
{ //restore src
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
if (tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
image_memory_barrier.dstAccessMask |= VK_ACCESS_SHADER_WRITE_BIT;
}
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.newLayout = tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = tex->image;
image_memory_barrier.subresourceRange.aspectMask = tex->barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = 0;
image_memory_barrier.subresourceRange.levelCount = tex->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
_flush(true);
void *buffer_mem;
VkResult vkerr = vmaMapMemory(allocator, tmp_buffer.allocation, &buffer_mem);
ERR_FAIL_COND_V_MSG(vkerr, Vector<uint8_t>(), "vmaMapMemory failed with error " + itos(vkerr) + ".");
Vector<uint8_t> buffer_data;
{
buffer_data.resize(buffer_size);
uint8_t *w = buffer_data.ptrw();
memcpy(w, buffer_mem, buffer_size);
}
vmaUnmapMemory(allocator, tmp_buffer.allocation);
_buffer_free(&tmp_buffer);
return buffer_data;
}
}
bool RenderingDeviceVulkan::texture_is_shared(RID p_texture) {
_THREAD_SAFE_METHOD_
Texture *tex = texture_owner.get_or_null(p_texture);
ERR_FAIL_COND_V(!tex, false);
return tex->owner.is_valid();
}
bool RenderingDeviceVulkan::texture_is_valid(RID p_texture) {
return texture_owner.owns(p_texture);
}
Size2i RenderingDeviceVulkan::texture_size(RID p_texture) {
_THREAD_SAFE_METHOD_
Texture *tex = texture_owner.get_or_null(p_texture);
ERR_FAIL_COND_V(!tex, Size2i());
return Size2i(tex->width, tex->height);
}
Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier) {
_THREAD_SAFE_METHOD_
Texture *src_tex = texture_owner.get_or_null(p_from_texture);
ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
"Source texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER,
"Source texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved.");
uint32_t src_layer_count = src_tex->layers;
uint32_t src_width, src_height, src_depth;
get_image_format_required_size(src_tex->format, src_tex->width, src_tex->height, src_tex->depth, p_src_mipmap + 1, &src_width, &src_height, &src_depth);
if (src_tex->type == TEXTURE_TYPE_CUBE || src_tex->type == TEXTURE_TYPE_CUBE_ARRAY) {
src_layer_count *= 6;
}
ERR_FAIL_COND_V(p_from.x < 0 || p_from.x + p_size.x > src_width, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_from.y < 0 || p_from.y + p_size.y > src_height, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_from.z < 0 || p_from.z + p_size.z > src_depth, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_src_mipmap >= src_tex->mipmaps, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_src_layer >= src_layer_count, ERR_INVALID_PARAMETER);
Texture *dst_tex = texture_owner.get_or_null(p_to_texture);
ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER,
"Destination texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER,
"Destination texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be retrieved.");
uint32_t dst_layer_count = dst_tex->layers;
uint32_t dst_width, dst_height, dst_depth;
get_image_format_required_size(dst_tex->format, dst_tex->width, dst_tex->height, dst_tex->depth, p_dst_mipmap + 1, &dst_width, &dst_height, &dst_depth);
if (dst_tex->type == TEXTURE_TYPE_CUBE || dst_tex->type == TEXTURE_TYPE_CUBE_ARRAY) {
dst_layer_count *= 6;
}
ERR_FAIL_COND_V(p_to.x < 0 || p_to.x + p_size.x > dst_width, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_to.y < 0 || p_to.y + p_size.y > dst_height, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_to.z < 0 || p_to.z + p_size.z > dst_depth, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_dst_mipmap >= dst_tex->mipmaps, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_dst_layer >= dst_layer_count, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER,
"Source and destination texture must be of the same type (color or depth).");
VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
{
//PRE Copy the image
{ //Source
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.oldLayout = src_tex->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = src_tex->image;
image_memory_barrier.subresourceRange.aspectMask = src_tex->barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = p_src_mipmap;
image_memory_barrier.subresourceRange.levelCount = 1;
image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
{ //Dest
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.oldLayout = dst_tex->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = dst_tex->image;
image_memory_barrier.subresourceRange.aspectMask = dst_tex->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = p_dst_mipmap;
image_memory_barrier.subresourceRange.levelCount = 1;
image_memory_barrier.subresourceRange.baseArrayLayer = p_dst_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
//COPY
{
VkImageCopy image_copy_region;
image_copy_region.srcSubresource.aspectMask = src_tex->read_aspect_mask;
image_copy_region.srcSubresource.baseArrayLayer = p_src_layer;
image_copy_region.srcSubresource.layerCount = 1;
image_copy_region.srcSubresource.mipLevel = p_src_mipmap;
image_copy_region.srcOffset.x = p_from.x;
image_copy_region.srcOffset.y = p_from.y;
image_copy_region.srcOffset.z = p_from.z;
image_copy_region.dstSubresource.aspectMask = dst_tex->read_aspect_mask;
image_copy_region.dstSubresource.baseArrayLayer = p_dst_layer;
image_copy_region.dstSubresource.layerCount = 1;
image_copy_region.dstSubresource.mipLevel = p_dst_mipmap;
image_copy_region.dstOffset.x = p_to.x;
image_copy_region.dstOffset.y = p_to.y;
image_copy_region.dstOffset.z = p_to.z;
image_copy_region.extent.width = p_size.x;
image_copy_region.extent.height = p_size.y;
image_copy_region.extent.depth = p_size.z;
vkCmdCopyImage(command_buffer, src_tex->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_tex->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy_region);
}
// RESTORE LAYOUT for SRC and DST
uint32_t barrier_flags = 0;
uint32_t access_flags = 0;
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_RASTER) {
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
if (barrier_flags == 0) {
barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
{ //restore src
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.newLayout = src_tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = src_tex->image;
image_memory_barrier.subresourceRange.aspectMask = src_tex->barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = p_src_mipmap;
image_memory_barrier.subresourceRange.levelCount = src_tex->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
{ //make dst readable
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.newLayout = dst_tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = dst_tex->image;
image_memory_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
image_memory_barrier.subresourceRange.baseMipLevel = p_src_mipmap;
image_memory_barrier.subresourceRange.levelCount = 1;
image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
}
return OK;
}
Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier) {
_THREAD_SAFE_METHOD_
Texture *src_tex = texture_owner.get_or_null(p_from_texture);
ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
"Source texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER,
"Source texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved.");
ERR_FAIL_COND_V_MSG(src_tex->type != TEXTURE_TYPE_2D, ERR_INVALID_PARAMETER, "Source texture must be 2D (or a slice of a 3D/Cube texture)");
ERR_FAIL_COND_V_MSG(src_tex->samples == TEXTURE_SAMPLES_1, ERR_INVALID_PARAMETER, "Source texture must be multisampled.");
Texture *dst_tex = texture_owner.get_or_null(p_to_texture);
ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER,
"Destination texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER,
"Destination texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be retrieved.");
ERR_FAIL_COND_V_MSG(dst_tex->type != TEXTURE_TYPE_2D, ERR_INVALID_PARAMETER, "Destination texture must be 2D (or a slice of a 3D/Cube texture).");
ERR_FAIL_COND_V_MSG(dst_tex->samples != TEXTURE_SAMPLES_1, ERR_INVALID_PARAMETER, "Destination texture must not be multisampled.");
ERR_FAIL_COND_V_MSG(src_tex->format != dst_tex->format, ERR_INVALID_PARAMETER, "Source and Destination textures must be the same format.");
ERR_FAIL_COND_V_MSG(src_tex->width != dst_tex->width && src_tex->height != dst_tex->height && src_tex->depth != dst_tex->depth, ERR_INVALID_PARAMETER, "Source and Destination textures must have the same dimensions.");
ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER,
"Source and destination texture must be of the same type (color or depth).");
VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
{
//PRE Copy the image
{ //Source
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.oldLayout = src_tex->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = src_tex->image;
image_memory_barrier.subresourceRange.aspectMask = src_tex->barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = src_tex->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = 1;
image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
{ //Dest
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.oldLayout = dst_tex->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = dst_tex->image;
image_memory_barrier.subresourceRange.aspectMask = dst_tex->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = dst_tex->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = 1;
image_memory_barrier.subresourceRange.baseArrayLayer = dst_tex->base_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
//COPY
{
VkImageResolve image_copy_region;
image_copy_region.srcSubresource.aspectMask = src_tex->read_aspect_mask;
image_copy_region.srcSubresource.baseArrayLayer = src_tex->base_layer;
image_copy_region.srcSubresource.layerCount = 1;
image_copy_region.srcSubresource.mipLevel = src_tex->base_mipmap;
image_copy_region.srcOffset.x = 0;
image_copy_region.srcOffset.y = 0;
image_copy_region.srcOffset.z = 0;
image_copy_region.dstSubresource.aspectMask = dst_tex->read_aspect_mask;
image_copy_region.dstSubresource.baseArrayLayer = dst_tex->base_layer;
image_copy_region.dstSubresource.layerCount = 1;
image_copy_region.dstSubresource.mipLevel = dst_tex->base_mipmap;
image_copy_region.dstOffset.x = 0;
image_copy_region.dstOffset.y = 0;
image_copy_region.dstOffset.z = 0;
image_copy_region.extent.width = src_tex->width;
image_copy_region.extent.height = src_tex->height;
image_copy_region.extent.depth = src_tex->depth;
vkCmdResolveImage(command_buffer, src_tex->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_tex->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy_region);
}
// RESTORE LAYOUT for SRC and DST
uint32_t barrier_flags = 0;
uint32_t access_flags = 0;
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_RASTER) {
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
if (barrier_flags == 0) {
barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
{ //restore src
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.newLayout = src_tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = src_tex->image;
image_memory_barrier.subresourceRange.aspectMask = src_tex->barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = src_tex->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = 1;
image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
{ //make dst readable
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.newLayout = dst_tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = dst_tex->image;
image_memory_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
image_memory_barrier.subresourceRange.baseMipLevel = dst_tex->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = 1;
image_memory_barrier.subresourceRange.baseArrayLayer = dst_tex->base_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
}
return OK;
}
Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier) {
_THREAD_SAFE_METHOD_
Texture *src_tex = texture_owner.get_or_null(p_texture);
ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
"Source texture can't be cleared while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
ERR_FAIL_COND_V(p_layers == 0, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_mipmaps == 0, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER,
"Source texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be cleared.");
uint32_t src_layer_count = src_tex->layers;
if (src_tex->type == TEXTURE_TYPE_CUBE || src_tex->type == TEXTURE_TYPE_CUBE_ARRAY) {
src_layer_count *= 6;
}
ERR_FAIL_COND_V(p_base_mipmap + p_mipmaps > src_tex->mipmaps, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_base_layer + p_layers > src_layer_count, ERR_INVALID_PARAMETER);
VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
VkImageLayout clear_layout = (src_tex->layout == VK_IMAGE_LAYOUT_GENERAL) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
// NOTE: Perhaps the valid stages/accesses for a given owner should be a property of the owner. (Here and places like _get_buffer_from_owner)
const VkPipelineStageFlags valid_texture_stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
constexpr VkAccessFlags read_access = VK_ACCESS_SHADER_READ_BIT;
constexpr VkAccessFlags read_write_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
const VkAccessFlags valid_texture_access = (src_tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) ? read_write_access : read_access;
{ // Barrier from previous access with optional layout change (see clear_layout logic above)
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = valid_texture_access;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.oldLayout = src_tex->layout;
image_memory_barrier.newLayout = clear_layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = src_tex->image;
image_memory_barrier.subresourceRange.aspectMask = src_tex->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = src_tex->base_mipmap + p_base_mipmap;
image_memory_barrier.subresourceRange.levelCount = p_mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer;
image_memory_barrier.subresourceRange.layerCount = p_layers;
vkCmdPipelineBarrier(command_buffer, valid_texture_stages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
VkClearColorValue clear_color;
clear_color.float32[0] = p_color.r;
clear_color.float32[1] = p_color.g;
clear_color.float32[2] = p_color.b;
clear_color.float32[3] = p_color.a;
VkImageSubresourceRange range;
range.aspectMask = src_tex->read_aspect_mask;
range.baseArrayLayer = src_tex->base_layer + p_base_layer;
range.layerCount = p_layers;
range.baseMipLevel = src_tex->base_mipmap + p_base_mipmap;
range.levelCount = p_mipmaps;
vkCmdClearColorImage(command_buffer, src_tex->image, clear_layout, &clear_color, 1, &range);
{ // Barrier to post clear accesses (changing back the layout if needed)
uint32_t barrier_flags = 0;
uint32_t access_flags = 0;
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_RASTER) {
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
if (barrier_flags == 0) {
barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = clear_layout;
image_memory_barrier.newLayout = src_tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = src_tex->image;
image_memory_barrier.subresourceRange.aspectMask = src_tex->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = src_tex->base_mipmap + p_base_mipmap;
image_memory_barrier.subresourceRange.levelCount = p_mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer;
image_memory_barrier.subresourceRange.layerCount = p_layers;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
if (src_tex->used_in_frame != frames_drawn) {
src_tex->used_in_raster = false;
src_tex->used_in_compute = false;
src_tex->used_in_frame = frames_drawn;
}
src_tex->used_in_transfer = true;
return OK;
}
bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const {
ERR_FAIL_INDEX_V(p_format, DATA_FORMAT_MAX, false);
_THREAD_SAFE_METHOD_
//validate that this image is supported for the intended use
VkFormatProperties properties;
vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), vulkan_formats[p_format], &properties);
VkFormatFeatureFlags flags;
if (p_usage & TEXTURE_USAGE_CPU_READ_BIT) {
flags = properties.linearTilingFeatures;
} else {
flags = properties.optimalTilingFeatures;
}
if (p_usage & TEXTURE_USAGE_SAMPLING_BIT && !(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
return false;
}
if (p_usage & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
return false;
}
if (p_usage & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
return false;
}
if (p_usage & TEXTURE_USAGE_STORAGE_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
return false;
}
if (p_usage & TEXTURE_USAGE_STORAGE_ATOMIC_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) {
return false;
}
return true;
}
/********************/
/**** ATTACHMENT ****/
/********************/
VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentFormat> &p_attachments, const Vector<FramebufferPass> &p_passes, InitialAction p_initial_action, FinalAction p_final_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, uint32_t p_view_count, Vector<TextureSamples> *r_samples) {
// Set up dependencies from/to external equivalent to the default (implicit) one, and then amend them
const VkPipelineStageFlags default_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; // From Section 7.1 of Vulkan API Spec v1.1.148
VkPipelineStageFlags reading_stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
VkSubpassDependency dependencies[2] = { { VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, default_access_mask, 0 },
{ 0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, default_access_mask, 0, 0 } };
VkSubpassDependency &dependency_from_external = dependencies[0];
VkSubpassDependency &dependency_to_external = dependencies[1];
LocalVector<int32_t> attachment_last_pass;
attachment_last_pass.resize(p_attachments.size());
Vector<VkAttachmentDescription> attachments;
for (int i = 0; i < p_attachments.size(); i++) {
ERR_FAIL_INDEX_V(p_attachments[i].format, DATA_FORMAT_MAX, VK_NULL_HANDLE);
ERR_FAIL_INDEX_V(p_attachments[i].samples, TEXTURE_SAMPLES_MAX, VK_NULL_HANDLE);
ERR_FAIL_COND_V_MSG(!(p_attachments[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)),
VK_NULL_HANDLE, "Texture format for index (" + itos(i) + ") requires an attachment (color, depth, input or stencil) bit set.");
VkAttachmentDescription description = {};
description.flags = 0;
description.format = vulkan_formats[p_attachments[i].format];
description.samples = rasterization_sample_count[p_attachments[i].samples];
bool is_sampled = p_attachments[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT;
bool is_storage = p_attachments[i].usage_flags & TEXTURE_USAGE_STORAGE_BIT;
bool is_depth = p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
// For each UNDEFINED, assume the prior use was a *read*, as we'd be discarding the output of a write
// Also, each UNDEFINED will do an immediate layout transition (write), s.t. we must ensure execution synchronization vs.
// the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that
// stage
switch (is_depth ? p_initial_depth_action : p_initial_action) {
case INITIAL_ACTION_CLEAR_REGION:
case INITIAL_ACTION_CLEAR: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
dependency_from_external.srcStageMask |= reading_stages;
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
dependency_from_external.srcStageMask |= reading_stages;
}
} break;
case INITIAL_ACTION_KEEP: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
dependency_from_external.srcStageMask |= reading_stages;
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
dependency_from_external.srcStageMask |= reading_stages;
}
} break;
case INITIAL_ACTION_DROP: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
dependency_from_external.srcStageMask |= reading_stages;
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
dependency_from_external.srcStageMask |= reading_stages;
}
} break;
case INITIAL_ACTION_CLEAR_REGION_CONTINUE:
case INITIAL_ACTION_CONTINUE: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
description.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
dependency_from_external.srcStageMask |= reading_stages;
}
} break;
default: {
ERR_FAIL_V(VK_NULL_HANDLE); //should never reach here
}
}
bool used_last = false;
{
int last_pass = p_passes.size() - 1;
if (is_depth) {
//likely missing depth resolve?
if (p_passes[last_pass].depth_attachment == i) {
used_last = true;
}
} else {
if (p_passes[last_pass].resolve_attachments.size()) {
//if using resolve attachments, check resolve attachments
for (int j = 0; j < p_passes[last_pass].resolve_attachments.size(); j++) {
if (p_passes[last_pass].resolve_attachments[j] == i) {
used_last = true;
break;
}
}
} else {
for (int j = 0; j < p_passes[last_pass].color_attachments.size(); j++) {
if (p_passes[last_pass].color_attachments[j] == i) {
used_last = true;
break;
}
}
}
}
if (!used_last) {
for (int j = 0; j < p_passes[last_pass].preserve_attachments.size(); j++) {
if (p_passes[last_pass].preserve_attachments[j] == i) {
used_last = true;
break;
}
}
}
}
FinalAction final_action = p_final_action;
FinalAction final_depth_action = p_final_depth_action;
if (!used_last) {
if (is_depth) {
final_depth_action = FINAL_ACTION_DISCARD;
} else {
final_action = FINAL_ACTION_DISCARD;
}
}
switch (is_depth ? final_depth_action : final_action) {
case FINAL_ACTION_READ: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, false);
} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, true);
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
// TODO: What does this mean about the next usage (and thus appropriate dependency masks
}
} break;
case FINAL_ACTION_DISCARD: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
}
} break;
case FINAL_ACTION_CONTINUE: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
description.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
}
} break;
default: {
ERR_FAIL_V(VK_NULL_HANDLE); //should never reach here
}
}
attachment_last_pass[i] = -1;
attachments.push_back(description);
}
LocalVector<VkSubpassDescription> subpasses;
LocalVector<LocalVector<VkAttachmentReference>> color_reference_array;
LocalVector<LocalVector<VkAttachmentReference>> input_reference_array;
LocalVector<LocalVector<VkAttachmentReference>> resolve_reference_array;
LocalVector<LocalVector<uint32_t>> preserve_reference_array;
LocalVector<VkAttachmentReference> depth_reference_array;
subpasses.resize(p_passes.size());
color_reference_array.resize(p_passes.size());
input_reference_array.resize(p_passes.size());
resolve_reference_array.resize(p_passes.size());
preserve_reference_array.resize(p_passes.size());
depth_reference_array.resize(p_passes.size());
LocalVector<VkSubpassDependency> subpass_dependencies;
for (int i = 0; i < p_passes.size(); i++) {
const FramebufferPass *pass = &p_passes[i];
LocalVector<VkAttachmentReference> &color_references = color_reference_array[i];
TextureSamples texture_samples = TEXTURE_SAMPLES_1;
bool is_multisample_first = true;
for (int j = 0; j < pass->color_attachments.size(); j++) {
int32_t attachment = pass->color_attachments[j];
VkAttachmentReference reference;
if (attachment == FramebufferPass::ATTACHMENT_UNUSED) {
reference.attachment = VK_ATTACHMENT_UNUSED;
reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
} else {
ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), color attachment (" + itos(j) + ").");
ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as depth, but it's not usable as color attachment.");
ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
if (is_multisample_first) {
texture_samples = p_attachments[attachment].samples;
is_multisample_first = false;
} else {
ERR_FAIL_COND_V_MSG(texture_samples != p_attachments[attachment].samples, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), if an attachment is marked as multisample, all of them should be multisample and use the same number of samples.");
}
reference.attachment = attachment;
reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachment_last_pass[attachment] = i;
}
color_references.push_back(reference);
}
LocalVector<VkAttachmentReference> &input_references = input_reference_array[i];
for (int j = 0; j < pass->input_attachments.size(); j++) {
int32_t attachment = pass->input_attachments[j];
VkAttachmentReference reference;
if (attachment == FramebufferPass::ATTACHMENT_UNUSED) {
reference.attachment = VK_ATTACHMENT_UNUSED;
reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
} else {
ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), input attachment (" + itos(j) + ").");
ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it isn't marked as an input texture.");
ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
reference.attachment = attachment;
reference.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
attachment_last_pass[attachment] = i;
}
input_references.push_back(reference);
}
LocalVector<VkAttachmentReference> &resolve_references = resolve_reference_array[i];
if (pass->resolve_attachments.size() > 0) {
ERR_FAIL_COND_V_MSG(pass->resolve_attachments.size() != pass->color_attachments.size(), VK_NULL_HANDLE, "The amount of resolve attachments (" + itos(pass->resolve_attachments.size()) + ") must match the number of color attachments (" + itos(pass->color_attachments.size()) + ").");
ERR_FAIL_COND_V_MSG(texture_samples == TEXTURE_SAMPLES_1, VK_NULL_HANDLE, "Resolve attachments specified, but color attachments are not multisample.");
}
for (int j = 0; j < pass->resolve_attachments.size(); j++) {
int32_t attachment = pass->resolve_attachments[j];
VkAttachmentReference reference;
if (attachment == FramebufferPass::ATTACHMENT_UNUSED) {
reference.attachment = VK_ATTACHMENT_UNUSED;
reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
} else {
ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment (" + itos(j) + ").");
ERR_FAIL_COND_V_MSG(pass->color_attachments[j] == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment (" + itos(j) + "), the respective color attachment is marked as unused.");
ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment, it isn't marked as a color texture.");
ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
bool multisample = p_attachments[attachment].samples > TEXTURE_SAMPLES_1;
ERR_FAIL_COND_V_MSG(multisample, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachments can't be multisample.");
reference.attachment = attachment;
reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
attachment_last_pass[attachment] = i;
}
resolve_references.push_back(reference);
}
VkAttachmentReference &depth_stencil_reference = depth_reference_array[i];
if (pass->depth_attachment != FramebufferPass::ATTACHMENT_UNUSED) {
int32_t attachment = pass->depth_attachment;
ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), depth attachment.");
ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as depth, but it's not a depth attachment.");
ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
depth_stencil_reference.attachment = attachment;
depth_stencil_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachment_last_pass[attachment] = i;
if (is_multisample_first) {
texture_samples = p_attachments[attachment].samples;
is_multisample_first = false;
} else {
ERR_FAIL_COND_V_MSG(texture_samples != p_attachments[attachment].samples, VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), if an attachment is marked as multisample, all of them should be multisample and use the same number of samples including the depth.");
}
} else {
depth_stencil_reference.attachment = VK_ATTACHMENT_UNUSED;
depth_stencil_reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
}
LocalVector<uint32_t> &preserve_references = preserve_reference_array[i];
for (int j = 0; j < pass->preserve_attachments.size(); j++) {
int32_t attachment = pass->preserve_attachments[j];
ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused.");
ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ").");
if (attachment_last_pass[attachment] != i) {
//preserve can still be used to keep depth or color from being discarded after use
attachment_last_pass[attachment] = i;
preserve_references.push_back(attachment);
}
}
VkSubpassDescription &subpass = subpasses[i];
subpass.flags = 0;
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.inputAttachmentCount = input_references.size();
if (input_references.size()) {
subpass.pInputAttachments = input_references.ptr();
} else {
subpass.pInputAttachments = nullptr;
}
subpass.colorAttachmentCount = color_references.size();
if (color_references.size()) {
subpass.pColorAttachments = color_references.ptr();
} else {
subpass.pColorAttachments = nullptr;
}
if (depth_stencil_reference.attachment != VK_ATTACHMENT_UNUSED) {
subpass.pDepthStencilAttachment = &depth_stencil_reference;
} else {
subpass.pDepthStencilAttachment = nullptr;
}
if (resolve_references.size()) {
subpass.pResolveAttachments = resolve_references.ptr();
} else {
subpass.pResolveAttachments = nullptr;
}
subpass.preserveAttachmentCount = preserve_references.size();
if (preserve_references.size()) {
subpass.pPreserveAttachments = preserve_references.ptr();
} else {
subpass.pPreserveAttachments = nullptr;
}
if (r_samples) {
r_samples->push_back(texture_samples);
}
if (i > 0) {
VkSubpassDependency dependency;
dependency.srcSubpass = i - 1;
dependency.dstSubpass = i;
dependency.srcStageMask = 0;
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
dependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
subpass_dependencies.push_back(dependency);
}
/*
// NOTE: Big Mallet Approach -- any layout transition causes a full barrier
if (reference.layout != description.initialLayout) {
// NOTE: this should be smarter based on the texture's knowledge of its previous role
dependency_from_external.srcStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
dependency_from_external.srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
}
if (reference.layout != description.finalLayout) {
// NOTE: this should be smarter based on the texture's knowledge of its subsequent role
dependency_to_external.dstStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
dependency_to_external.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
}
*/
}
VkRenderPassCreateInfo render_pass_create_info;
render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
render_pass_create_info.pNext = nullptr;
render_pass_create_info.flags = 0;
render_pass_create_info.attachmentCount = attachments.size();
render_pass_create_info.pAttachments = attachments.ptr();
render_pass_create_info.subpassCount = subpasses.size();
render_pass_create_info.pSubpasses = subpasses.ptr();
// Commenting this because it seems it just avoids raster and compute to work at the same time.
// Other barriers seem to be protecting the render pass fine.
// render_pass_create_info.dependencyCount = 2;
// render_pass_create_info.pDependencies = dependencies;
render_pass_create_info.dependencyCount = subpass_dependencies.size();
if (subpass_dependencies.size()) {
render_pass_create_info.pDependencies = subpass_dependencies.ptr();
} else {
render_pass_create_info.pDependencies = nullptr;
}
// These are only used if we use multiview but we need to define them in scope.
const uint32_t view_mask = (1 << p_view_count) - 1;
const uint32_t correlation_mask = (1 << p_view_count) - 1;
Vector<uint32_t> view_masks;
VkRenderPassMultiviewCreateInfo render_pass_multiview_create_info;
if (p_view_count > 1) {
const VulkanContext::MultiviewCapabilities capabilities = context->get_multiview_capabilities();
// For now this only works with multiview!
ERR_FAIL_COND_V_MSG(!capabilities.is_supported, VK_NULL_HANDLE, "Multiview not supported");
// Make sure we limit this to the number of views we support.
ERR_FAIL_COND_V_MSG(p_view_count > capabilities.max_view_count, VK_NULL_HANDLE, "Hardware does not support requested number of views for Multiview render pass");
// Set view masks for each subpass
for (uint32_t i = 0; i < subpasses.size(); i++) {
view_masks.push_back(view_mask);
}
render_pass_multiview_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO;
render_pass_multiview_create_info.pNext = nullptr;
render_pass_multiview_create_info.subpassCount = subpasses.size();
render_pass_multiview_create_info.pViewMasks = view_masks.ptr();
render_pass_multiview_create_info.dependencyCount = 0;
render_pass_multiview_create_info.pViewOffsets = nullptr;
render_pass_multiview_create_info.correlationMaskCount = 1;
render_pass_multiview_create_info.pCorrelationMasks = &correlation_mask;
render_pass_create_info.pNext = &render_pass_multiview_create_info;
}
VkRenderPass render_pass;
VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass);
ERR_FAIL_COND_V_MSG(res, VK_NULL_HANDLE, "vkCreateRenderPass failed with error " + itos(res) + ".");
return render_pass;
}
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create(const Vector<AttachmentFormat> &p_format, uint32_t p_view_count) {
FramebufferPass pass;
for (int i = 0; i < p_format.size(); i++) {
if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
pass.depth_attachment = i;
} else {
pass.color_attachments.push_back(i);
}
}
Vector<FramebufferPass> passes;
passes.push_back(pass);
return framebuffer_format_create_multipass(p_format, passes, p_view_count);
}
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_multipass(const Vector<AttachmentFormat> &p_attachments, Vector<FramebufferPass> &p_passes, uint32_t p_view_count) {
_THREAD_SAFE_METHOD_
FramebufferFormatKey key;
key.attachments = p_attachments;
key.passes = p_passes;
key.view_count = p_view_count;
const Map<FramebufferFormatKey, FramebufferFormatID>::Element *E = framebuffer_format_cache.find(key);
if (E) {
//exists, return
return E->get();
}
Vector<TextureSamples> samples;
VkRenderPass render_pass = _render_pass_create(p_attachments, p_passes, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, p_view_count, &samples); //actions don't matter for this use case
if (render_pass == VK_NULL_HANDLE) { //was likely invalid
return INVALID_ID;
}
FramebufferFormatID id = FramebufferFormatID(framebuffer_format_cache.size()) | (FramebufferFormatID(ID_TYPE_FRAMEBUFFER_FORMAT) << FramebufferFormatID(ID_BASE_SHIFT));
E = framebuffer_format_cache.insert(key, id);
FramebufferFormat fb_format;
fb_format.E = E;
fb_format.render_pass = render_pass;
fb_format.pass_samples = samples;
fb_format.view_count = p_view_count;
framebuffer_formats[id] = fb_format;
return id;
}
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_empty(TextureSamples p_samples) {
FramebufferFormatKey key;
key.passes.push_back(FramebufferPass());
const Map<FramebufferFormatKey, FramebufferFormatID>::Element *E = framebuffer_format_cache.find(key);
if (E) {
//exists, return
return E->get();
}
VkSubpassDescription subpass;
subpass.flags = 0;
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.inputAttachmentCount = 0; //unsupported for now
subpass.pInputAttachments = nullptr;
subpass.colorAttachmentCount = 0;
subpass.pColorAttachments = nullptr;
subpass.pDepthStencilAttachment = nullptr;
subpass.pResolveAttachments = nullptr;
subpass.preserveAttachmentCount = 0;
subpass.pPreserveAttachments = nullptr;
VkRenderPassCreateInfo render_pass_create_info;
render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
render_pass_create_info.pNext = nullptr;
render_pass_create_info.flags = 0;
render_pass_create_info.attachmentCount = 0;
render_pass_create_info.pAttachments = nullptr;
render_pass_create_info.subpassCount = 1;
render_pass_create_info.pSubpasses = &subpass;
render_pass_create_info.dependencyCount = 0;
render_pass_create_info.pDependencies = nullptr;
VkRenderPass render_pass;
VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass);
ERR_FAIL_COND_V_MSG(res, 0, "vkCreateRenderPass for empty fb failed with error " + itos(res) + ".");
if (render_pass == VK_NULL_HANDLE) { //was likely invalid
return INVALID_ID;
}
FramebufferFormatID id = FramebufferFormatID(framebuffer_format_cache.size()) | (FramebufferFormatID(ID_TYPE_FRAMEBUFFER_FORMAT) << FramebufferFormatID(ID_BASE_SHIFT));
E = framebuffer_format_cache.insert(key, id);
FramebufferFormat fb_format;
fb_format.E = E;
fb_format.render_pass = render_pass;
fb_format.pass_samples.push_back(p_samples);
framebuffer_formats[id] = fb_format;
return id;
}
RenderingDevice::TextureSamples RenderingDeviceVulkan::framebuffer_format_get_texture_samples(FramebufferFormatID p_format, uint32_t p_pass) {
Map<FramebufferFormatID, FramebufferFormat>::Element *E = framebuffer_formats.find(p_format);
ERR_FAIL_COND_V(!E, TEXTURE_SAMPLES_1);
ERR_FAIL_COND_V(p_pass >= uint32_t(E->get().pass_samples.size()), TEXTURE_SAMPLES_1);
return E->get().pass_samples[p_pass];
}
/***********************/
/**** RENDER TARGET ****/
/***********************/
RID RenderingDeviceVulkan::framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples, FramebufferFormatID p_format_check) {
_THREAD_SAFE_METHOD_
Framebuffer framebuffer;
framebuffer.format_id = framebuffer_format_create_empty(p_samples);
ERR_FAIL_COND_V(p_format_check != INVALID_FORMAT_ID && framebuffer.format_id != p_format_check, RID());
framebuffer.size = p_size;
framebuffer.view_count = 1;
return framebuffer_owner.make_rid(framebuffer);
}
RID RenderingDeviceVulkan::framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check, uint32_t p_view_count) {
_THREAD_SAFE_METHOD_
FramebufferPass pass;
for (int i = 0; i < p_texture_attachments.size(); i++) {
Texture *texture = texture_owner.get_or_null(p_texture_attachments[i]);
ERR_FAIL_COND_V_MSG(!texture, RID(), "Texture index supplied for framebuffer (" + itos(i) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(texture->layers != p_view_count, RID(), "Layers of our texture doesn't match view count for this framebuffer");
if (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
pass.depth_attachment = i;
} else {
pass.color_attachments.push_back(i);
}
}
Vector<FramebufferPass> passes;
passes.push_back(pass);
return framebuffer_create_multipass(p_texture_attachments, passes, p_format_check, p_view_count);
}
RID RenderingDeviceVulkan::framebuffer_create_multipass(const Vector<RID> &p_texture_attachments, Vector<FramebufferPass> &p_passes, FramebufferFormatID p_format_check, uint32_t p_view_count) {
_THREAD_SAFE_METHOD_
Vector<AttachmentFormat> attachments;
Size2i size;
for (int i = 0; i < p_texture_attachments.size(); i++) {
Texture *texture = texture_owner.get_or_null(p_texture_attachments[i]);
ERR_FAIL_COND_V_MSG(!texture, RID(), "Texture index supplied for framebuffer (" + itos(i) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(texture->layers != p_view_count, RID(), "Layers of our texture doesn't match view count for this framebuffer");
if (i == 0) {
size.width = texture->width;
size.height = texture->height;
} else {
ERR_FAIL_COND_V_MSG((uint32_t)size.width != texture->width || (uint32_t)size.height != texture->height, RID(),
"All textures in a framebuffer should be the same size.");
}
AttachmentFormat af;
af.format = texture->format;
af.samples = texture->samples;
af.usage_flags = texture->usage_flags;
attachments.push_back(af);
}
FramebufferFormatID format_id = framebuffer_format_create_multipass(attachments, p_passes, p_view_count);
if (format_id == INVALID_ID) {
return RID();
}
ERR_FAIL_COND_V_MSG(p_format_check != INVALID_ID && format_id != p_format_check, RID(),
"The format used to check this framebuffer differs from the intended framebuffer format.");
Framebuffer framebuffer;
framebuffer.format_id = format_id;
framebuffer.texture_ids = p_texture_attachments;
framebuffer.size = size;
framebuffer.view_count = p_view_count;
RID id = framebuffer_owner.make_rid(framebuffer);
for (int i = 0; i < p_texture_attachments.size(); i++) {
_add_dependency(id, p_texture_attachments[i]);
}
return id;
}
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_get_format(RID p_framebuffer) {
_THREAD_SAFE_METHOD_
Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer);
ERR_FAIL_COND_V(!framebuffer, INVALID_ID);
return framebuffer->format_id;
}
/*****************/
/**** SAMPLER ****/
/*****************/
RID RenderingDeviceVulkan::sampler_create(const SamplerState &p_state) {
_THREAD_SAFE_METHOD_
VkSamplerCreateInfo sampler_create_info;
sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
sampler_create_info.pNext = nullptr;
sampler_create_info.flags = 0;
sampler_create_info.magFilter = p_state.mag_filter == SAMPLER_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
sampler_create_info.minFilter = p_state.min_filter == SAMPLER_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
sampler_create_info.mipmapMode = p_state.mip_filter == SAMPLER_FILTER_LINEAR ? VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST;
ERR_FAIL_INDEX_V(p_state.repeat_u, SAMPLER_REPEAT_MODE_MAX, RID());
sampler_create_info.addressModeU = address_modes[p_state.repeat_u];
ERR_FAIL_INDEX_V(p_state.repeat_v, SAMPLER_REPEAT_MODE_MAX, RID());
sampler_create_info.addressModeV = address_modes[p_state.repeat_v];
ERR_FAIL_INDEX_V(p_state.repeat_w, SAMPLER_REPEAT_MODE_MAX, RID());
sampler_create_info.addressModeW = address_modes[p_state.repeat_w];
sampler_create_info.mipLodBias = p_state.lod_bias;
sampler_create_info.anisotropyEnable = p_state.use_anisotropy;
sampler_create_info.maxAnisotropy = p_state.anisotropy_max;
sampler_create_info.compareEnable = p_state.enable_compare;
ERR_FAIL_INDEX_V(p_state.compare_op, COMPARE_OP_MAX, RID());
sampler_create_info.compareOp = compare_operators[p_state.compare_op];
sampler_create_info.minLod = p_state.min_lod;
sampler_create_info.maxLod = p_state.max_lod;
ERR_FAIL_INDEX_V(p_state.border_color, SAMPLER_BORDER_COLOR_MAX, RID());
sampler_create_info.borderColor = sampler_border_colors[p_state.border_color];
sampler_create_info.unnormalizedCoordinates = p_state.unnormalized_uvw;
VkSampler sampler;
VkResult res = vkCreateSampler(device, &sampler_create_info, nullptr, &sampler);
ERR_FAIL_COND_V_MSG(res, RID(), "vkCreateSampler failed with error " + itos(res) + ".");
return sampler_owner.make_rid(sampler);
}
/**********************/
/**** VERTEX ARRAY ****/
/**********************/
RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, bool p_use_as_storage) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID());
ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
uint32_t usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
if (p_use_as_storage) {
usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
}
Buffer buffer;
_buffer_allocate(&buffer, p_size_bytes, usage, VMA_MEMORY_USAGE_GPU_ONLY);
if (p_data.size()) {
uint64_t data_size = p_data.size();
const uint8_t *r = p_data.ptr();
_buffer_update(&buffer, 0, r, data_size);
_buffer_memory_barrier(buffer.buffer, 0, data_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, false);
}
return vertex_buffer_owner.make_rid(buffer);
}
// Internally reference counted, this ID is warranted to be unique for the same description, but needs to be freed as many times as it was allocated
RenderingDevice::VertexFormatID RenderingDeviceVulkan::vertex_format_create(const Vector<VertexAttribute> &p_vertex_formats) {
_THREAD_SAFE_METHOD_
VertexDescriptionKey key;
key.vertex_formats = p_vertex_formats;
VertexFormatID *idptr = vertex_format_cache.getptr(key);
if (idptr) {
return *idptr;
}
//does not exist, create one and cache it
VertexDescriptionCache vdcache;
vdcache.bindings = memnew_arr(VkVertexInputBindingDescription, p_vertex_formats.size());
vdcache.attributes = memnew_arr(VkVertexInputAttributeDescription, p_vertex_formats.size());
Set<int> used_locations;
for (int i = 0; i < p_vertex_formats.size(); i++) {
ERR_CONTINUE(p_vertex_formats[i].format >= DATA_FORMAT_MAX);
ERR_FAIL_COND_V(used_locations.has(p_vertex_formats[i].location), INVALID_ID);
ERR_FAIL_COND_V_MSG(get_format_vertex_size(p_vertex_formats[i].format) == 0, INVALID_ID,
"Data format for attachment (" + itos(i) + "), '" + named_formats[p_vertex_formats[i].format] + "', is not valid for a vertex array.");
vdcache.bindings[i].binding = i;
vdcache.bindings[i].stride = p_vertex_formats[i].stride;
vdcache.bindings[i].inputRate = p_vertex_formats[i].frequency == VERTEX_FREQUENCY_INSTANCE ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
vdcache.attributes[i].binding = i;
vdcache.attributes[i].location = p_vertex_formats[i].location;
vdcache.attributes[i].format = vulkan_formats[p_vertex_formats[i].format];
vdcache.attributes[i].offset = p_vertex_formats[i].offset;
used_locations.insert(p_vertex_formats[i].location);
}
vdcache.create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vdcache.create_info.pNext = nullptr;
vdcache.create_info.flags = 0;
vdcache.create_info.vertexAttributeDescriptionCount = p_vertex_formats.size();
vdcache.create_info.pVertexAttributeDescriptions = vdcache.attributes;
vdcache.create_info.vertexBindingDescriptionCount = p_vertex_formats.size();
vdcache.create_info.pVertexBindingDescriptions = vdcache.bindings;
vdcache.vertex_formats = p_vertex_formats;
VertexFormatID id = VertexFormatID(vertex_format_cache.size()) | (VertexFormatID(ID_TYPE_VERTEX_FORMAT) << ID_BASE_SHIFT);
vertex_format_cache[key] = id;
vertex_formats[id] = vdcache;
return id;
}
RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V(!vertex_formats.has(p_vertex_format), RID());
const VertexDescriptionCache &vd = vertex_formats[p_vertex_format];
ERR_FAIL_COND_V(vd.vertex_formats.size() != p_src_buffers.size(), RID());
for (int i = 0; i < p_src_buffers.size(); i++) {
ERR_FAIL_COND_V(!vertex_buffer_owner.owns(p_src_buffers[i]), RID());
}
VertexArray vertex_array;
vertex_array.vertex_count = p_vertex_count;
vertex_array.description = p_vertex_format;
vertex_array.max_instances_allowed = 0xFFFFFFFF; //by default as many as you want
for (int i = 0; i < p_src_buffers.size(); i++) {
Buffer *buffer = vertex_buffer_owner.get_or_null(p_src_buffers[i]);
//validate with buffer
{
const VertexAttribute &atf = vd.vertex_formats[i];
uint32_t element_size = get_format_vertex_size(atf.format);
ERR_FAIL_COND_V(element_size == 0, RID()); //should never happens since this was prevalidated
if (atf.frequency == VERTEX_FREQUENCY_VERTEX) {
//validate size for regular drawing
uint64_t total_size = uint64_t(atf.stride) * (p_vertex_count - 1) + atf.offset + element_size;
ERR_FAIL_COND_V_MSG(total_size > buffer->size, RID(),
"Attachment (" + itos(i) + ") will read past the end of the buffer.");
} else {
//validate size for instances drawing
uint64_t available = buffer->size - atf.offset;
ERR_FAIL_COND_V_MSG(available < element_size, RID(),
"Attachment (" + itos(i) + ") uses instancing, but it's just too small.");
uint32_t instances_allowed = available / atf.stride;
vertex_array.max_instances_allowed = MIN(instances_allowed, vertex_array.max_instances_allowed);
}
}
vertex_array.buffers.push_back(buffer->buffer);
vertex_array.offsets.push_back(0); //offset unused, but passing anyway
}
RID id = vertex_array_owner.make_rid(vertex_array);
for (int i = 0; i < p_src_buffers.size(); i++) {
_add_dependency(id, p_src_buffers[i]);
}
return id;
}
RID RenderingDeviceVulkan::index_buffer_create(uint32_t p_index_count, IndexBufferFormat p_format, const Vector<uint8_t> &p_data, bool p_use_restart_indices) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
ERR_FAIL_COND_V(p_index_count == 0, RID());
IndexBuffer index_buffer;
index_buffer.index_type = (p_format == INDEX_BUFFER_FORMAT_UINT16) ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
index_buffer.supports_restart_indices = p_use_restart_indices;
index_buffer.index_count = p_index_count;
uint32_t size_bytes = p_index_count * ((p_format == INDEX_BUFFER_FORMAT_UINT16) ? 2 : 4);
#ifdef DEBUG_ENABLED
if (p_data.size()) {
index_buffer.max_index = 0;
ERR_FAIL_COND_V_MSG((uint32_t)p_data.size() != size_bytes, RID(),
"Default index buffer initializer array size (" + itos(p_data.size()) + ") does not match format required size (" + itos(size_bytes) + ").");
const uint8_t *r = p_data.ptr();
if (p_format == INDEX_BUFFER_FORMAT_UINT16) {
const uint16_t *index16 = (const uint16_t *)r;
for (uint32_t i = 0; i < p_index_count; i++) {
if (p_use_restart_indices && index16[i] == 0xFFFF) {
continue; //restart index, ignore
}
index_buffer.max_index = MAX(index16[i], index_buffer.max_index);
}
} else {
const uint32_t *index32 = (const uint32_t *)r;
for (uint32_t i = 0; i < p_index_count; i++) {
if (p_use_restart_indices && index32[i] == 0xFFFFFFFF) {
continue; //restart index, ignore
}
index_buffer.max_index = MAX(index32[i], index_buffer.max_index);
}
}
} else {
index_buffer.max_index = 0xFFFFFFFF;
}
#else
index_buffer.max_index = 0xFFFFFFFF;
#endif
_buffer_allocate(&index_buffer, size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
if (p_data.size()) {
uint64_t data_size = p_data.size();
const uint8_t *r = p_data.ptr();
_buffer_update(&index_buffer, 0, r, data_size);
_buffer_memory_barrier(index_buffer.buffer, 0, data_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT, false);
}
return index_buffer_owner.make_rid(index_buffer);
}
RID RenderingDeviceVulkan::index_array_create(RID p_index_buffer, uint32_t p_index_offset, uint32_t p_index_count) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V(!index_buffer_owner.owns(p_index_buffer), RID());
IndexBuffer *index_buffer = index_buffer_owner.get_or_null(p_index_buffer);
ERR_FAIL_COND_V(p_index_count == 0, RID());
ERR_FAIL_COND_V(p_index_offset + p_index_count > index_buffer->index_count, RID());
IndexArray index_array;
index_array.max_index = index_buffer->max_index;
index_array.buffer = index_buffer->buffer;
index_array.offset = p_index_offset;
index_array.indices = p_index_count;
index_array.index_type = index_buffer->index_type;
index_array.supports_restart_indices = index_buffer->supports_restart_indices;
RID id = index_array_owner.make_rid(index_array);
_add_dependency(id, p_index_buffer);
return id;
}
/****************/
/**** SHADER ****/
/****************/
static const char *shader_stage_names[RenderingDevice::SHADER_STAGE_MAX] = {
"Vertex",
"Fragment",
"TesselationControl",
"TesselationEvaluation",
"Compute"
};
static const char *shader_uniform_names[RenderingDevice::UNIFORM_TYPE_MAX] = {
"Sampler", "CombinedSampler", "Texture", "Image", "TextureBuffer", "SamplerTextureBuffer", "ImageBuffer", "UniformBuffer", "StorageBuffer", "InputAttachment"
};
static VkShaderStageFlagBits shader_stage_masks[RenderingDevice::SHADER_STAGE_MAX] = {
VK_SHADER_STAGE_VERTEX_BIT,
VK_SHADER_STAGE_FRAGMENT_BIT,
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
VK_SHADER_STAGE_COMPUTE_BIT,
};
String RenderingDeviceVulkan::_shader_uniform_debug(RID p_shader, int p_set) {
String ret;
const Shader *shader = shader_owner.get_or_null(p_shader);
ERR_FAIL_COND_V(!shader, String());
for (int i = 0; i < shader->sets.size(); i++) {
if (p_set >= 0 && i != p_set) {
continue;
}
for (int j = 0; j < shader->sets[i].uniform_info.size(); j++) {
const UniformInfo &ui = shader->sets[i].uniform_info[j];
if (!ret.is_empty()) {
ret += "\n";
}
ret += "Set: " + itos(i) + " Binding: " + itos(ui.binding) + " Type: " + shader_uniform_names[ui.type] + " Length: " + itos(ui.length);
}
}
return ret;
}
#if 0
bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLayoutBinding> > &bindings, Vector<Vector<UniformInfo> > &uniform_infos, const glslang::TObjectReflection &reflection, RenderingDevice::ShaderStage p_stage, Shader::PushConstant &push_constant, String *r_error) {
VkDescriptorSetLayoutBinding layout_binding;
UniformInfo info;
switch (reflection.getType()->getBasicType()) {
case glslang::EbtSampler: {
//print_line("DEBUG: IsSampler");
if (reflection.getType()->getSampler().dim == glslang::EsdBuffer) {
//texture buffers
if (reflection.getType()->getSampler().isCombined()) {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
info.type = UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER;
//print_line("DEBUG: SAMPLER: texel combined");
} else if (reflection.getType()->getSampler().isTexture()) {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
info.type = UNIFORM_TYPE_TEXTURE_BUFFER;
//print_line("DEBUG: SAMPLER: texel alone");
} else if (reflection.getType()->getSampler().isImage()) {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
info.type = UNIFORM_TYPE_IMAGE_BUFFER;
//print_line("DEBUG: SAMPLER: texel buffer");
} else {
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' is of unsupported buffer type.";
}
return false;
}
} else if (reflection.getType()->getSampler().isCombined()) {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
info.type = UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
//print_line("DEBUG: SAMPLER: combined");
} else if (reflection.getType()->getSampler().isPureSampler()) {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
info.type = UNIFORM_TYPE_SAMPLER;
//print_line("DEBUG: SAMPLER: sampler");
} else if (reflection.getType()->getSampler().isTexture()) {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
info.type = UNIFORM_TYPE_TEXTURE;
//print_line("DEBUG: SAMPLER: image");
} else if (reflection.getType()->getSampler().isImage()) {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
info.type = UNIFORM_TYPE_IMAGE;
//print_line("DEBUG: SAMPLER: storage image");
} else {
//print_line("DEBUG: sampler unknown");
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' is of unsupported sampler type.";
}
return false;
}
if (reflection.getType()->isArray()) {
layout_binding.descriptorCount = reflection.getType()->getArraySizes()->getCumulativeSize();
//print_line("DEBUG: array of size: " + itos(layout_binding.descriptorCount));
} else {
layout_binding.descriptorCount = 1;
}
info.length = layout_binding.descriptorCount;
} break;
/*case glslang::EbtStruct: {
print_line("DEBUG: Struct");
} break;*/
case glslang::EbtBlock: {
//print_line("DEBUG: Block");
if (reflection.getType()->getQualifier().storage == glslang::EvqUniform) {
if (reflection.getType()->getQualifier().layoutPushConstant) {
uint32_t len = reflection.size;
if (push_constant.push_constant_size != 0 && push_constant.push_constant_size != len) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' push constants for different stages should all be the same size.";
return false;
}
push_constant.push_constant_size = len;
push_constant.push_constants_vk_stage |= shader_stage_masks[p_stage];
return true;
}
//print_line("DEBUG: Uniform buffer");
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
info.type = UNIFORM_TYPE_UNIFORM_BUFFER;
} else if (reflection.getType()->getQualifier().storage == glslang::EvqBuffer) {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
info.type = UNIFORM_TYPE_STORAGE_BUFFER;
//print_line("DEBUG: Storage buffer");
} else {
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' is of unsupported block type: (" + itos(reflection.getType()->getQualifier().storage) + ").";
}
return false;
}
if (reflection.getType()->isArray()) {
layout_binding.descriptorCount = reflection.getType()->getArraySizes()->getCumulativeSize();
//print_line("DEBUG: array of size: " + itos(layout_binding.descriptorCount));
} else {
layout_binding.descriptorCount = 1;
}
info.length = reflection.size;
} break;
/*case glslang::EbtReference: {
} break;*/
/*case glslang::EbtAtomicUint: {
} break;*/
default: {
if (reflection.getType()->getQualifier().hasOffset() || reflection.name.find(".") != std::string::npos) {
//member of uniform block?
return true;
}
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' unsupported uniform type.";
}
return false;
}
}
if (!reflection.getType()->getQualifier().hasBinding()) {
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' lacks a binding number.";
}
return false;
}
uint32_t set = reflection.getType()->getQualifier().hasSet() ? reflection.getType()->getQualifier().layoutSet : 0;
if (set >= MAX_UNIFORM_SETS) {
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ").";
}
return false;
}
if (set >= limits.maxBoundDescriptorSets) {
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' uses a set (" + itos(set) + ") index larger than what is supported by the hardware (" + itos(limits.maxBoundDescriptorSets) + ").";
}
return false;
}
uint32_t binding = reflection.getType()->getQualifier().layoutBinding;
if (set < (uint32_t)bindings.size()) {
//check if this already exists
for (int i = 0; i < bindings[set].size(); i++) {
if (bindings[set][i].binding == binding) {
//already exists, verify that it's the same type
if (bindings[set][i].descriptorType != layout_binding.descriptorType) {
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(binding) + " with different uniform type.";
}
return false;
}
//also, verify that it's the same size
if (bindings[set][i].descriptorCount != layout_binding.descriptorCount || uniform_infos[set][i].length != info.length) {
if (r_error) {
*r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(binding) + " with different uniform size.";
}
return false;
}
//just append stage mask and return
bindings.write[set].write[i].stageFlags |= shader_stage_masks[p_stage];
uniform_infos.write[set].write[i].stages |= 1 << p_stage;
return true;
}
}
}
layout_binding.binding = binding;
layout_binding.stageFlags = shader_stage_masks[p_stage];
layout_binding.pImmutableSamplers = nullptr; //no support for this yet
info.stages = 1 << p_stage;
info.binding = binding;
if (set >= (uint32_t)bindings.size()) {
bindings.resize(set + 1);
uniform_infos.resize(set + 1);
}
#if 0
print_line("stage: " + String(shader_stage_names[p_stage]) + " set: " + itos(set) + " binding: " + itos(info.binding) + " type:" + shader_uniform_names[info.type] + " length: " + itos(info.length));
#endif
bindings.write[set].push_back(layout_binding);
uniform_infos.write[set].push_back(info);
return true;
}
#endif
//version 1: initial
//version 2: Added shader name
#define SHADER_BINARY_VERSION 2
String RenderingDeviceVulkan::shader_get_binary_cache_key() const {
return "Vulkan-SV" + itos(SHADER_BINARY_VERSION);
}
struct RenderingDeviceVulkanShaderBinaryDataBinding {
uint32_t type;
uint32_t binding;
uint32_t stages;
uint32_t length; //size of arrays (in total elements), or ubos (in bytes * total elements)
};
struct RenderingDeviceVulkanShaderBinarySpecializationConstant {
uint32_t type;
uint32_t constant_id;
union {
uint32_t int_value;
float float_value;
bool bool_value;
};
uint32_t stage_flags;
};
struct RenderingDeviceVulkanShaderBinaryData {
uint32_t vertex_input_mask;
uint32_t fragment_outputs;
uint32_t specialization_constant_count;
uint32_t is_compute;
uint32_t compute_local_size[3];
uint32_t set_count;
uint32_t push_constant_size;
uint32_t push_constants_vk_stage;
uint32_t stage_count;
uint32_t shader_name_len;
};
Vector<uint8_t> RenderingDeviceVulkan::shader_compile_binary_from_spirv(const Vector<ShaderStageSPIRVData> &p_spirv, const String &p_shader_name) {
RenderingDeviceVulkanShaderBinaryData binary_data;
binary_data.vertex_input_mask = 0;
binary_data.fragment_outputs = 0;
binary_data.specialization_constant_count = 0;
binary_data.is_compute = 0;
binary_data.compute_local_size[0] = 0;
binary_data.compute_local_size[1] = 0;
binary_data.compute_local_size[2] = 0;
binary_data.set_count = 0;
binary_data.push_constant_size = 0;
binary_data.push_constants_vk_stage = 0;
Vector<Vector<RenderingDeviceVulkanShaderBinaryDataBinding>> uniform_info; //set bindings
Vector<RenderingDeviceVulkanShaderBinarySpecializationConstant> specialization_constants;
uint32_t stages_processed = 0;
for (int i = 0; i < p_spirv.size(); i++) {
if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) {
binary_data.is_compute = true;
ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, Vector<uint8_t>(),
"Compute shaders can only receive one stage, dedicated to compute.");
}
ERR_FAIL_COND_V_MSG(stages_processed & (1 << p_spirv[i].shader_stage), Vector<uint8_t>(),
"Stage " + String(shader_stage_names[p_spirv[i].shader_stage]) + " submitted more than once.");
{
SpvReflectShaderModule module;
const uint8_t *spirv = p_spirv[i].spir_v.ptr();
SpvReflectResult result = spvReflectCreateShaderModule(p_spirv[i].spir_v.size(), spirv, &module);
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed parsing shader.");
if (binary_data.is_compute) {
binary_data.compute_local_size[0] = module.entry_points->local_size.x;
binary_data.compute_local_size[1] = module.entry_points->local_size.y;
binary_data.compute_local_size[2] = module.entry_points->local_size.z;
}
uint32_t binding_count = 0;
result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr);
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating descriptor bindings.");
uint32_t stage = p_spirv[i].shader_stage;
if (binding_count > 0) {
//Parse bindings
Vector<SpvReflectDescriptorBinding *> bindings;
bindings.resize(binding_count);
result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, bindings.ptrw());
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed getting descriptor bindings.");
for (uint32_t j = 0; j < binding_count; j++) {
const SpvReflectDescriptorBinding &binding = *bindings[j];
RenderingDeviceVulkanShaderBinaryDataBinding info;
bool need_array_dimensions = false;
bool need_block_size = false;
switch (binding.descriptor_type) {
case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER: {
info.type = UNIFORM_TYPE_SAMPLER;
need_array_dimensions = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
info.type = UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
need_array_dimensions = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
info.type = UNIFORM_TYPE_TEXTURE;
need_array_dimensions = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
info.type = UNIFORM_TYPE_IMAGE;
need_array_dimensions = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
info.type = UNIFORM_TYPE_TEXTURE_BUFFER;
need_array_dimensions = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
info.type = UNIFORM_TYPE_IMAGE_BUFFER;
need_array_dimensions = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
info.type = UNIFORM_TYPE_UNIFORM_BUFFER;
need_block_size = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
info.type = UNIFORM_TYPE_STORAGE_BUFFER;
need_block_size = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
ERR_PRINT("Dynamic uniform buffer not supported.");
continue;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
ERR_PRINT("Dynamic storage buffer not supported.");
continue;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
info.type = UNIFORM_TYPE_INPUT_ATTACHMENT;
need_array_dimensions = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
ERR_PRINT("Acceleration structure not supported.");
continue;
} break;
}
if (need_array_dimensions) {
if (binding.array.dims_count == 0) {
info.length = 1;
} else {
for (uint32_t k = 0; k < binding.array.dims_count; k++) {
if (k == 0) {
info.length = binding.array.dims[0];
} else {
info.length *= binding.array.dims[k];
}
}
}
} else if (need_block_size) {
info.length = binding.block.size;
} else {
info.length = 0;
}
info.binding = binding.binding;
uint32_t set = binding.set;
ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, Vector<uint8_t>(),
"On shader stage '" + String(shader_stage_names[stage]) + "', uniform '" + binding.name + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ").");
ERR_FAIL_COND_V_MSG(set >= limits.maxBoundDescriptorSets, Vector<uint8_t>(),
"On shader stage '" + String(shader_stage_names[stage]) + "', uniform '" + binding.name + "' uses a set (" + itos(set) + ") index larger than what is supported by the hardware (" + itos(limits.maxBoundDescriptorSets) + ").");
if (set < (uint32_t)uniform_info.size()) {
//check if this already exists
bool exists = false;
for (int k = 0; k < uniform_info[set].size(); k++) {
if (uniform_info[set][k].binding == (uint32_t)info.binding) {
//already exists, verify that it's the same type
ERR_FAIL_COND_V_MSG(uniform_info[set][k].type != info.type, Vector<uint8_t>(),
"On shader stage '" + String(shader_stage_names[stage]) + "', uniform '" + binding.name + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(info.binding) + " with different uniform type.");
//also, verify that it's the same size
ERR_FAIL_COND_V_MSG(uniform_info[set][k].length != info.length, Vector<uint8_t>(),
"On shader stage '" + String(shader_stage_names[stage]) + "', uniform '" + binding.name + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(info.binding) + " with different uniform size.");
//just append stage mask and return
uniform_info.write[set].write[k].stages |= 1 << stage;
exists = true;
}
}
if (exists) {
continue; //merged
}
}
info.stages = 1 << stage;
if (set >= (uint32_t)uniform_info.size()) {
uniform_info.resize(set + 1);
}
uniform_info.write[set].push_back(info);
}
}
{
//specialization constants
uint32_t sc_count = 0;
result = spvReflectEnumerateSpecializationConstants(&module, &sc_count, nullptr);
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating specialization constants.");
if (sc_count) {
Vector<SpvReflectSpecializationConstant *> spec_constants;
spec_constants.resize(sc_count);
result = spvReflectEnumerateSpecializationConstants(&module, &sc_count, spec_constants.ptrw());
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed obtaining specialization constants.");
for (uint32_t j = 0; j < sc_count; j++) {
int32_t existing = -1;
RenderingDeviceVulkanShaderBinarySpecializationConstant sconst;
sconst.constant_id = spec_constants[j]->constant_id;
switch (spec_constants[j]->constant_type) {
case SPV_REFLECT_SPECIALIZATION_CONSTANT_BOOL: {
sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL;
sconst.bool_value = spec_constants[j]->default_value.int_bool_value != 0;
} break;
case SPV_REFLECT_SPECIALIZATION_CONSTANT_INT: {
sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT;
sconst.int_value = spec_constants[j]->default_value.int_bool_value;
} break;
case SPV_REFLECT_SPECIALIZATION_CONSTANT_FLOAT: {
sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT;
sconst.float_value = spec_constants[j]->default_value.float_value;
} break;
}
sconst.stage_flags = 1 << p_spirv[i].shader_stage;
for (int k = 0; k < specialization_constants.size(); k++) {
if (specialization_constants[k].constant_id == sconst.constant_id) {
ERR_FAIL_COND_V_MSG(specialization_constants[k].type != sconst.type, Vector<uint8_t>(), "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ.");
ERR_FAIL_COND_V_MSG(specialization_constants[k].int_value != sconst.int_value, Vector<uint8_t>(), "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ.");
existing = k;
break;
}
}
if (existing > 0) {
specialization_constants.write[existing].stage_flags |= sconst.stage_flags;
} else {
specialization_constants.push_back(sconst);
}
}
}
}
if (stage == SHADER_STAGE_VERTEX) {
uint32_t iv_count = 0;
result = spvReflectEnumerateInputVariables(&module, &iv_count, nullptr);
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating input variables.");
if (iv_count) {
Vector<SpvReflectInterfaceVariable *> input_vars;
input_vars.resize(iv_count);
result = spvReflectEnumerateInputVariables(&module, &iv_count, input_vars.ptrw());
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed obtaining input variables.");
for (uint32_t j = 0; j < iv_count; j++) {
if (input_vars[j] && input_vars[j]->decoration_flags == 0) { //regular input
binary_data.vertex_input_mask |= (1 << uint32_t(input_vars[j]->location));
}
}
}
}
if (stage == SHADER_STAGE_FRAGMENT) {
uint32_t ov_count = 0;
result = spvReflectEnumerateOutputVariables(&module, &ov_count, nullptr);
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating output variables.");
if (ov_count) {
Vector<SpvReflectInterfaceVariable *> output_vars;
output_vars.resize(ov_count);
result = spvReflectEnumerateOutputVariables(&module, &ov_count, output_vars.ptrw());
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed obtaining output variables.");
for (uint32_t j = 0; j < ov_count; j++) {
const SpvReflectInterfaceVariable *refvar = output_vars[j];
if (refvar != nullptr && refvar->built_in != SpvBuiltInFragDepth) {
binary_data.fragment_outputs |= 1 << refvar->location;
}
}
}
}
uint32_t pc_count = 0;
result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr);
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating push constants.");
if (pc_count) {
ERR_FAIL_COND_V_MSG(pc_count > 1, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "': Only one push constant is supported, which should be the same across shader stages.");
Vector<SpvReflectBlockVariable *> pconstants;
pconstants.resize(pc_count);
result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, pconstants.ptrw());
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed obtaining push constants.");
#if 0
if (pconstants[0] == nullptr) {
FileAccess *f = FileAccess::open("res://popo.spv", FileAccess::WRITE);
f->store_buffer((const uint8_t *)&SpirV[0], SpirV.size() * sizeof(uint32_t));
memdelete(f);
}
#endif
ERR_FAIL_COND_V_MSG(binary_data.push_constant_size && binary_data.push_constant_size != pconstants[0]->size, Vector<uint8_t>(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "': Push constant block must be the same across shader stages.");
binary_data.push_constant_size = pconstants[0]->size;
binary_data.push_constants_vk_stage |= shader_stage_masks[stage];
//print_line("Stage: " + String(shader_stage_names[stage]) + " push constant of size=" + itos(push_constant.push_constant_size));
}
// Destroy the reflection data when no longer required.
spvReflectDestroyShaderModule(&module);
}
stages_processed |= (1 << p_spirv[i].shader_stage);
}
Vector<Vector<uint8_t>> compressed_stages;
Vector<uint32_t> smolv_size;
Vector<uint32_t> zstd_size; //if 0, stdno t used
uint32_t stages_binary_size = 0;
bool strip_debug = false;
for (int i = 0; i < p_spirv.size(); i++) {
smolv::ByteArray smolv;
if (!smolv::Encode(p_spirv[i].spir_v.ptr(), p_spirv[i].spir_v.size(), smolv, strip_debug ? smolv::kEncodeFlagStripDebugInfo : 0)) {
ERR_FAIL_V_MSG(Vector<uint8_t>(), "Error compressing shader stage :" + String(shader_stage_names[p_spirv[i].shader_stage]));
} else {
smolv_size.push_back(smolv.size());
{ //zstd
Vector<uint8_t> zstd;
zstd.resize(Compression::get_max_compressed_buffer_size(smolv.size(), Compression::MODE_ZSTD));
int dst_size = Compression::compress(zstd.ptrw(), &smolv[0], smolv.size(), Compression::MODE_ZSTD);
if (dst_size > 0 && (uint32_t)dst_size < smolv.size()) {
zstd_size.push_back(dst_size);
zstd.resize(dst_size);
compressed_stages.push_back(zstd);
} else {
Vector<uint8_t> smv;
smv.resize(smolv.size());
memcpy(smv.ptrw(), &smolv[0], smolv.size());
zstd_size.push_back(0); //not using zstd
compressed_stages.push_back(smv);
}
}
}
uint32_t s = compressed_stages[i].size();
if (s % 4 != 0) {
s += 4 - (s % 4);
}
stages_binary_size += s;
}
binary_data.specialization_constant_count = specialization_constants.size();
binary_data.set_count = uniform_info.size();
binary_data.stage_count = p_spirv.size();
CharString shader_name_utf = p_shader_name.utf8();
binary_data.shader_name_len = shader_name_utf.length();
uint32_t total_size = sizeof(uint32_t) * 3; //header + version + main datasize;
total_size += sizeof(RenderingDeviceVulkanShaderBinaryData);
total_size += binary_data.shader_name_len;
if ((binary_data.shader_name_len % 4) != 0) { //alignment rules are really strange
total_size += 4 - (binary_data.shader_name_len % 4);
}
for (int i = 0; i < uniform_info.size(); i++) {
total_size += sizeof(uint32_t);
total_size += uniform_info[i].size() * sizeof(RenderingDeviceVulkanShaderBinaryDataBinding);
}
total_size += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size();
total_size += compressed_stages.size() * sizeof(uint32_t) * 3; //sizes
total_size += stages_binary_size;
Vector<uint8_t> ret;
ret.resize(total_size);
uint32_t offset = 0;
{
uint8_t *binptr = ret.ptrw();
binptr[0] = 'G';
binptr[1] = 'V';
binptr[2] = 'B';
binptr[3] = 'D'; //godot vulkan binary data
offset += 4;
encode_uint32(SHADER_BINARY_VERSION, binptr + offset);
offset += sizeof(uint32_t);
encode_uint32(sizeof(RenderingDeviceVulkanShaderBinaryData), binptr + offset);
offset += sizeof(uint32_t);
memcpy(binptr + offset, &binary_data, sizeof(RenderingDeviceVulkanShaderBinaryData));
offset += sizeof(RenderingDeviceVulkanShaderBinaryData);
memcpy(binptr + offset, shader_name_utf.ptr(), binary_data.shader_name_len);
offset += binary_data.shader_name_len;
if ((binary_data.shader_name_len % 4) != 0) { //alignment rules are really strange
offset += 4 - (binary_data.shader_name_len % 4);
}
for (int i = 0; i < uniform_info.size(); i++) {
int count = uniform_info[i].size();
encode_uint32(count, binptr + offset);
offset += sizeof(uint32_t);
if (count > 0) {
memcpy(binptr + offset, uniform_info[i].ptr(), sizeof(RenderingDeviceVulkanShaderBinaryDataBinding) * count);
offset += sizeof(RenderingDeviceVulkanShaderBinaryDataBinding) * count;
}
}
if (specialization_constants.size()) {
memcpy(binptr + offset, specialization_constants.ptr(), sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size());
offset += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size();
}
for (int i = 0; i < compressed_stages.size(); i++) {
encode_uint32(p_spirv[i].shader_stage, binptr + offset);
offset += sizeof(uint32_t);
encode_uint32(smolv_size[i], binptr + offset);
offset += sizeof(uint32_t);
encode_uint32(zstd_size[i], binptr + offset);
offset += sizeof(uint32_t);
memcpy(binptr + offset, compressed_stages[i].ptr(), compressed_stages[i].size());
uint32_t s = compressed_stages[i].size();
if (s % 4 != 0) {
s += 4 - (s % 4);
}
offset += s;
}
ERR_FAIL_COND_V(offset != (uint32_t)ret.size(), Vector<uint8_t>());
}
return ret;
}
RID RenderingDeviceVulkan::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary) {
const uint8_t *binptr = p_shader_binary.ptr();
uint32_t binsize = p_shader_binary.size();
uint32_t read_offset = 0;
//consistency check
ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 3 + sizeof(RenderingDeviceVulkanShaderBinaryData), RID());
ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'V' || binptr[2] != 'B' || binptr[3] != 'D', RID());
uint32_t bin_version = decode_uint32(binptr + 4);
ERR_FAIL_COND_V(bin_version > SHADER_BINARY_VERSION, RID());
uint32_t bin_data_size = decode_uint32(binptr + 8);
const RenderingDeviceVulkanShaderBinaryData &binary_data = *(const RenderingDeviceVulkanShaderBinaryData *)(binptr + 12);
Shader::PushConstant push_constant;
push_constant.push_constant_size = binary_data.push_constant_size;
push_constant.push_constants_vk_stage = binary_data.push_constants_vk_stage;
uint32_t vertex_input_mask = binary_data.vertex_input_mask;
uint32_t fragment_outputs = binary_data.fragment_outputs;
bool is_compute = binary_data.is_compute;
uint32_t compute_local_size[3] = { binary_data.compute_local_size[0], binary_data.compute_local_size[1], binary_data.compute_local_size[2] };
read_offset += sizeof(uint32_t) * 3 + bin_data_size;
String name;
if (binary_data.shader_name_len) {
name.parse_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len);
read_offset += binary_data.shader_name_len;
if ((binary_data.shader_name_len % 4) != 0) { //alignment rules are really strange
read_offset += 4 - (binary_data.shader_name_len % 4);
}
}
Vector<Vector<VkDescriptorSetLayoutBinding>> set_bindings;
Vector<Vector<UniformInfo>> uniform_info;
set_bindings.resize(binary_data.set_count);
uniform_info.resize(binary_data.set_count);
for (uint32_t i = 0; i < binary_data.set_count; i++) {
ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) >= binsize, RID());
uint32_t set_count = decode_uint32(binptr + read_offset);
read_offset += sizeof(uint32_t);
const RenderingDeviceVulkanShaderBinaryDataBinding *set_ptr = (const RenderingDeviceVulkanShaderBinaryDataBinding *)(binptr + read_offset);
uint32_t set_size = set_count * sizeof(RenderingDeviceVulkanShaderBinaryDataBinding);
ERR_FAIL_COND_V(read_offset + set_size >= binsize, RID());
for (uint32_t j = 0; j < set_count; j++) {
UniformInfo info;
info.type = UniformType(set_ptr[j].type);
info.length = set_ptr[j].length;
info.binding = set_ptr[j].binding;
info.stages = set_ptr[j].stages;
VkDescriptorSetLayoutBinding layout_binding;
layout_binding.pImmutableSamplers = nullptr;
layout_binding.binding = set_ptr[j].binding;
layout_binding.descriptorCount = 1;
layout_binding.stageFlags = 0;
for (uint32_t k = 0; k < SHADER_STAGE_MAX; k++) {
if (set_ptr[j].stages & (1 << k)) {
layout_binding.stageFlags |= shader_stage_masks[k];
}
}
switch (info.type) {
case UNIFORM_TYPE_SAMPLER: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
layout_binding.descriptorCount = set_ptr[j].length;
} break;
case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
layout_binding.descriptorCount = set_ptr[j].length;
} break;
case UNIFORM_TYPE_TEXTURE: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
layout_binding.descriptorCount = set_ptr[j].length;
} break;
case UNIFORM_TYPE_IMAGE: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
layout_binding.descriptorCount = set_ptr[j].length;
} break;
case UNIFORM_TYPE_TEXTURE_BUFFER: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
layout_binding.descriptorCount = set_ptr[j].length;
} break;
case UNIFORM_TYPE_IMAGE_BUFFER: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
} break;
case UNIFORM_TYPE_UNIFORM_BUFFER: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
} break;
case UNIFORM_TYPE_STORAGE_BUFFER: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
} break;
case UNIFORM_TYPE_INPUT_ATTACHMENT: {
layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
} break;
default: {
ERR_FAIL_V(RID());
}
}
set_bindings.write[i].push_back(layout_binding);
uniform_info.write[i].push_back(info);
}
read_offset += set_size;
}
ERR_FAIL_COND_V(read_offset + binary_data.specialization_constant_count * sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) >= binsize, RID());
Vector<Shader::SpecializationConstant> specialization_constants;
for (uint32_t i = 0; i < binary_data.specialization_constant_count; i++) {
const RenderingDeviceVulkanShaderBinarySpecializationConstant &src_sc = *(const RenderingDeviceVulkanShaderBinarySpecializationConstant *)(binptr + read_offset);
Shader::SpecializationConstant sc;
sc.constant.int_value = src_sc.int_value;
sc.constant.type = PipelineSpecializationConstantType(src_sc.type);
sc.constant.constant_id = src_sc.constant_id;
sc.stage_flags = src_sc.stage_flags;
specialization_constants.push_back(sc);
read_offset += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant);
}
Vector<Vector<uint8_t>> stage_spirv_data;
Vector<ShaderStage> stage_type;
for (uint32_t i = 0; i < binary_data.stage_count; i++) {
ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, RID());
uint32_t stage = decode_uint32(binptr + read_offset);
read_offset += sizeof(uint32_t);
uint32_t smolv_size = decode_uint32(binptr + read_offset);
read_offset += sizeof(uint32_t);
uint32_t zstd_size = decode_uint32(binptr + read_offset);
read_offset += sizeof(uint32_t);
uint32_t buf_size = (zstd_size > 0) ? zstd_size : smolv_size;
Vector<uint8_t> smolv;
const uint8_t *src_smolv = nullptr;
if (zstd_size > 0) {
//decompress to smolv
smolv.resize(smolv_size);
int dec_smolv_size = Compression::decompress(smolv.ptrw(), smolv.size(), binptr + read_offset, zstd_size, Compression::MODE_ZSTD);
ERR_FAIL_COND_V(dec_smolv_size != (int32_t)smolv_size, RID());
src_smolv = smolv.ptr();
} else {
src_smolv = binptr + read_offset;
}
Vector<uint8_t> spirv;
uint32_t spirv_size = smolv::GetDecodedBufferSize(src_smolv, smolv_size);
spirv.resize(spirv_size);
if (!smolv::Decode(src_smolv, smolv_size, spirv.ptrw(), spirv_size)) {
ERR_FAIL_V_MSG(RID(), "Malformed smolv input uncompressing shader stage:" + String(shader_stage_names[stage]));
}
stage_spirv_data.push_back(spirv);
stage_type.push_back(ShaderStage(stage));
if (buf_size % 4 != 0) {
buf_size += 4 - (buf_size % 4);
}
ERR_FAIL_COND_V(read_offset + buf_size > binsize, RID());
read_offset += buf_size;
}
ERR_FAIL_COND_V(read_offset != binsize, RID());
//all good, let's create modules
_THREAD_SAFE_METHOD_
Shader shader;
shader.vertex_input_mask = vertex_input_mask;
shader.fragment_output_mask = fragment_outputs;
shader.push_constant = push_constant;
shader.is_compute = is_compute;
shader.compute_local_size[0] = compute_local_size[0];
shader.compute_local_size[1] = compute_local_size[1];
shader.compute_local_size[2] = compute_local_size[2];
shader.specialization_constants = specialization_constants;
shader.name = name;
String error_text;
bool success = true;
for (int i = 0; i < stage_spirv_data.size(); i++) {
VkShaderModuleCreateInfo shader_module_create_info;
shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
shader_module_create_info.pNext = nullptr;
shader_module_create_info.flags = 0;
shader_module_create_info.codeSize = stage_spirv_data[i].size();
const uint8_t *r = stage_spirv_data[i].ptr();
shader_module_create_info.pCode = (const uint32_t *)r;
VkShaderModule module;
VkResult res = vkCreateShaderModule(device, &shader_module_create_info, nullptr, &module);
if (res) {
success = false;
error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(shader_stage_names[stage_type[i]]);
break;
}
const VkShaderStageFlagBits shader_stage_bits[SHADER_STAGE_MAX] = {
VK_SHADER_STAGE_VERTEX_BIT,
VK_SHADER_STAGE_FRAGMENT_BIT,
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
VK_SHADER_STAGE_COMPUTE_BIT,
};
VkPipelineShaderStageCreateInfo shader_stage;
shader_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shader_stage.pNext = nullptr;
shader_stage.flags = 0;
shader_stage.stage = shader_stage_bits[stage_type[i]];
shader_stage.module = module;
shader_stage.pName = "main";
shader_stage.pSpecializationInfo = nullptr;
shader.pipeline_stages.push_back(shader_stage);
}
//proceed to create descriptor sets
if (success) {
for (int i = 0; i < set_bindings.size(); i++) {
//empty ones are fine if they were not used according to spec (binding count will be 0)
VkDescriptorSetLayoutCreateInfo layout_create_info;
layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
layout_create_info.pNext = nullptr;
layout_create_info.flags = 0;
layout_create_info.bindingCount = set_bindings[i].size();
layout_create_info.pBindings = set_bindings[i].ptr();
VkDescriptorSetLayout layout;
VkResult res = vkCreateDescriptorSetLayout(device, &layout_create_info, nullptr, &layout);
if (res) {
error_text = "Error (" + itos(res) + ") creating descriptor set layout for set " + itos(i);
success = false;
break;
}
Shader::Set set;
set.descriptor_set_layout = layout;
set.uniform_info = uniform_info[i];
//sort and hash
set.uniform_info.sort();
uint32_t format = 0; //no format, default
if (set.uniform_info.size()) {
//has data, needs an actual format;
UniformSetFormat usformat;
usformat.uniform_info = set.uniform_info;
Map<UniformSetFormat, uint32_t>::Element *E = uniform_set_format_cache.find(usformat);
if (E) {
format = E->get();
} else {
format = uniform_set_format_cache.size() + 1;
uniform_set_format_cache.insert(usformat, format);
}
}
shader.sets.push_back(set);
shader.set_formats.push_back(format);
}
}
if (success) {
//create pipeline layout
VkPipelineLayoutCreateInfo pipeline_layout_create_info;
pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pipeline_layout_create_info.pNext = nullptr;
pipeline_layout_create_info.flags = 0;
pipeline_layout_create_info.setLayoutCount = shader.sets.size();
Vector<VkDescriptorSetLayout> layouts;
layouts.resize(shader.sets.size());
for (int i = 0; i < layouts.size(); i++) {
layouts.write[i] = shader.sets[i].descriptor_set_layout;
}
pipeline_layout_create_info.pSetLayouts = layouts.ptr();
// Needs to be declared in this outer scope, otherwise it may not outlive its assignment
// to pipeline_layout_create_info.
VkPushConstantRange push_constant_range;
if (push_constant.push_constant_size) {
push_constant_range.stageFlags = push_constant.push_constants_vk_stage;
push_constant_range.offset = 0;
push_constant_range.size = push_constant.push_constant_size;
pipeline_layout_create_info.pushConstantRangeCount = 1;
pipeline_layout_create_info.pPushConstantRanges = &push_constant_range;
} else {
pipeline_layout_create_info.pushConstantRangeCount = 0;
pipeline_layout_create_info.pPushConstantRanges = nullptr;
}
VkResult err = vkCreatePipelineLayout(device, &pipeline_layout_create_info, nullptr, &shader.pipeline_layout);
if (err) {
error_text = "Error (" + itos(err) + ") creating pipeline layout.";
success = false;
}
}
if (!success) {
//clean up if failed
for (int i = 0; i < shader.pipeline_stages.size(); i++) {
vkDestroyShaderModule(device, shader.pipeline_stages[i].module, nullptr);
}
for (int i = 0; i < shader.sets.size(); i++) {
vkDestroyDescriptorSetLayout(device, shader.sets[i].descriptor_set_layout, nullptr);
}
ERR_FAIL_V_MSG(RID(), error_text);
}
return shader_owner.make_rid(shader);
}
uint32_t RenderingDeviceVulkan::shader_get_vertex_input_attribute_mask(RID p_shader) {
_THREAD_SAFE_METHOD_
const Shader *shader = shader_owner.get_or_null(p_shader);
ERR_FAIL_COND_V(!shader, 0);
return shader->vertex_input_mask;
}
/******************/
/**** UNIFORMS ****/
/******************/
RID RenderingDeviceVulkan::uniform_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID());
ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
Buffer buffer;
Error err = _buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
ERR_FAIL_COND_V(err != OK, RID());
if (p_data.size()) {
uint64_t data_size = p_data.size();
const uint8_t *r = p_data.ptr();
_buffer_update(&buffer, 0, r, data_size);
_buffer_memory_barrier(buffer.buffer, 0, data_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, false);
}
return uniform_buffer_owner.make_rid(buffer);
}
RID RenderingDeviceVulkan::storage_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, uint32_t p_usage) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID());
Buffer buffer;
buffer.usage = p_usage;
uint32_t flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
if (p_usage & STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT) {
flags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
}
Error err = _buffer_allocate(&buffer, p_size_bytes, flags, VMA_MEMORY_USAGE_GPU_ONLY);
ERR_FAIL_COND_V(err != OK, RID());
if (p_data.size()) {
uint64_t data_size = p_data.size();
const uint8_t *r = p_data.ptr();
_buffer_update(&buffer, 0, r, data_size);
_buffer_memory_barrier(buffer.buffer, 0, data_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, false);
}
return storage_buffer_owner.make_rid(buffer);
}
RID RenderingDeviceVulkan::texture_buffer_create(uint32_t p_size_elements, DataFormat p_format, const Vector<uint8_t> &p_data) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(),
"Creating buffers with data is forbidden during creation of a draw list");
uint32_t element_size = get_format_vertex_size(p_format);
ERR_FAIL_COND_V_MSG(element_size == 0, RID(), "Format requested is not supported for texture buffers");
uint64_t size_bytes = uint64_t(element_size) * p_size_elements;
ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != size_bytes, RID());
TextureBuffer texture_buffer;
Error err = _buffer_allocate(&texture_buffer.buffer, size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
ERR_FAIL_COND_V(err != OK, RID());
if (p_data.size()) {
uint64_t data_size = p_data.size();
const uint8_t *r = p_data.ptr();
_buffer_update(&texture_buffer.buffer, 0, r, data_size);
_buffer_memory_barrier(texture_buffer.buffer.buffer, 0, data_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, false);
}
VkBufferViewCreateInfo view_create_info;
view_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
view_create_info.pNext = nullptr;
view_create_info.flags = 0;
view_create_info.buffer = texture_buffer.buffer.buffer;
view_create_info.format = vulkan_formats[p_format];
view_create_info.offset = 0;
view_create_info.range = size_bytes;
texture_buffer.view = VK_NULL_HANDLE;
VkResult res = vkCreateBufferView(device, &view_create_info, nullptr, &texture_buffer.view);
if (res) {
_buffer_free(&texture_buffer.buffer);
ERR_FAIL_V_MSG(RID(), "Unable to create buffer view, error " + itos(res) + ".");
}
//allocate the view
return texture_buffer_owner.make_rid(texture_buffer);
}
RenderingDeviceVulkan::DescriptorPool *RenderingDeviceVulkan::_descriptor_pool_allocate(const DescriptorPoolKey &p_key) {
if (!descriptor_pools.has(p_key)) {
descriptor_pools[p_key] = Set<DescriptorPool *>();
}
DescriptorPool *pool = nullptr;
for (Set<DescriptorPool *>::Element *E = descriptor_pools[p_key].front(); E; E = E->next()) {
if (E->get()->usage < max_descriptors_per_pool) {
pool = E->get();
break;
}
}
if (!pool) {
//create a new one
pool = memnew(DescriptorPool);
pool->usage = 0;
VkDescriptorPoolCreateInfo descriptor_pool_create_info;
descriptor_pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descriptor_pool_create_info.pNext = nullptr;
descriptor_pool_create_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; // can't think how somebody may NOT need this flag..
descriptor_pool_create_info.maxSets = max_descriptors_per_pool;
Vector<VkDescriptorPoolSize> sizes;
//here comes more vulkan API strangeness
if (p_key.uniform_type[UNIFORM_TYPE_SAMPLER]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_SAMPLER;
s.descriptorCount = p_key.uniform_type[UNIFORM_TYPE_SAMPLER] * max_descriptors_per_pool;
sizes.push_back(s);
}
if (p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
s.descriptorCount = p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE] * max_descriptors_per_pool;
sizes.push_back(s);
}
if (p_key.uniform_type[UNIFORM_TYPE_TEXTURE]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
s.descriptorCount = p_key.uniform_type[UNIFORM_TYPE_TEXTURE] * max_descriptors_per_pool;
sizes.push_back(s);
}
if (p_key.uniform_type[UNIFORM_TYPE_IMAGE]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
s.descriptorCount = p_key.uniform_type[UNIFORM_TYPE_IMAGE] * max_descriptors_per_pool;
sizes.push_back(s);
}
if (p_key.uniform_type[UNIFORM_TYPE_TEXTURE_BUFFER] || p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
s.descriptorCount = (p_key.uniform_type[UNIFORM_TYPE_TEXTURE_BUFFER] + p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER]) * max_descriptors_per_pool;
sizes.push_back(s);
}
if (p_key.uniform_type[UNIFORM_TYPE_IMAGE_BUFFER]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
s.descriptorCount = p_key.uniform_type[UNIFORM_TYPE_IMAGE_BUFFER] * max_descriptors_per_pool;
sizes.push_back(s);
}
if (p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
s.descriptorCount = p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER] * max_descriptors_per_pool;
sizes.push_back(s);
}
if (p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
s.descriptorCount = p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER] * max_descriptors_per_pool;
sizes.push_back(s);
}
if (p_key.uniform_type[UNIFORM_TYPE_INPUT_ATTACHMENT]) {
VkDescriptorPoolSize s;
s.type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
s.descriptorCount = p_key.uniform_type[UNIFORM_TYPE_INPUT_ATTACHMENT] * max_descriptors_per_pool;
sizes.push_back(s);
}
descriptor_pool_create_info.poolSizeCount = sizes.size();
descriptor_pool_create_info.pPoolSizes = sizes.ptr();
VkResult res = vkCreateDescriptorPool(device, &descriptor_pool_create_info, nullptr, &pool->pool);
if (res) {
memdelete(pool);
ERR_FAIL_COND_V_MSG(res, nullptr, "vkCreateDescriptorPool failed with error " + itos(res) + ".");
}
descriptor_pools[p_key].insert(pool);
}
pool->usage++;
return pool;
}
void RenderingDeviceVulkan::_descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool) {
#ifdef DEBUG_ENABLED
ERR_FAIL_COND(!descriptor_pools[p_key].has(p_pool));
#endif
ERR_FAIL_COND(p_pool->usage == 0);
p_pool->usage--;
if (p_pool->usage == 0) {
vkDestroyDescriptorPool(device, p_pool->pool, nullptr);
descriptor_pools[p_key].erase(p_pool);
memdelete(p_pool);
if (descriptor_pools[p_key].is_empty()) {
descriptor_pools.erase(p_key);
}
}
}
RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p_shader, uint32_t p_shader_set) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V(p_uniforms.size() == 0, RID());
Shader *shader = shader_owner.get_or_null(p_shader);
ERR_FAIL_COND_V(!shader, RID());
ERR_FAIL_COND_V_MSG(p_shader_set >= (uint32_t)shader->sets.size() || shader->sets[p_shader_set].uniform_info.size() == 0, RID(),
"Desired set (" + itos(p_shader_set) + ") not used by shader.");
//see that all sets in shader are satisfied
const Shader::Set &set = shader->sets[p_shader_set];
uint32_t uniform_count = p_uniforms.size();
const Uniform *uniforms = p_uniforms.ptr();
uint32_t set_uniform_count = set.uniform_info.size();
const UniformInfo *set_uniforms = set.uniform_info.ptr();
Vector<VkWriteDescriptorSet> writes;
DescriptorPoolKey pool_key;
//to keep them alive until update call
List<Vector<VkDescriptorBufferInfo>> buffer_infos;
List<Vector<VkBufferView>> buffer_views;
List<Vector<VkDescriptorImageInfo>> image_infos;
//used for verification to make sure a uniform set does not use a framebuffer bound texture
LocalVector<UniformSet::AttachableTexture> attachable_textures;
Vector<Texture *> mutable_sampled_textures;
Vector<Texture *> mutable_storage_textures;
for (uint32_t i = 0; i < set_uniform_count; i++) {
const UniformInfo &set_uniform = set_uniforms[i];
int uniform_idx = -1;
for (int j = 0; j < (int)uniform_count; j++) {
if (uniforms[j].binding == set_uniform.binding) {
uniform_idx = j;
}
}
ERR_FAIL_COND_V_MSG(uniform_idx == -1, RID(),
"All the shader bindings for the given set must be covered by the uniforms provided. Binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + ") was not provided.");
const Uniform &uniform = uniforms[uniform_idx];
ERR_FAIL_COND_V_MSG(uniform.uniform_type != set_uniform.type, RID(),
"Mismatch uniform type for binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + "). Expected '" + shader_uniform_names[set_uniform.type] + "', supplied: '" + shader_uniform_names[uniform.uniform_type] + "'.");
VkWriteDescriptorSet write; //common header
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.pNext = nullptr;
write.dstSet = VK_NULL_HANDLE; //will assign afterwards when everything is valid
write.dstBinding = set_uniform.binding;
write.dstArrayElement = 0;
write.descriptorCount = 0;
write.descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM; //Invalid value.
write.pImageInfo = nullptr;
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
uint32_t type_size = 1;
switch (uniform.uniform_type) {
case UNIFORM_TYPE_SAMPLER: {
if (uniform.ids.size() != set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Sampler (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") sampler elements, so it should be provided equal number of sampler IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Sampler (binding: " + itos(uniform.binding) + ") should provide one ID referencing a sampler (IDs provided: " + itos(uniform.ids.size()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (int j = 0; j < uniform.ids.size(); j++) {
VkSampler *sampler = sampler_owner.get_or_null(uniform.ids[j]);
ERR_FAIL_COND_V_MSG(!sampler, RID(), "Sampler (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid sampler.");
VkDescriptorImageInfo img_info;
img_info.sampler = *sampler;
img_info.imageView = VK_NULL_HANDLE;
img_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.ids.size();
write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.ids.size();
} break;
case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
if (uniform.ids.size() != set_uniform.length * 2) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "SamplerTexture (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") sampler&texture elements, so it should provided twice the amount of IDs (sampler,texture pairs) to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "SamplerTexture (binding: " + itos(uniform.binding) + ") should provide two IDs referencing a sampler and then a texture (IDs provided: " + itos(uniform.ids.size()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (int j = 0; j < uniform.ids.size(); j += 2) {
VkSampler *sampler = sampler_owner.get_or_null(uniform.ids[j + 0]);
ERR_FAIL_COND_V_MSG(!sampler, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid sampler.");
Texture *texture = texture_owner.get_or_null(uniform.ids[j + 1]);
ERR_FAIL_COND_V_MSG(!texture, RID(), "Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT), RID(),
"Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_SAMPLING_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = *sampler;
img_info.imageView = texture->view;
if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)) {
UniformSet::AttachableTexture attachable_texture;
attachable_texture.bind = set_uniform.binding;
attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.ids[j + 1];
attachable_textures.push_back(attachable_texture);
}
if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
//can also be used as storage, add to mutable sampled
mutable_sampled_textures.push_back(texture);
}
if (texture->owner.is_valid()) {
texture = texture_owner.get_or_null(texture->owner);
ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
}
img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.ids.size() / 2;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.ids.size() / 2;
} break;
case UNIFORM_TYPE_TEXTURE: {
if (uniform.ids.size() != set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Texture (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Texture (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.ids.size()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (int j = 0; j < uniform.ids.size(); j++) {
Texture *texture = texture_owner.get_or_null(uniform.ids[j]);
ERR_FAIL_COND_V_MSG(!texture, RID(), "Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT), RID(),
"Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_SAMPLING_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = VK_NULL_HANDLE;
img_info.imageView = texture->view;
if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)) {
UniformSet::AttachableTexture attachable_texture;
attachable_texture.bind = set_uniform.binding;
attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.ids[j];
attachable_textures.push_back(attachable_texture);
}
if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
//can also be used as storage, add to mutable sampled
mutable_sampled_textures.push_back(texture);
}
if (texture->owner.is_valid()) {
texture = texture_owner.get_or_null(texture->owner);
ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
}
img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.ids.size();
write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.ids.size();
} break;
case UNIFORM_TYPE_IMAGE: {
if (uniform.ids.size() != set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.ids.size()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (int j = 0; j < uniform.ids.size(); j++) {
Texture *texture = texture_owner.get_or_null(uniform.ids[j]);
ERR_FAIL_COND_V_MSG(!texture, RID(),
"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(),
"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = VK_NULL_HANDLE;
img_info.imageView = texture->view;
if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
//can also be used as storage, add to mutable sampled
mutable_storage_textures.push_back(texture);
}
if (texture->owner.is_valid()) {
texture = texture_owner.get_or_null(texture->owner);
ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
}
img_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.ids.size();
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.ids.size();
} break;
case UNIFORM_TYPE_TEXTURE_BUFFER: {
if (uniform.ids.size() != set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Buffer (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") texture buffer elements, so it should be provided equal number of texture buffer IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Buffer (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture buffer (IDs provided: " + itos(uniform.ids.size()) + ").");
}
}
Vector<VkDescriptorBufferInfo> buffer_info;
Vector<VkBufferView> buffer_view;
for (int j = 0; j < uniform.ids.size(); j++) {
TextureBuffer *buffer = texture_buffer_owner.get_or_null(uniform.ids[j]);
ERR_FAIL_COND_V_MSG(!buffer, RID(), "Texture Buffer (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture buffer.");
buffer_info.push_back(buffer->buffer.buffer_info);
buffer_view.push_back(buffer->view);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.ids.size();
write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
write.pImageInfo = nullptr;
write.pBufferInfo = buffer_infos.push_back(buffer_info)->get().ptr();
write.pTexelBufferView = buffer_views.push_back(buffer_view)->get().ptr();
type_size = uniform.ids.size();
} break;
case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
if (uniform.ids.size() != set_uniform.length * 2) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") sampler buffer elements, so it should provided twice the amount of IDs (sampler,buffer pairs) to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ") should provide two IDs referencing a sampler and then a texture buffer (IDs provided: " + itos(uniform.ids.size()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
Vector<VkDescriptorBufferInfo> buffer_info;
Vector<VkBufferView> buffer_view;
for (int j = 0; j < uniform.ids.size(); j += 2) {
VkSampler *sampler = sampler_owner.get_or_null(uniform.ids[j + 0]);
ERR_FAIL_COND_V_MSG(!sampler, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid sampler.");
TextureBuffer *buffer = texture_buffer_owner.get_or_null(uniform.ids[j + 1]);
VkDescriptorImageInfo img_info;
img_info.sampler = *sampler;
img_info.imageView = VK_NULL_HANDLE;
img_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_info.push_back(img_info);
ERR_FAIL_COND_V_MSG(!buffer, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid texture buffer.");
buffer_info.push_back(buffer->buffer.buffer_info);
buffer_view.push_back(buffer->view);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.ids.size() / 2;
write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = buffer_infos.push_back(buffer_info)->get().ptr();
write.pTexelBufferView = buffer_views.push_back(buffer_view)->get().ptr();
type_size = uniform.ids.size() / 2;
} break;
case UNIFORM_TYPE_IMAGE_BUFFER: {
//todo
} break;
case UNIFORM_TYPE_UNIFORM_BUFFER: {
ERR_FAIL_COND_V_MSG(uniform.ids.size() != 1, RID(),
"Uniform buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.ids.size()) + " provided).");
Buffer *buffer = uniform_buffer_owner.get_or_null(uniform.ids[0]);
ERR_FAIL_COND_V_MSG(!buffer, RID(), "Uniform buffer supplied (binding: " + itos(uniform.binding) + ") is invalid.");
ERR_FAIL_COND_V_MSG(buffer->size != (uint32_t)set_uniform.length, RID(),
"Uniform buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " does not match size of shader uniform: (" + itos(set_uniform.length) + ").");
write.dstArrayElement = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
write.pImageInfo = nullptr;
write.pBufferInfo = &buffer->buffer_info;
write.pTexelBufferView = nullptr;
} break;
case UNIFORM_TYPE_STORAGE_BUFFER: {
ERR_FAIL_COND_V_MSG(uniform.ids.size() != 1, RID(),
"Storage buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.ids.size()) + " provided).");
Buffer *buffer = nullptr;
if (storage_buffer_owner.owns(uniform.ids[0])) {
buffer = storage_buffer_owner.get_or_null(uniform.ids[0]);
} else if (vertex_buffer_owner.owns(uniform.ids[0])) {
buffer = vertex_buffer_owner.get_or_null(uniform.ids[0]);
ERR_FAIL_COND_V_MSG(!(buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), RID(), "Vertex buffer supplied (binding: " + itos(uniform.binding) + ") was not created with storage flag.");
}
ERR_FAIL_COND_V_MSG(!buffer, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") is invalid.");
//if 0, then it's sized on link time
ERR_FAIL_COND_V_MSG(set_uniform.length > 0 && buffer->size != (uint32_t)set_uniform.length, RID(),
"Storage buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " does not match size of shader uniform: (" + itos(set_uniform.length) + ").");
write.dstArrayElement = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.pImageInfo = nullptr;
write.pBufferInfo = &buffer->buffer_info;
write.pTexelBufferView = nullptr;
} break;
case UNIFORM_TYPE_INPUT_ATTACHMENT: {
ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for compute shader (this is not allowed).");
if (uniform.ids.size() != set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.ids.size()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (int j = 0; j < uniform.ids.size(); j++) {
Texture *texture = texture_owner.get_or_null(uniform.ids[j]);
ERR_FAIL_COND_V_MSG(!texture, RID(),
"InputAttachment (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT), RID(),
"InputAttachment (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_SAMPLING_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = VK_NULL_HANDLE;
img_info.imageView = texture->view;
if (texture->owner.is_valid()) {
texture = texture_owner.get_or_null(texture->owner);
ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
}
img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.ids.size();
write.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.ids.size();
} break;
default: {
}
}
writes.push_back(write);
ERR_FAIL_COND_V_MSG(pool_key.uniform_type[set_uniform.type] == MAX_DESCRIPTOR_POOL_ELEMENT, RID(),
"Uniform set reached the limit of bindings for the same type (" + itos(MAX_DESCRIPTOR_POOL_ELEMENT) + ").");
pool_key.uniform_type[set_uniform.type] += type_size;
}
//need a descriptor pool
DescriptorPool *pool = _descriptor_pool_allocate(pool_key);
ERR_FAIL_COND_V(!pool, RID());
VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
descriptor_set_allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptor_set_allocate_info.pNext = nullptr;
descriptor_set_allocate_info.descriptorPool = pool->pool;
descriptor_set_allocate_info.descriptorSetCount = 1;
descriptor_set_allocate_info.pSetLayouts = &shader->sets[p_shader_set].descriptor_set_layout;
VkDescriptorSet descriptor_set;
VkResult res = vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, &descriptor_set);
if (res) {
_descriptor_pool_free(pool_key, pool); // meh
ERR_FAIL_V_MSG(RID(), "Cannot allocate descriptor sets, error " + itos(res) + ".");
}
UniformSet uniform_set;
uniform_set.pool = pool;
uniform_set.pool_key = pool_key;
uniform_set.descriptor_set = descriptor_set;
uniform_set.format = shader->set_formats[p_shader_set];
uniform_set.attachable_textures = attachable_textures;
uniform_set.mutable_sampled_textures = mutable_sampled_textures;
uniform_set.mutable_storage_textures = mutable_storage_textures;
uniform_set.shader_set = p_shader_set;
uniform_set.shader_id = p_shader;
RID id = uniform_set_owner.make_rid(uniform_set);
//add dependencies
_add_dependency(id, p_shader);
for (uint32_t i = 0; i < uniform_count; i++) {
const Uniform &uniform = uniforms[i];
int id_count = uniform.ids.size();
const RID *ids = uniform.ids.ptr();
for (int j = 0; j < id_count; j++) {
_add_dependency(id, ids[j]);
}
}
//write the contents
if (writes.size()) {
for (int i = 0; i < writes.size(); i++) {
writes.write[i].dstSet = descriptor_set;
}
vkUpdateDescriptorSets(device, writes.size(), writes.ptr(), 0, nullptr);
}
return id;
}
bool RenderingDeviceVulkan::uniform_set_is_valid(RID p_uniform_set) {
return uniform_set_owner.owns(p_uniform_set);
}
void RenderingDeviceVulkan::uniform_set_set_invalidation_callback(RID p_uniform_set, UniformSetInvalidatedCallback p_callback, void *p_userdata) {
UniformSet *us = uniform_set_owner.get_or_null(p_uniform_set);
ERR_FAIL_COND(!us);
us->invalidated_callback = p_callback;
us->invalidated_callback_userdata = p_userdata;
}
Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER,
"Updating buffers is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER,
"Updating buffers is forbidden during creation of a compute list");
VkPipelineStageFlags dst_stage_mask = 0;
VkAccessFlags dst_access = 0;
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
// Protect subsequent updates...
dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
}
Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier);
if (!buffer) {
ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type.");
}
ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER,
"Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end.");
// no barrier should be needed here
// _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, true);
Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_post_barrier);
if (err) {
return err;
}
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
if (dst_stage_mask == 0) {
dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) {
_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true);
}
#endif
return err;
}
Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER,
"Size must be a multiple of four");
ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER,
"Updating buffers in is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER,
"Updating buffers is forbidden during creation of a compute list");
VkPipelineStageFlags dst_stage_mask = 0;
VkAccessFlags dst_access = 0;
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
// Protect subsequent updates...
dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
}
Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier);
if (!buffer) {
ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type.");
}
ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER,
"Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end.");
// should not be needed
// _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_post_barrier);
vkCmdFillBuffer(frames[frame].draw_command_buffer, buffer->buffer, p_offset, p_size, 0);
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
if (dst_stage_mask == 0) {
dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, dst_stage_mask);
#endif
return OK;
}
Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) {
_THREAD_SAFE_METHOD_
// It could be this buffer was just created
VkPipelineShaderStageCreateFlags src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
VkAccessFlags src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
// Get the vulkan buffer and the potential stage/access possible
Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL);
if (!buffer) {
ERR_FAIL_V_MSG(Vector<uint8_t>(), "Buffer is either invalid or this type of buffer can't be retrieved. Only Index and Vertex buffers allow retrieving.");
}
// Make sure no one is using the buffer -- the "false" gets us to the same command buffer as below.
_buffer_memory_barrier(buffer->buffer, 0, buffer->size, src_stage_mask, src_access_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, false);
VkCommandBuffer command_buffer = frames[frame].setup_command_buffer;
Buffer tmp_buffer;
_buffer_allocate(&tmp_buffer, buffer->size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_CPU_ONLY);
VkBufferCopy region;
region.srcOffset = 0;
region.dstOffset = 0;
region.size = buffer->size;
vkCmdCopyBuffer(command_buffer, buffer->buffer, tmp_buffer.buffer, 1, &region); //dst buffer is in CPU, but I wonder if src buffer needs a barrier for this..
//flush everything so memory can be safely mapped
_flush(true);
void *buffer_mem;
VkResult vkerr = vmaMapMemory(allocator, tmp_buffer.allocation, &buffer_mem);
ERR_FAIL_COND_V_MSG(vkerr, Vector<uint8_t>(), "vmaMapMemory failed with error " + itos(vkerr) + ".");
Vector<uint8_t> buffer_data;
{
buffer_data.resize(buffer->size);
uint8_t *w = buffer_data.ptrw();
memcpy(w, buffer_mem, buffer->size);
}
vmaUnmapMemory(allocator, tmp_buffer.allocation);
_buffer_free(&tmp_buffer);
return buffer_data;
}
/*************************/
/**** RENDER PIPELINE ****/
/*************************/
RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags, uint32_t p_for_render_pass, const Vector<PipelineSpecializationConstant> &p_specialization_constants) {
_THREAD_SAFE_METHOD_
//needs a shader
Shader *shader = shader_owner.get_or_null(p_shader);
ERR_FAIL_COND_V(!shader, RID());
ERR_FAIL_COND_V_MSG(shader->is_compute, RID(),
"Compute shaders can't be used in render pipelines");
if (p_framebuffer_format == INVALID_ID) {
//if nothing provided, use an empty one (no attachments)
p_framebuffer_format = framebuffer_format_create(Vector<AttachmentFormat>());
}
ERR_FAIL_COND_V(!framebuffer_formats.has(p_framebuffer_format), RID());
const FramebufferFormat &fb_format = framebuffer_formats[p_framebuffer_format];
{ //validate shader vs framebuffer
ERR_FAIL_COND_V_MSG(p_for_render_pass >= uint32_t(fb_format.E->key().passes.size()), RID(), "Render pass requested for pipeline creation (" + itos(p_for_render_pass) + ") is out of bounds");
const FramebufferPass &pass = fb_format.E->key().passes[p_for_render_pass];
uint32_t output_mask = 0;
for (int i = 0; i < pass.color_attachments.size(); i++) {
if (pass.color_attachments[i] != FramebufferPass::ATTACHMENT_UNUSED) {
output_mask |= 1 << i;
}
}
ERR_FAIL_COND_V_MSG(shader->fragment_output_mask != output_mask, RID(),
"Mismatch fragment shader output mask (" + itos(shader->fragment_output_mask) + ") and framebuffer color output mask (" + itos(output_mask) + ") when binding both in render pipeline.");
}
//vertex
VkPipelineVertexInputStateCreateInfo pipeline_vertex_input_state_create_info;
if (p_vertex_format != INVALID_ID) {
//uses vertices, else it does not
ERR_FAIL_COND_V(!vertex_formats.has(p_vertex_format), RID());
const VertexDescriptionCache &vd = vertex_formats[p_vertex_format];
pipeline_vertex_input_state_create_info = vd.create_info;
//validate with inputs
for (uint32_t i = 0; i < 32; i++) {
if (!(shader->vertex_input_mask & (1 << i))) {
continue;
}
bool found = false;
for (int j = 0; j < vd.vertex_formats.size(); j++) {
if (vd.vertex_formats[j].location == i) {
found = true;
}
}
ERR_FAIL_COND_V_MSG(!found, RID(),
"Shader vertex input location (" + itos(i) + ") not provided in vertex input description for pipeline creation.");
}
} else {
//does not use vertices
pipeline_vertex_input_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
pipeline_vertex_input_state_create_info.pNext = nullptr;
pipeline_vertex_input_state_create_info.flags = 0;
pipeline_vertex_input_state_create_info.vertexBindingDescriptionCount = 0;
pipeline_vertex_input_state_create_info.pVertexBindingDescriptions = nullptr;
pipeline_vertex_input_state_create_info.vertexAttributeDescriptionCount = 0;
pipeline_vertex_input_state_create_info.pVertexAttributeDescriptions = nullptr;
ERR_FAIL_COND_V_MSG(shader->vertex_input_mask != 0, RID(),
"Shader contains vertex inputs, but no vertex input description was provided for pipeline creation.");
}
//input assembly
ERR_FAIL_INDEX_V(p_render_primitive, RENDER_PRIMITIVE_MAX, RID());
VkPipelineInputAssemblyStateCreateInfo input_assembly_create_info;
input_assembly_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
input_assembly_create_info.pNext = nullptr;
input_assembly_create_info.flags = 0;
static const VkPrimitiveTopology topology_list[RENDER_PRIMITIVE_MAX] = {
VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
VK_PRIMITIVE_TOPOLOGY_PATCH_LIST
};
input_assembly_create_info.topology = topology_list[p_render_primitive];
input_assembly_create_info.primitiveRestartEnable = (p_render_primitive == RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX);
//tessellation
VkPipelineTessellationStateCreateInfo tessellation_create_info;
tessellation_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
tessellation_create_info.pNext = nullptr;
tessellation_create_info.flags = 0;
ERR_FAIL_COND_V(limits.maxTessellationPatchSize > 0 && (p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > limits.maxTessellationPatchSize), RID());
tessellation_create_info.patchControlPoints = p_rasterization_state.patch_control_points;
VkPipelineViewportStateCreateInfo viewport_state_create_info;
viewport_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
viewport_state_create_info.pNext = nullptr;
viewport_state_create_info.flags = 0;
viewport_state_create_info.viewportCount = 1; //if VR extensions are supported at some point, this will have to be customizable in the framebuffer format
viewport_state_create_info.pViewports = nullptr;
viewport_state_create_info.scissorCount = 1;
viewport_state_create_info.pScissors = nullptr;
//rasterization
VkPipelineRasterizationStateCreateInfo rasterization_state_create_info;
rasterization_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterization_state_create_info.pNext = nullptr;
rasterization_state_create_info.flags = 0;
rasterization_state_create_info.depthClampEnable = p_rasterization_state.enable_depth_clamp;
rasterization_state_create_info.rasterizerDiscardEnable = p_rasterization_state.discard_primitives;
rasterization_state_create_info.polygonMode = (p_rasterization_state.wireframe ? VK_POLYGON_MODE_LINE : VK_POLYGON_MODE_FILL);
static VkCullModeFlags cull_mode[3] = {
VK_CULL_MODE_NONE,
VK_CULL_MODE_FRONT_BIT,
VK_CULL_MODE_BACK_BIT
};
ERR_FAIL_INDEX_V(p_rasterization_state.cull_mode, 3, RID());
rasterization_state_create_info.cullMode = cull_mode[p_rasterization_state.cull_mode];
rasterization_state_create_info.frontFace = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE);
rasterization_state_create_info.depthBiasEnable = p_rasterization_state.depth_bias_enable;
rasterization_state_create_info.depthBiasConstantFactor = p_rasterization_state.depth_bias_constant_factor;
rasterization_state_create_info.depthBiasClamp = p_rasterization_state.depth_bias_clamp;
rasterization_state_create_info.depthBiasSlopeFactor = p_rasterization_state.depth_bias_slope_factor;
rasterization_state_create_info.lineWidth = p_rasterization_state.line_width;
//multisample
VkPipelineMultisampleStateCreateInfo multisample_state_create_info;
multisample_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
multisample_state_create_info.pNext = nullptr;
multisample_state_create_info.flags = 0;
multisample_state_create_info.rasterizationSamples = rasterization_sample_count[p_multisample_state.sample_count];
multisample_state_create_info.sampleShadingEnable = p_multisample_state.enable_sample_shading;
multisample_state_create_info.minSampleShading = p_multisample_state.min_sample_shading;
Vector<VkSampleMask> sample_mask;
if (p_multisample_state.sample_mask.size()) {
//use sample mask
int rasterization_sample_mask_expected_size[TEXTURE_SAMPLES_MAX] = {
1, 2, 4, 8, 16, 32, 64
};
ERR_FAIL_COND_V(rasterization_sample_mask_expected_size[p_multisample_state.sample_count] != p_multisample_state.sample_mask.size(), RID());
sample_mask.resize(p_multisample_state.sample_mask.size());
for (int i = 0; i < p_multisample_state.sample_mask.size(); i++) {
VkSampleMask mask = p_multisample_state.sample_mask[i];
sample_mask.push_back(mask);
}
multisample_state_create_info.pSampleMask = sample_mask.ptr();
} else {
multisample_state_create_info.pSampleMask = nullptr;
}
multisample_state_create_info.alphaToCoverageEnable = p_multisample_state.enable_alpha_to_coverage;
multisample_state_create_info.alphaToOneEnable = p_multisample_state.enable_alpha_to_one;
//depth stencil
VkPipelineDepthStencilStateCreateInfo depth_stencil_state_create_info;
depth_stencil_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
depth_stencil_state_create_info.pNext = nullptr;
depth_stencil_state_create_info.flags = 0;
depth_stencil_state_create_info.depthTestEnable = p_depth_stencil_state.enable_depth_test;
depth_stencil_state_create_info.depthWriteEnable = p_depth_stencil_state.enable_depth_write;
ERR_FAIL_INDEX_V(p_depth_stencil_state.depth_compare_operator, COMPARE_OP_MAX, RID());
depth_stencil_state_create_info.depthCompareOp = compare_operators[p_depth_stencil_state.depth_compare_operator];
depth_stencil_state_create_info.depthBoundsTestEnable = p_depth_stencil_state.enable_depth_range;
depth_stencil_state_create_info.stencilTestEnable = p_depth_stencil_state.enable_stencil;
ERR_FAIL_INDEX_V(p_depth_stencil_state.front_op.fail, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.front.failOp = stencil_operations[p_depth_stencil_state.front_op.fail];
ERR_FAIL_INDEX_V(p_depth_stencil_state.front_op.pass, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.front.passOp = stencil_operations[p_depth_stencil_state.front_op.pass];
ERR_FAIL_INDEX_V(p_depth_stencil_state.front_op.depth_fail, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.front.depthFailOp = stencil_operations[p_depth_stencil_state.front_op.depth_fail];
ERR_FAIL_INDEX_V(p_depth_stencil_state.front_op.compare, COMPARE_OP_MAX, RID());
depth_stencil_state_create_info.front.compareOp = compare_operators[p_depth_stencil_state.front_op.compare];
depth_stencil_state_create_info.front.compareMask = p_depth_stencil_state.front_op.compare_mask;
depth_stencil_state_create_info.front.writeMask = p_depth_stencil_state.front_op.write_mask;
depth_stencil_state_create_info.front.reference = p_depth_stencil_state.front_op.reference;
ERR_FAIL_INDEX_V(p_depth_stencil_state.back_op.fail, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.back.failOp = stencil_operations[p_depth_stencil_state.back_op.fail];
ERR_FAIL_INDEX_V(p_depth_stencil_state.back_op.pass, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.back.passOp = stencil_operations[p_depth_stencil_state.back_op.pass];
ERR_FAIL_INDEX_V(p_depth_stencil_state.back_op.depth_fail, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.back.depthFailOp = stencil_operations[p_depth_stencil_state.back_op.depth_fail];
ERR_FAIL_INDEX_V(p_depth_stencil_state.back_op.compare, COMPARE_OP_MAX, RID());
depth_stencil_state_create_info.back.compareOp = compare_operators[p_depth_stencil_state.back_op.compare];
depth_stencil_state_create_info.back.compareMask = p_depth_stencil_state.back_op.compare_mask;
depth_stencil_state_create_info.back.writeMask = p_depth_stencil_state.back_op.write_mask;
depth_stencil_state_create_info.back.reference = p_depth_stencil_state.back_op.reference;
depth_stencil_state_create_info.minDepthBounds = p_depth_stencil_state.depth_range_min;
depth_stencil_state_create_info.maxDepthBounds = p_depth_stencil_state.depth_range_max;
//blend state
VkPipelineColorBlendStateCreateInfo color_blend_state_create_info;
color_blend_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
color_blend_state_create_info.pNext = nullptr;
color_blend_state_create_info.flags = 0;
color_blend_state_create_info.logicOpEnable = p_blend_state.enable_logic_op;
ERR_FAIL_INDEX_V(p_blend_state.logic_op, LOGIC_OP_MAX, RID());
color_blend_state_create_info.logicOp = logic_operations[p_blend_state.logic_op];
Vector<VkPipelineColorBlendAttachmentState> attachment_states;
{
const FramebufferPass &pass = fb_format.E->key().passes[p_for_render_pass];
for (int i = 0; i < pass.color_attachments.size(); i++) {
if (pass.color_attachments[i] != FramebufferPass::ATTACHMENT_UNUSED) {
int idx = attachment_states.size();
ERR_FAIL_INDEX_V(idx, p_blend_state.attachments.size(), RID());
VkPipelineColorBlendAttachmentState state;
state.blendEnable = p_blend_state.attachments[idx].enable_blend;
ERR_FAIL_INDEX_V(p_blend_state.attachments[idx].src_color_blend_factor, BLEND_FACTOR_MAX, RID());
state.srcColorBlendFactor = blend_factors[p_blend_state.attachments[idx].src_color_blend_factor];
ERR_FAIL_INDEX_V(p_blend_state.attachments[idx].dst_color_blend_factor, BLEND_FACTOR_MAX, RID());
state.dstColorBlendFactor = blend_factors[p_blend_state.attachments[idx].dst_color_blend_factor];
ERR_FAIL_INDEX_V(p_blend_state.attachments[idx].color_blend_op, BLEND_OP_MAX, RID());
state.colorBlendOp = blend_operations[p_blend_state.attachments[idx].color_blend_op];
ERR_FAIL_INDEX_V(p_blend_state.attachments[idx].src_alpha_blend_factor, BLEND_FACTOR_MAX, RID());
state.srcAlphaBlendFactor = blend_factors[p_blend_state.attachments[idx].src_alpha_blend_factor];
ERR_FAIL_INDEX_V(p_blend_state.attachments[idx].dst_alpha_blend_factor, BLEND_FACTOR_MAX, RID());
state.dstAlphaBlendFactor = blend_factors[p_blend_state.attachments[idx].dst_alpha_blend_factor];
ERR_FAIL_INDEX_V(p_blend_state.attachments[idx].alpha_blend_op, BLEND_OP_MAX, RID());
state.alphaBlendOp = blend_operations[p_blend_state.attachments[idx].alpha_blend_op];
state.colorWriteMask = 0;
if (p_blend_state.attachments[idx].write_r) {
state.colorWriteMask |= VK_COLOR_COMPONENT_R_BIT;
}
if (p_blend_state.attachments[idx].write_g) {
state.colorWriteMask |= VK_COLOR_COMPONENT_G_BIT;
}
if (p_blend_state.attachments[idx].write_b) {
state.colorWriteMask |= VK_COLOR_COMPONENT_B_BIT;
}
if (p_blend_state.attachments[idx].write_a) {
state.colorWriteMask |= VK_COLOR_COMPONENT_A_BIT;
}
attachment_states.push_back(state);
idx++;
}
}
ERR_FAIL_COND_V(attachment_states.size() != p_blend_state.attachments.size(), RID());
}
color_blend_state_create_info.attachmentCount = attachment_states.size();
color_blend_state_create_info.pAttachments = attachment_states.ptr();
color_blend_state_create_info.blendConstants[0] = p_blend_state.blend_constant.r;
color_blend_state_create_info.blendConstants[1] = p_blend_state.blend_constant.g;
color_blend_state_create_info.blendConstants[2] = p_blend_state.blend_constant.b;
color_blend_state_create_info.blendConstants[3] = p_blend_state.blend_constant.a;
//dynamic state
VkPipelineDynamicStateCreateInfo dynamic_state_create_info;
dynamic_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
dynamic_state_create_info.pNext = nullptr;
dynamic_state_create_info.flags = 0;
Vector<VkDynamicState> dynamic_states; //vulkan is weird..
dynamic_states.push_back(VK_DYNAMIC_STATE_VIEWPORT); //viewport and scissor are always dynamic
dynamic_states.push_back(VK_DYNAMIC_STATE_SCISSOR);
if (p_dynamic_state_flags & DYNAMIC_STATE_LINE_WIDTH) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_WIDTH);
}
if (p_dynamic_state_flags & DYNAMIC_STATE_DEPTH_BIAS) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_BIAS);
}
if (p_dynamic_state_flags & DYNAMIC_STATE_BLEND_CONSTANTS) {
dynamic_states.push_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS);
}
if (p_dynamic_state_flags & DYNAMIC_STATE_DEPTH_BOUNDS) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS);
}
if (p_dynamic_state_flags & DYNAMIC_STATE_STENCIL_COMPARE_MASK) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK);
}
if (p_dynamic_state_flags & DYNAMIC_STATE_STENCIL_WRITE_MASK) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK);
}
if (p_dynamic_state_flags & DYNAMIC_STATE_STENCIL_REFERENCE) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
}
dynamic_state_create_info.dynamicStateCount = dynamic_states.size();
dynamic_state_create_info.pDynamicStates = dynamic_states.ptr();
//finally, pipeline create info
VkGraphicsPipelineCreateInfo graphics_pipeline_create_info;
graphics_pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
graphics_pipeline_create_info.pNext = nullptr;
graphics_pipeline_create_info.flags = 0;
Vector<VkPipelineShaderStageCreateInfo> pipeline_stages = shader->pipeline_stages;
Vector<VkSpecializationInfo> specialization_info;
Vector<Vector<VkSpecializationMapEntry>> specialization_map_entries;
Vector<uint32_t> specialization_constant_data;
if (shader->specialization_constants.size()) {
specialization_constant_data.resize(shader->specialization_constants.size());
uint32_t *data_ptr = specialization_constant_data.ptrw();
specialization_info.resize(pipeline_stages.size());
specialization_map_entries.resize(pipeline_stages.size());
for (int i = 0; i < shader->specialization_constants.size(); i++) {
//see if overridden
const Shader::SpecializationConstant &sc = shader->specialization_constants[i];
data_ptr[i] = sc.constant.int_value; //just copy the 32 bits
for (int j = 0; j < p_specialization_constants.size(); j++) {
const PipelineSpecializationConstant &psc = p_specialization_constants[j];
if (psc.constant_id == sc.constant.constant_id) {
ERR_FAIL_COND_V_MSG(psc.type != sc.constant.type, RID(), "Specialization constant provided for id (" + itos(sc.constant.constant_id) + ") is of the wrong type.");
data_ptr[i] = psc.int_value;
break;
}
}
VkSpecializationMapEntry entry;
entry.constantID = sc.constant.constant_id;
entry.offset = i * sizeof(uint32_t);
entry.size = sizeof(uint32_t);
for (int j = 0; j < SHADER_STAGE_MAX; j++) {
if (sc.stage_flags & (1 << j)) {
VkShaderStageFlagBits stage = shader_stage_masks[j];
for (int k = 0; k < pipeline_stages.size(); k++) {
if (pipeline_stages[k].stage == stage) {
specialization_map_entries.write[k].push_back(entry);
}
}
}
}
}
for (int i = 0; i < pipeline_stages.size(); i++) {
if (specialization_map_entries[i].size()) {
specialization_info.write[i].dataSize = specialization_constant_data.size() * sizeof(uint32_t);
specialization_info.write[i].pData = data_ptr;
specialization_info.write[i].mapEntryCount = specialization_map_entries[i].size();
specialization_info.write[i].pMapEntries = specialization_map_entries[i].ptr();
pipeline_stages.write[i].pSpecializationInfo = specialization_info.ptr() + i;
}
}
}
graphics_pipeline_create_info.stageCount = pipeline_stages.size();
graphics_pipeline_create_info.pStages = pipeline_stages.ptr();
graphics_pipeline_create_info.pVertexInputState = &pipeline_vertex_input_state_create_info;
graphics_pipeline_create_info.pInputAssemblyState = &input_assembly_create_info;
graphics_pipeline_create_info.pTessellationState = &tessellation_create_info;
graphics_pipeline_create_info.pViewportState = &viewport_state_create_info;
graphics_pipeline_create_info.pRasterizationState = &rasterization_state_create_info;
graphics_pipeline_create_info.pMultisampleState = &multisample_state_create_info;
graphics_pipeline_create_info.pDepthStencilState = &depth_stencil_state_create_info;
graphics_pipeline_create_info.pColorBlendState = &color_blend_state_create_info;
graphics_pipeline_create_info.pDynamicState = &dynamic_state_create_info;
graphics_pipeline_create_info.layout = shader->pipeline_layout;
graphics_pipeline_create_info.renderPass = fb_format.render_pass;
graphics_pipeline_create_info.subpass = p_for_render_pass;
graphics_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
graphics_pipeline_create_info.basePipelineIndex = 0;
RenderPipeline pipeline;
VkResult err = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &graphics_pipeline_create_info, nullptr, &pipeline.pipeline);
ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateGraphicsPipelines failed with error " + itos(err) + " for shader '" + shader->name + "'.");
pipeline.set_formats = shader->set_formats;
pipeline.push_constant_stages = shader->push_constant.push_constants_vk_stage;
pipeline.pipeline_layout = shader->pipeline_layout;
pipeline.shader = p_shader;
pipeline.push_constant_size = shader->push_constant.push_constant_size;
#ifdef DEBUG_ENABLED
pipeline.validation.dynamic_state = p_dynamic_state_flags;
pipeline.validation.framebuffer_format = p_framebuffer_format;
pipeline.validation.render_pass = p_for_render_pass;
pipeline.validation.vertex_format = p_vertex_format;
pipeline.validation.uses_restart_indices = input_assembly_create_info.primitiveRestartEnable;
static const uint32_t primitive_divisor[RENDER_PRIMITIVE_MAX] = {
1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1
};
pipeline.validation.primitive_divisor = primitive_divisor[p_render_primitive];
static const uint32_t primitive_minimum[RENDER_PRIMITIVE_MAX] = {
1,
2,
2,
2,
2,
3,
3,
3,
3,
3,
1,
};
pipeline.validation.primitive_minimum = primitive_minimum[p_render_primitive];
#endif
//create ID to associate with this pipeline
RID id = render_pipeline_owner.make_rid(pipeline);
//now add all the dependencies
_add_dependency(id, p_shader);
return id;
}
bool RenderingDeviceVulkan::render_pipeline_is_valid(RID p_pipeline) {
_THREAD_SAFE_METHOD_
return render_pipeline_owner.owns(p_pipeline);
}
/**************************/
/**** COMPUTE PIPELINE ****/
/**************************/
RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader, const Vector<PipelineSpecializationConstant> &p_specialization_constants) {
_THREAD_SAFE_METHOD_
//needs a shader
Shader *shader = shader_owner.get_or_null(p_shader);
ERR_FAIL_COND_V(!shader, RID());
ERR_FAIL_COND_V_MSG(!shader->is_compute, RID(),
"Non-compute shaders can't be used in compute pipelines");
//finally, pipeline create info
VkComputePipelineCreateInfo compute_pipeline_create_info;
compute_pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
compute_pipeline_create_info.pNext = nullptr;
compute_pipeline_create_info.flags = 0;
compute_pipeline_create_info.stage = shader->pipeline_stages[0];
compute_pipeline_create_info.layout = shader->pipeline_layout;
compute_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
compute_pipeline_create_info.basePipelineIndex = 0;
VkSpecializationInfo specialization_info;
Vector<VkSpecializationMapEntry> specialization_map_entries;
Vector<uint32_t> specialization_constant_data;
if (shader->specialization_constants.size()) {
specialization_constant_data.resize(shader->specialization_constants.size());
uint32_t *data_ptr = specialization_constant_data.ptrw();
for (int i = 0; i < shader->specialization_constants.size(); i++) {
//see if overridden
const Shader::SpecializationConstant &sc = shader->specialization_constants[i];
data_ptr[i] = sc.constant.int_value; //just copy the 32 bits
for (int j = 0; j < p_specialization_constants.size(); j++) {
const PipelineSpecializationConstant &psc = p_specialization_constants[j];
if (psc.constant_id == sc.constant.constant_id) {
ERR_FAIL_COND_V_MSG(psc.type != sc.constant.type, RID(), "Specialization constant provided for id (" + itos(sc.constant.constant_id) + ") is of the wrong type.");
data_ptr[i] = sc.constant.int_value;
break;
}
}
VkSpecializationMapEntry entry;
entry.constantID = sc.constant.constant_id;
entry.offset = i * sizeof(uint32_t);
entry.size = sizeof(uint32_t);
specialization_map_entries.push_back(entry);
}
specialization_info.dataSize = specialization_constant_data.size() * sizeof(uint32_t);
specialization_info.pData = data_ptr;
specialization_info.mapEntryCount = specialization_map_entries.size();
specialization_info.pMapEntries = specialization_map_entries.ptr();
compute_pipeline_create_info.stage.pSpecializationInfo = &specialization_info;
}
ComputePipeline pipeline;
VkResult err = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &compute_pipeline_create_info, nullptr, &pipeline.pipeline);
ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateComputePipelines failed with error " + itos(err) + ".");
pipeline.set_formats = shader->set_formats;
pipeline.push_constant_stages = shader->push_constant.push_constants_vk_stage;
pipeline.pipeline_layout = shader->pipeline_layout;
pipeline.shader = p_shader;
pipeline.push_constant_size = shader->push_constant.push_constant_size;
pipeline.local_group_size[0] = shader->compute_local_size[0];
pipeline.local_group_size[1] = shader->compute_local_size[1];
pipeline.local_group_size[2] = shader->compute_local_size[2];
//create ID to associate with this pipeline
RID id = compute_pipeline_owner.make_rid(pipeline);
//now add all the dependencies
_add_dependency(id, p_shader);
return id;
}
bool RenderingDeviceVulkan::compute_pipeline_is_valid(RID p_pipeline) {
return compute_pipeline_owner.owns(p_pipeline);
}
/****************/
/**** SCREEN ****/
/****************/
int RenderingDeviceVulkan::screen_get_width(DisplayServer::WindowID p_screen) const {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(local_device.is_valid(), -1, "Local devices have no screen");
return context->window_get_width(p_screen);
}
int RenderingDeviceVulkan::screen_get_height(DisplayServer::WindowID p_screen) const {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(local_device.is_valid(), -1, "Local devices have no screen");
return context->window_get_height(p_screen);
}
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::screen_get_framebuffer_format() const {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(local_device.is_valid(), INVALID_ID, "Local devices have no screen");
//very hacky, but not used often per frame so I guess ok
VkFormat vkformat = context->get_screen_format();
DataFormat format = DATA_FORMAT_MAX;
for (int i = 0; i < DATA_FORMAT_MAX; i++) {
if (vkformat == vulkan_formats[i]) {
format = DataFormat(i);
break;
}
}
ERR_FAIL_COND_V(format == DATA_FORMAT_MAX, INVALID_ID);
AttachmentFormat attachment;
attachment.format = format;
attachment.samples = TEXTURE_SAMPLES_1;
attachment.usage_flags = TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
Vector<AttachmentFormat> screen_attachment;
screen_attachment.push_back(attachment);
return const_cast<RenderingDeviceVulkan *>(this)->framebuffer_format_create(screen_attachment);
}
/*******************/
/**** DRAW LIST ****/
/*******************/
RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(DisplayServer::WindowID p_screen, const Color &p_clear_color) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(local_device.is_valid(), INVALID_ID, "Local devices have no screen");
ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time.");
VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
if (!context->window_is_valid_swapchain(p_screen)) {
return INVALID_ID;
}
Size2i size = Size2i(context->window_get_width(p_screen), context->window_get_height(p_screen));
_draw_list_allocate(Rect2i(Vector2i(), size), 0, 0);
#ifdef DEBUG_ENABLED
draw_list_framebuffer_format = screen_get_framebuffer_format();
#endif
draw_list_subpass_count = 1;
VkRenderPassBeginInfo render_pass_begin;
render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin.pNext = nullptr;
render_pass_begin.renderPass = context->window_get_render_pass(p_screen);
render_pass_begin.framebuffer = context->window_get_framebuffer(p_screen);
render_pass_begin.renderArea.extent.width = size.width;
render_pass_begin.renderArea.extent.height = size.height;
render_pass_begin.renderArea.offset.x = 0;
render_pass_begin.renderArea.offset.y = 0;
render_pass_begin.clearValueCount = 1;
VkClearValue clear_value;
clear_value.color.float32[0] = p_clear_color.r;
clear_value.color.float32[1] = p_clear_color.g;
clear_value.color.float32[2] = p_clear_color.b;
clear_value.color.float32[3] = p_clear_color.a;
render_pass_begin.pClearValues = &clear_value;
vkCmdBeginRenderPass(command_buffer, &render_pass_begin, VK_SUBPASS_CONTENTS_INLINE);
uint32_t size_x = screen_get_width(p_screen);
uint32_t size_y = screen_get_height(p_screen);
VkViewport viewport;
viewport.x = 0;
viewport.y = 0;
viewport.width = size_x;
viewport.height = size_y;
viewport.minDepth = 0;
viewport.maxDepth = 1.0;
vkCmdSetViewport(command_buffer, 0, 1, &viewport);
VkRect2D scissor;
scissor.offset.x = 0;
scissor.offset.y = 0;
scissor.extent.width = size_x;
scissor.extent.height = size_y;
vkCmdSetScissor(command_buffer, 0, 1, &scissor);
return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT;
}
Error RenderingDeviceVulkan::_draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass, uint32_t *r_subpass_count) {
Framebuffer::VersionKey vk;
vk.initial_color_action = p_initial_color_action;
vk.final_color_action = p_final_color_action;
vk.initial_depth_action = p_initial_depth_action;
vk.final_depth_action = p_final_depth_action;
vk.view_count = p_framebuffer->view_count;
if (!p_framebuffer->framebuffers.has(vk)) {
//need to create this version
Framebuffer::Version version;
version.render_pass = _render_pass_create(framebuffer_formats[p_framebuffer->format_id].E->key().attachments, framebuffer_formats[p_framebuffer->format_id].E->key().passes, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_framebuffer->view_count);
VkFramebufferCreateInfo framebuffer_create_info;
framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
framebuffer_create_info.pNext = nullptr;
framebuffer_create_info.flags = 0;
framebuffer_create_info.renderPass = version.render_pass;
Vector<VkImageView> attachments;
for (int i = 0; i < p_framebuffer->texture_ids.size(); i++) {
Texture *texture = texture_owner.get_or_null(p_framebuffer->texture_ids[i]);
ERR_FAIL_COND_V(!texture, ERR_BUG);
attachments.push_back(texture->view);
ERR_FAIL_COND_V(texture->width != p_framebuffer->size.width, ERR_BUG);
ERR_FAIL_COND_V(texture->height != p_framebuffer->size.height, ERR_BUG);
}
framebuffer_create_info.attachmentCount = attachments.size();
framebuffer_create_info.pAttachments = attachments.ptr();
framebuffer_create_info.width = p_framebuffer->size.width;
framebuffer_create_info.height = p_framebuffer->size.height;
framebuffer_create_info.layers = 1;
VkResult err = vkCreateFramebuffer(device, &framebuffer_create_info, nullptr, &version.framebuffer);
ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "vkCreateFramebuffer failed with error " + itos(err) + ".");
version.subpass_count = framebuffer_formats[p_framebuffer->format_id].E->key().passes.size();
p_framebuffer->framebuffers.insert(vk, version);
}
const Framebuffer::Version &version = p_framebuffer->framebuffers[vk];
*r_framebuffer = version.framebuffer;
*r_render_pass = version.render_pass;
*r_subpass_count = version.subpass_count;
return OK;
}
Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents, const Vector<RID> &p_storage_textures) {
VkRenderPassBeginInfo render_pass_begin;
render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin.pNext = nullptr;
render_pass_begin.renderPass = render_pass;
render_pass_begin.framebuffer = vkframebuffer;
/*
* Given how API works, it makes sense to always fully operate on the whole framebuffer.
* This allows better continue operations for operations like shadowmapping.
render_pass_begin.renderArea.extent.width = viewport_size.width;
render_pass_begin.renderArea.extent.height = viewport_size.height;
render_pass_begin.renderArea.offset.x = viewport_offset.x;
render_pass_begin.renderArea.offset.y = viewport_offset.y;
*/
render_pass_begin.renderArea.extent.width = framebuffer->size.width;
render_pass_begin.renderArea.extent.height = framebuffer->size.height;
render_pass_begin.renderArea.offset.x = 0;
render_pass_begin.renderArea.offset.y = 0;
Vector<VkClearValue> clear_values;
clear_values.resize(framebuffer->texture_ids.size());
{
int color_index = 0;
for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);
VkClearValue clear_value;
if (color_index < p_clear_colors.size() && texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
ERR_FAIL_INDEX_V(color_index, p_clear_colors.size(), ERR_BUG); //a bug
Color clear_color = p_clear_colors[color_index];
clear_value.color.float32[0] = clear_color.r;
clear_value.color.float32[1] = clear_color.g;
clear_value.color.float32[2] = clear_color.b;
clear_value.color.float32[3] = clear_color.a;
color_index++;
} else if (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
clear_value.depthStencil.depth = p_clear_depth;
clear_value.depthStencil.stencil = p_clear_stencil;
} else {
clear_value.color.float32[0] = 0;
clear_value.color.float32[1] = 0;
clear_value.color.float32[2] = 0;
clear_value.color.float32[3] = 0;
}
clear_values.write[i] = clear_value;
}
}
render_pass_begin.clearValueCount = clear_values.size();
render_pass_begin.pClearValues = clear_values.ptr();
for (int i = 0; i < p_storage_textures.size(); i++) {
Texture *texture = texture_owner.get_or_null(p_storage_textures[i]);
ERR_CONTINUE_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), "Supplied storage texture " + itos(i) + " for draw list is not set to be used for storage.");
if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
//must change layout to general
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.oldLayout = texture->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture->image;
image_memory_barrier.subresourceRange.aspectMask = texture->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = texture->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = texture->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer;
image_memory_barrier.subresourceRange.layerCount = texture->layers;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
texture->layout = VK_IMAGE_LAYOUT_GENERAL;
draw_list_storage_textures.push_back(p_storage_textures[i]);
}
}
vkCmdBeginRenderPass(command_buffer, &render_pass_begin, subpass_contents);
//mark textures as bound
draw_list_bound_textures.clear();
draw_list_unbind_color_textures = p_final_color_action != FINAL_ACTION_CONTINUE;
draw_list_unbind_depth_textures = p_final_depth_action != FINAL_ACTION_CONTINUE;
for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);
texture->bound = true;
draw_list_bound_textures.push_back(framebuffer->texture_ids[i]);
}
return OK;
}
void RenderingDeviceVulkan::_draw_list_insert_clear_region(DrawList *draw_list, Framebuffer *framebuffer, Point2i viewport_offset, Point2i viewport_size, bool p_clear_color, const Vector<Color> &p_clear_colors, bool p_clear_depth, float p_depth, uint32_t p_stencil) {
Vector<VkClearAttachment> clear_attachments;
int color_index = 0;
for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);
VkClearAttachment clear_at = {};
if (p_clear_color && texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
ERR_FAIL_INDEX(color_index, p_clear_colors.size()); //a bug
Color clear_color = p_clear_colors[color_index];
clear_at.clearValue.color.float32[0] = clear_color.r;
clear_at.clearValue.color.float32[1] = clear_color.g;
clear_at.clearValue.color.float32[2] = clear_color.b;
clear_at.clearValue.color.float32[3] = clear_color.a;
clear_at.colorAttachment = color_index++;
clear_at.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
} else if (p_clear_depth && texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
clear_at.clearValue.depthStencil.depth = p_depth;
clear_at.clearValue.depthStencil.stencil = p_stencil;
clear_at.colorAttachment = 0;
clear_at.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
if (format_has_stencil(texture->format)) {
clear_at.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
} else {
ERR_CONTINUE(true);
}
clear_attachments.push_back(clear_at);
}
VkClearRect cr;
cr.baseArrayLayer = 0;
cr.layerCount = 1;
cr.rect.offset.x = viewport_offset.x;
cr.rect.offset.y = viewport_offset.y;
cr.rect.extent.width = viewport_size.width;
cr.rect.extent.height = viewport_size.height;
vkCmdClearAttachments(draw_list->command_buffer, clear_attachments.size(), clear_attachments.ptr(), 1, &cr);
}
RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, INVALID_ID, "Only one draw/compute list can be active at the same time.");
Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer);
ERR_FAIL_COND_V(!framebuffer, INVALID_ID);
Point2i viewport_offset;
Point2i viewport_size = framebuffer->size;
bool needs_clear_color = false;
bool needs_clear_depth = false;
if (p_region != Rect2() && p_region != Rect2(Vector2(), viewport_size)) { //check custom region
Rect2i viewport(viewport_offset, viewport_size);
Rect2i regioni = p_region;
if (!(regioni.position.x >= viewport.position.x) && (regioni.position.y >= viewport.position.y) &&
((regioni.position.x + regioni.size.x) <= (viewport.position.x + viewport.size.x)) &&
((regioni.position.y + regioni.size.y) <= (viewport.position.y + viewport.size.y))) {
ERR_FAIL_V_MSG(INVALID_ID, "When supplying a custom region, it must be contained within the framebuffer rectangle");
}
viewport_offset = regioni.position;
viewport_size = regioni.size;
if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) {
needs_clear_color = true;
p_initial_color_action = INITIAL_ACTION_CONTINUE;
}
if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) {
needs_clear_depth = true;
p_initial_depth_action = INITIAL_ACTION_CONTINUE;
}
if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) {
needs_clear_color = true;
p_initial_color_action = INITIAL_ACTION_KEEP;
}
if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) {
needs_clear_depth = true;
p_initial_depth_action = INITIAL_ACTION_KEEP;
}
}
if (p_initial_color_action == INITIAL_ACTION_CLEAR) { //check clear values
int color_count = 0;
for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);
if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
color_count++;
}
}
ERR_FAIL_COND_V_MSG(p_clear_color_values.size() != color_count, INVALID_ID,
"Clear color values supplied (" + itos(p_clear_color_values.size()) + ") differ from the amount required for framebuffer color attachments (" + itos(color_count) + ").");
}
VkFramebuffer vkframebuffer;
VkRenderPass render_pass;
Error err = _draw_list_setup_framebuffer(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, &vkframebuffer, &render_pass, &draw_list_subpass_count);
ERR_FAIL_COND_V(err != OK, INVALID_ID);
VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, vkframebuffer, render_pass, command_buffer, VK_SUBPASS_CONTENTS_INLINE, p_storage_textures);
if (err != OK) {
return INVALID_ID;
}
draw_list_render_pass = render_pass;
draw_list_vkframebuffer = vkframebuffer;
_draw_list_allocate(Rect2i(viewport_offset, viewport_size), 0, 0);
#ifdef DEBUG_ENABLED
draw_list_framebuffer_format = framebuffer->format_id;
#endif
draw_list_current_subpass = 0;
if (needs_clear_color || needs_clear_depth) {
_draw_list_insert_clear_region(draw_list, framebuffer, viewport_offset, viewport_size, needs_clear_color, p_clear_color_values, needs_clear_depth, p_clear_depth, p_clear_stencil);
}
VkViewport viewport;
viewport.x = viewport_offset.x;
viewport.y = viewport_offset.y;
viewport.width = viewport_size.width;
viewport.height = viewport_size.height;
viewport.minDepth = 0;
viewport.maxDepth = 1.0;
vkCmdSetViewport(command_buffer, 0, 1, &viewport);
VkRect2D scissor;
scissor.offset.x = viewport_offset.x;
scissor.offset.y = viewport_offset.y;
scissor.extent.width = viewport_size.width;
scissor.extent.height = viewport_size.height;
vkCmdSetScissor(command_buffer, 0, 1, &scissor);
return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT;
}
Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V(p_splits < 1, ERR_INVALID_DECLARATION);
Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer);
ERR_FAIL_COND_V(!framebuffer, ERR_INVALID_DECLARATION);
Point2i viewport_offset;
Point2i viewport_size = framebuffer->size;
bool needs_clear_color = false;
bool needs_clear_depth = false;
if (p_region != Rect2() && p_region != Rect2(Vector2(), viewport_size)) { //check custom region
Rect2i viewport(viewport_offset, viewport_size);
Rect2i regioni = p_region;
if (!(regioni.position.x >= viewport.position.x) && (regioni.position.y >= viewport.position.y) &&
((regioni.position.x + regioni.size.x) <= (viewport.position.x + viewport.size.x)) &&
((regioni.position.y + regioni.size.y) <= (viewport.position.y + viewport.size.y))) {
ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "When supplying a custom region, it must be contained within the framebuffer rectangle");
}
viewport_offset = regioni.position;
viewport_size = regioni.size;
if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) {
needs_clear_color = true;
p_initial_color_action = INITIAL_ACTION_KEEP;
}
if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) {
needs_clear_depth = true;
p_initial_depth_action = INITIAL_ACTION_KEEP;
}
}
if (p_initial_color_action == INITIAL_ACTION_CLEAR) { //check clear values
int color_count = 0;
for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);
if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
color_count++;
}
}
ERR_FAIL_COND_V_MSG(p_clear_color_values.size() != color_count, ERR_INVALID_PARAMETER,
"Clear color values supplied (" + itos(p_clear_color_values.size()) + ") differ from the amount required for framebuffer (" + itos(color_count) + ").");
}
VkFramebuffer vkframebuffer;
VkRenderPass render_pass;
Error err = _draw_list_setup_framebuffer(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, &vkframebuffer, &render_pass, &draw_list_subpass_count);
ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);
VkCommandBuffer frame_command_buffer = frames[frame].draw_command_buffer;
err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, vkframebuffer, render_pass, frame_command_buffer, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, p_storage_textures);
if (err != OK) {
return ERR_CANT_CREATE;
}
draw_list_current_subpass = 0;
#ifdef DEBUG_ENABLED
draw_list_framebuffer_format = framebuffer->format_id;
#endif
draw_list_render_pass = render_pass;
draw_list_vkframebuffer = vkframebuffer;
err = _draw_list_allocate(Rect2i(viewport_offset, viewport_size), p_splits, 0);
if (err != OK) {
return err;
}
if (needs_clear_color || needs_clear_depth) {
_draw_list_insert_clear_region(&draw_list[0], framebuffer, viewport_offset, viewport_size, needs_clear_color, p_clear_color_values, needs_clear_depth, p_clear_depth, p_clear_stencil);
}
for (uint32_t i = 0; i < p_splits; i++) {
VkViewport viewport;
viewport.x = viewport_offset.x;
viewport.y = viewport_offset.y;
viewport.width = viewport_size.width;
viewport.height = viewport_size.height;
viewport.minDepth = 0;
viewport.maxDepth = 1.0;
vkCmdSetViewport(draw_list[i].command_buffer, 0, 1, &viewport);
VkRect2D scissor;
scissor.offset.x = viewport_offset.x;
scissor.offset.y = viewport_offset.y;
scissor.extent.width = viewport_size.width;
scissor.extent.height = viewport_size.height;
vkCmdSetScissor(draw_list[i].command_buffer, 0, 1, &scissor);
r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i;
}
return OK;
}
RenderingDeviceVulkan::DrawList *RenderingDeviceVulkan::_get_draw_list_ptr(DrawListID p_id) {
if (p_id < 0) {
return nullptr;
}
if (!draw_list) {
return nullptr;
} else if (p_id == (int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT)) {
if (draw_list_split) {
return nullptr;
}
return draw_list;
} else if (p_id >> DrawListID(ID_BASE_SHIFT) == ID_TYPE_SPLIT_DRAW_LIST) {
if (!draw_list_split) {
return nullptr;
}
uint64_t index = p_id & ((DrawListID(1) << DrawListID(ID_BASE_SHIFT)) - 1); //mask
if (index >= draw_list_count) {
return nullptr;
}
return &draw_list[index];
} else {
return nullptr;
}
}
void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
const RenderPipeline *pipeline = render_pipeline_owner.get_or_null(p_render_pipeline);
ERR_FAIL_COND(!pipeline);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND(pipeline->validation.framebuffer_format != draw_list_framebuffer_format && pipeline->validation.render_pass != draw_list_current_subpass);
#endif
if (p_render_pipeline == dl->state.pipeline) {
return; //redundant state, return.
}
dl->state.pipeline = p_render_pipeline;
dl->state.pipeline_layout = pipeline->pipeline_layout;
vkCmdBindPipeline(dl->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline);
if (dl->state.pipeline_shader != pipeline->shader) {
// shader changed, so descriptor sets may become incompatible.
//go through ALL sets, and unbind them (and all those above) if the format is different
uint32_t pcount = pipeline->set_formats.size(); //formats count in this pipeline
dl->state.set_count = MAX(dl->state.set_count, pcount);
const uint32_t *pformats = pipeline->set_formats.ptr(); //pipeline set formats
bool sets_valid = true; //once invalid, all above become invalid
for (uint32_t i = 0; i < pcount; i++) {
//if a part of the format is different, invalidate it (and the rest)
if (!sets_valid || dl->state.sets[i].pipeline_expected_format != pformats[i]) {
dl->state.sets[i].bound = false;
dl->state.sets[i].pipeline_expected_format = pformats[i];
sets_valid = false;
}
}
for (uint32_t i = pcount; i < dl->state.set_count; i++) {
//unbind the ones above (not used) if exist
dl->state.sets[i].bound = false;
}
dl->state.set_count = pcount; //update set count
if (pipeline->push_constant_size) {
dl->state.pipeline_push_constant_stages = pipeline->push_constant_stages;
#ifdef DEBUG_ENABLED
dl->validation.pipeline_push_constant_supplied = false;
#endif
}
dl->state.pipeline_shader = pipeline->shader;
}
#ifdef DEBUG_ENABLED
//update render pass pipeline info
dl->validation.pipeline_active = true;
dl->validation.pipeline_dynamic_state = pipeline->validation.dynamic_state;
dl->validation.pipeline_vertex_format = pipeline->validation.vertex_format;
dl->validation.pipeline_uses_restart_indices = pipeline->validation.uses_restart_indices;
dl->validation.pipeline_primitive_divisor = pipeline->validation.primitive_divisor;
dl->validation.pipeline_primitive_minimum = pipeline->validation.primitive_minimum;
dl->validation.pipeline_push_constant_size = pipeline->push_constant_size;
#endif
}
void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_uniform_set, uint32_t p_index) {
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS,
"Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ").");
#endif
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
const UniformSet *uniform_set = uniform_set_owner.get_or_null(p_uniform_set);
ERR_FAIL_COND(!uniform_set);
if (p_index > dl->state.set_count) {
dl->state.set_count = p_index;
}
dl->state.sets[p_index].descriptor_set = uniform_set->descriptor_set; //update set pointer
dl->state.sets[p_index].bound = false; //needs rebind
dl->state.sets[p_index].uniform_set_format = uniform_set->format;
dl->state.sets[p_index].uniform_set = p_uniform_set;
uint32_t mst_count = uniform_set->mutable_storage_textures.size();
if (mst_count) {
Texture **mst_textures = const_cast<UniformSet *>(uniform_set)->mutable_storage_textures.ptrw();
for (uint32_t i = 0; i < mst_count; i++) {
if (mst_textures[i]->used_in_frame != frames_drawn) {
mst_textures[i]->used_in_frame = frames_drawn;
mst_textures[i]->used_in_transfer = false;
mst_textures[i]->used_in_compute = false;
}
mst_textures[i]->used_in_raster = true;
}
}
#ifdef DEBUG_ENABLED
{ //validate that textures bound are not attached as framebuffer bindings
uint32_t attachable_count = uniform_set->attachable_textures.size();
const UniformSet::AttachableTexture *attachable_ptr = uniform_set->attachable_textures.ptr();
uint32_t bound_count = draw_list_bound_textures.size();
const RID *bound_ptr = draw_list_bound_textures.ptr();
for (uint32_t i = 0; i < attachable_count; i++) {
for (uint32_t j = 0; j < bound_count; j++) {
ERR_FAIL_COND_MSG(attachable_ptr[i].texture == bound_ptr[j],
"Attempted to use the same texture in framebuffer attachment and a uniform (set: " + itos(p_index) + ", binding: " + itos(attachable_ptr[i].bind) + "), this is not allowed.");
}
}
}
#endif
}
void RenderingDeviceVulkan::draw_list_bind_vertex_array(DrawListID p_list, RID p_vertex_array) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
const VertexArray *vertex_array = vertex_array_owner.get_or_null(p_vertex_array);
ERR_FAIL_COND(!vertex_array);
if (dl->state.vertex_array == p_vertex_array) {
return; //already set
}
dl->state.vertex_array = p_vertex_array;
#ifdef DEBUG_ENABLED
dl->validation.vertex_format = vertex_array->description;
dl->validation.vertex_max_instances_allowed = vertex_array->max_instances_allowed;
#endif
dl->validation.vertex_array_size = vertex_array->vertex_count;
vkCmdBindVertexBuffers(dl->command_buffer, 0, vertex_array->buffers.size(), vertex_array->buffers.ptr(), vertex_array->offsets.ptr());
}
void RenderingDeviceVulkan::draw_list_bind_index_array(DrawListID p_list, RID p_index_array) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
const IndexArray *index_array = index_array_owner.get_or_null(p_index_array);
ERR_FAIL_COND(!index_array);
if (dl->state.index_array == p_index_array) {
return; //already set
}
dl->state.index_array = p_index_array;
#ifdef DEBUG_ENABLED
dl->validation.index_array_max_index = index_array->max_index;
#endif
dl->validation.index_array_size = index_array->indices;
dl->validation.index_array_offset = index_array->offset;
vkCmdBindIndexBuffer(dl->command_buffer, index_array->buffer, index_array->offset, index_array->index_type);
}
void RenderingDeviceVulkan::draw_list_set_line_width(DrawListID p_list, float p_width) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
vkCmdSetLineWidth(dl->command_buffer, p_width);
}
void RenderingDeviceVulkan::draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(p_data_size != dl->validation.pipeline_push_constant_size,
"This render pipeline requires (" + itos(dl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")");
#endif
vkCmdPushConstants(dl->command_buffer, dl->state.pipeline_layout, dl->state.pipeline_push_constant_stages, 0, p_data_size, p_data);
#ifdef DEBUG_ENABLED
dl->validation.pipeline_push_constant_supplied = true;
#endif
}
void RenderingDeviceVulkan::draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances, uint32_t p_procedural_vertices) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.pipeline_active,
"No render pipeline was set before attempting to draw.");
if (dl->validation.pipeline_vertex_format != INVALID_ID) {
//pipeline uses vertices, validate format
ERR_FAIL_COND_MSG(dl->validation.vertex_format == INVALID_ID,
"No vertex array was bound, and render pipeline expects vertices.");
//make sure format is right
ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format != dl->validation.vertex_format,
"The vertex format used to create the pipeline does not match the vertex format bound.");
//make sure number of instances is valid
ERR_FAIL_COND_MSG(p_instances > dl->validation.vertex_max_instances_allowed,
"Number of instances requested (" + itos(p_instances) + " is larger than the maximum number supported by the bound vertex array (" + itos(dl->validation.vertex_max_instances_allowed) + ").");
}
if (dl->validation.pipeline_push_constant_size > 0) {
//using push constants, check that they were supplied
ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied,
"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
}
#endif
//Bind descriptor sets
for (uint32_t i = 0; i < dl->state.set_count; i++) {
if (dl->state.sets[i].pipeline_expected_format == 0) {
continue; //nothing expected by this pipeline
}
#ifdef DEBUG_ENABLED
if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) {
if (dl->state.sets[i].uniform_set_format == 0) {
ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
} else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) {
UniformSet *us = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
} else {
ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
}
}
#endif
if (!dl->state.sets[i].bound) {
//All good, see if this requires re-binding
vkCmdBindDescriptorSets(dl->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, dl->state.pipeline_layout, i, 1, &dl->state.sets[i].descriptor_set, 0, nullptr);
dl->state.sets[i].bound = true;
}
}
if (p_use_indices) {
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(p_procedural_vertices > 0,
"Procedural vertices can't be used together with indices.");
ERR_FAIL_COND_MSG(!dl->validation.index_array_size,
"Draw command requested indices, but no index buffer was set.");
if (dl->validation.pipeline_vertex_format != INVALID_ID) {
//uses vertices, do some vertex validations
ERR_FAIL_COND_MSG(dl->validation.vertex_array_size < dl->validation.index_array_max_index,
"Index array references (max index: " + itos(dl->validation.index_array_max_index) + ") indices beyond the vertex array size (" + itos(dl->validation.vertex_array_size) + ").");
}
ERR_FAIL_COND_MSG(dl->validation.pipeline_uses_restart_indices != dl->validation.index_buffer_uses_restart_indices,
"The usage of restart indices in index buffer does not match the render primitive in the pipeline.");
#endif
uint32_t to_draw = dl->validation.index_array_size;
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(to_draw < dl->validation.pipeline_primitive_minimum,
"Too few indices (" + itos(to_draw) + ") for the render primitive set in the render pipeline (" + itos(dl->validation.pipeline_primitive_minimum) + ").");
ERR_FAIL_COND_MSG((to_draw % dl->validation.pipeline_primitive_divisor) != 0,
"Index amount (" + itos(to_draw) + ") must be a multiple of the amount of indices required by the render primitive (" + itos(dl->validation.pipeline_primitive_divisor) + ").");
#endif
vkCmdDrawIndexed(dl->command_buffer, to_draw, p_instances, dl->validation.index_array_offset, 0, 0);
} else {
uint32_t to_draw;
if (p_procedural_vertices > 0) {
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format != INVALID_ID,
"Procedural vertices requested, but pipeline expects a vertex array.");
#endif
to_draw = p_procedural_vertices;
} else {
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format == INVALID_ID,
"Draw command lacks indices, but pipeline format does not use vertices.");
#endif
to_draw = dl->validation.vertex_array_size;
}
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(to_draw < dl->validation.pipeline_primitive_minimum,
"Too few vertices (" + itos(to_draw) + ") for the render primitive set in the render pipeline (" + itos(dl->validation.pipeline_primitive_minimum) + ").");
ERR_FAIL_COND_MSG((to_draw % dl->validation.pipeline_primitive_divisor) != 0,
"Vertex amount (" + itos(to_draw) + ") must be a multiple of the amount of vertices required by the render primitive (" + itos(dl->validation.pipeline_primitive_divisor) + ").");
#endif
vkCmdDraw(dl->command_buffer, to_draw, p_instances, 0, 0);
}
}
void RenderingDeviceVulkan::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
Rect2i rect = p_rect;
rect.position += dl->viewport.position;
rect = dl->viewport.intersection(rect);
if (rect.get_area() == 0) {
return;
}
VkRect2D scissor;
scissor.offset.x = rect.position.x;
scissor.offset.y = rect.position.y;
scissor.extent.width = rect.size.width;
scissor.extent.height = rect.size.height;
vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor);
}
void RenderingDeviceVulkan::draw_list_disable_scissor(DrawListID p_list) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
VkRect2D scissor;
scissor.offset.x = dl->viewport.position.x;
scissor.offset.y = dl->viewport.position.y;
scissor.extent.width = dl->viewport.size.width;
scissor.extent.height = dl->viewport.size.height;
vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor);
}
uint32_t RenderingDeviceVulkan::draw_list_get_current_pass() {
return draw_list_current_subpass;
}
RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_switch_to_next_pass() {
ERR_FAIL_COND_V(draw_list == nullptr, INVALID_ID);
ERR_FAIL_COND_V(draw_list_current_subpass >= draw_list_subpass_count - 1, INVALID_FORMAT_ID);
draw_list_current_subpass++;
Rect2i viewport;
_draw_list_free(&viewport);
vkCmdNextSubpass(frames[frame].draw_command_buffer, VK_SUBPASS_CONTENTS_INLINE);
_draw_list_allocate(viewport, 0, draw_list_current_subpass);
return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT;
}
Error RenderingDeviceVulkan::draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids) {
ERR_FAIL_COND_V(draw_list == nullptr, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(draw_list_current_subpass >= draw_list_subpass_count - 1, ERR_INVALID_PARAMETER);
draw_list_current_subpass++;
Rect2i viewport;
_draw_list_free(&viewport);
vkCmdNextSubpass(frames[frame].draw_command_buffer, VK_SUBPASS_CONTENTS_INLINE);
_draw_list_allocate(viewport, p_splits, draw_list_current_subpass);
for (uint32_t i = 0; i < p_splits; i++) {
r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i;
}
return OK;
}
Error RenderingDeviceVulkan::_draw_list_allocate(const Rect2i &p_viewport, uint32_t p_splits, uint32_t p_subpass) {
// Lock while draw_list is active
_THREAD_SAFE_LOCK_
if (p_splits == 0) {
draw_list = memnew(DrawList);
draw_list->command_buffer = frames[frame].draw_command_buffer;
draw_list->viewport = p_viewport;
draw_list_count = 0;
draw_list_split = false;
} else {
if (p_splits > (uint32_t)split_draw_list_allocators.size()) {
uint32_t from = split_draw_list_allocators.size();
split_draw_list_allocators.resize(p_splits);
for (uint32_t i = from; i < p_splits; i++) {
VkCommandPoolCreateInfo cmd_pool_info;
cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmd_pool_info.pNext = nullptr;
cmd_pool_info.queueFamilyIndex = context->get_graphics_queue_family_index();
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &split_draw_list_allocators.write[i].command_pool);
ERR_FAIL_COND_V_MSG(res, ERR_CANT_CREATE, "vkCreateCommandPool failed with error " + itos(res) + ".");
for (int j = 0; j < frame_count; j++) {
VkCommandBuffer command_buffer;
VkCommandBufferAllocateInfo cmdbuf;
//no command buffer exists, create it.
cmdbuf.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
cmdbuf.pNext = nullptr;
cmdbuf.commandPool = split_draw_list_allocators[i].command_pool;
cmdbuf.level = VK_COMMAND_BUFFER_LEVEL_SECONDARY;
cmdbuf.commandBufferCount = 1;
VkResult err = vkAllocateCommandBuffers(device, &cmdbuf, &command_buffer);
ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "vkAllocateCommandBuffers failed with error " + itos(err) + ".");
split_draw_list_allocators.write[i].command_buffers.push_back(command_buffer);
}
}
}
draw_list = memnew_arr(DrawList, p_splits);
draw_list_count = p_splits;
draw_list_split = true;
for (uint32_t i = 0; i < p_splits; i++) {
//take a command buffer and initialize it
VkCommandBuffer command_buffer = split_draw_list_allocators[i].command_buffers[frame];
VkCommandBufferInheritanceInfo inheritance_info;
inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
inheritance_info.pNext = nullptr;
inheritance_info.renderPass = draw_list_render_pass;
inheritance_info.subpass = p_subpass;
inheritance_info.framebuffer = draw_list_vkframebuffer;
inheritance_info.occlusionQueryEnable = false;
inheritance_info.queryFlags = 0; //?
inheritance_info.pipelineStatistics = 0;
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
cmdbuf_begin.pInheritanceInfo = &inheritance_info;
VkResult res = vkResetCommandBuffer(command_buffer, 0);
if (res) {
memdelete_arr(draw_list);
draw_list = nullptr;
ERR_FAIL_V_MSG(ERR_CANT_CREATE, "vkResetCommandBuffer failed with error " + itos(res) + ".");
}
res = vkBeginCommandBuffer(command_buffer, &cmdbuf_begin);
if (res) {
memdelete_arr(draw_list);
draw_list = nullptr;
ERR_FAIL_V_MSG(ERR_CANT_CREATE, "vkBeginCommandBuffer failed with error " + itos(res) + ".");
}
draw_list[i].command_buffer = command_buffer;
draw_list[i].viewport = p_viewport;
}
}
return OK;
}
void RenderingDeviceVulkan::_draw_list_free(Rect2i *r_last_viewport) {
if (draw_list_split) {
//send all command buffers
VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * draw_list_count);
for (uint32_t i = 0; i < draw_list_count; i++) {
vkEndCommandBuffer(draw_list[i].command_buffer);
command_buffers[i] = draw_list[i].command_buffer;
if (r_last_viewport) {
if (i == 0 || draw_list[i].viewport_set) {
*r_last_viewport = draw_list[i].viewport;
}
}
}
vkCmdExecuteCommands(frames[frame].draw_command_buffer, draw_list_count, command_buffers);
memdelete_arr(draw_list);
draw_list = nullptr;
} else {
if (r_last_viewport) {
*r_last_viewport = draw_list->viewport;
}
//just end the list
memdelete(draw_list);
draw_list = nullptr;
}
// draw_list is no longer active
_THREAD_SAFE_UNLOCK_
}
void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive.");
_draw_list_free();
vkCmdEndRenderPass(frames[frame].draw_command_buffer);
for (int i = 0; i < draw_list_bound_textures.size(); i++) {
Texture *texture = texture_owner.get_or_null(draw_list_bound_textures[i]);
ERR_CONTINUE(!texture); //wtf
if (draw_list_unbind_color_textures && (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) {
texture->bound = false;
}
if (draw_list_unbind_depth_textures && (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
texture->bound = false;
}
}
uint32_t barrier_flags = 0;
uint32_t access_flags = 0;
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_RASTER) {
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/;
}
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
}
if (barrier_flags == 0) {
barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
draw_list_bound_textures.clear();
VkImageMemoryBarrier *image_barriers = nullptr;
uint32_t image_barrier_count = draw_list_storage_textures.size();
if (image_barrier_count) {
image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * draw_list_storage_textures.size());
}
uint32_t src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
uint32_t src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
if (image_barrier_count) {
src_stage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
src_access |= VK_ACCESS_SHADER_WRITE_BIT;
}
for (uint32_t i = 0; i < image_barrier_count; i++) {
Texture *texture = texture_owner.get_or_null(draw_list_storage_textures[i]);
VkImageMemoryBarrier &image_memory_barrier = image_barriers[i];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = src_access;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = texture->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture->image;
image_memory_barrier.subresourceRange.aspectMask = texture->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = texture->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = texture->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer;
image_memory_barrier.subresourceRange.layerCount = texture->layers;
texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
draw_list_storage_textures.clear();
// To ensure proper synchronization, we must make sure rendering is done before:
// * Some buffer is copied
// * Another render pass happens (since we may be done)
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
VkMemoryBarrier mem_barrier;
mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
mem_barrier.pNext = nullptr;
mem_barrier.srcAccessMask = src_access;
mem_barrier.dstAccessMask = access_flags;
if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) {
vkCmdPipelineBarrier(frames[frame].draw_command_buffer, src_stage, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers);
}
#endif
}
/***********************/
/**** COMPUTE LISTS ****/
/***********************/
RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin(bool p_allow_draw_overlap) {
ERR_FAIL_COND_V_MSG(!p_allow_draw_overlap && draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time.");
// Lock while compute_list is active
_THREAD_SAFE_LOCK_
compute_list = memnew(ComputeList);
compute_list->command_buffer = frames[frame].draw_command_buffer;
compute_list->state.allow_draw_overlap = p_allow_draw_overlap;
return ID_TYPE_COMPUTE_LIST;
}
void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) {
ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
ERR_FAIL_COND(!compute_list);
ComputeList *cl = compute_list;
const ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_compute_pipeline);
ERR_FAIL_COND(!pipeline);
if (p_compute_pipeline == cl->state.pipeline) {
return; //redundant state, return.
}
cl->state.pipeline = p_compute_pipeline;
cl->state.pipeline_layout = pipeline->pipeline_layout;
vkCmdBindPipeline(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline);
if (cl->state.pipeline_shader != pipeline->shader) {
// shader changed, so descriptor sets may become incompatible.
//go through ALL sets, and unbind them (and all those above) if the format is different
uint32_t pcount = pipeline->set_formats.size(); //formats count in this pipeline
cl->state.set_count = MAX(cl->state.set_count, pcount);
const uint32_t *pformats = pipeline->set_formats.ptr(); //pipeline set formats
bool sets_valid = true; //once invalid, all above become invalid
for (uint32_t i = 0; i < pcount; i++) {
//if a part of the format is different, invalidate it (and the rest)
if (!sets_valid || cl->state.sets[i].pipeline_expected_format != pformats[i]) {
cl->state.sets[i].bound = false;
cl->state.sets[i].pipeline_expected_format = pformats[i];
sets_valid = false;
}
}
for (uint32_t i = pcount; i < cl->state.set_count; i++) {
//unbind the ones above (not used) if exist
cl->state.sets[i].bound = false;
}
cl->state.set_count = pcount; //update set count
if (pipeline->push_constant_size) {
cl->state.pipeline_push_constant_stages = pipeline->push_constant_stages;
#ifdef DEBUG_ENABLED
cl->validation.pipeline_push_constant_supplied = false;
#endif
}
cl->state.pipeline_shader = pipeline->shader;
cl->state.local_group_size[0] = pipeline->local_group_size[0];
cl->state.local_group_size[1] = pipeline->local_group_size[1];
cl->state.local_group_size[2] = pipeline->local_group_size[2];
}
#ifdef DEBUG_ENABLED
//update compute pass pipeline info
cl->validation.pipeline_active = true;
cl->validation.pipeline_push_constant_size = pipeline->push_constant_size;
#endif
}
void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) {
ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
ERR_FAIL_COND(!compute_list);
ComputeList *cl = compute_list;
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS,
"Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ").");
#endif
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
#endif
UniformSet *uniform_set = uniform_set_owner.get_or_null(p_uniform_set);
ERR_FAIL_COND(!uniform_set);
if (p_index > cl->state.set_count) {
cl->state.set_count = p_index;
}
cl->state.sets[p_index].descriptor_set = uniform_set->descriptor_set; //update set pointer
cl->state.sets[p_index].bound = false; //needs rebind
cl->state.sets[p_index].uniform_set_format = uniform_set->format;
cl->state.sets[p_index].uniform_set = p_uniform_set;
uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size();
uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw();
VkImageMemoryBarrier *texture_barriers = nullptr;
if (textures_to_sampled_count + textures_to_storage_count) {
texture_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * (textures_to_sampled_count + textures_to_storage_count));
}
uint32_t texture_barrier_count = 0;
uint32_t src_stage_flags = 0;
for (uint32_t i = 0; i < textures_to_sampled_count; i++) {
if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.oldLayout = textures_to_sampled[i]->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = textures_to_sampled[i]->image;
image_memory_barrier.subresourceRange.aspectMask = textures_to_sampled[i]->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = textures_to_sampled[i]->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = textures_to_sampled[i]->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer;
image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers;
textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]);
}
if (textures_to_sampled[i]->used_in_frame != frames_drawn) {
textures_to_sampled[i]->used_in_frame = frames_drawn;
textures_to_sampled[i]->used_in_transfer = false;
textures_to_sampled[i]->used_in_raster = false;
}
textures_to_sampled[i]->used_in_compute = true;
}
Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw();
for (uint32_t i = 0; i < textures_to_storage_count; i++) {
if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) {
uint32_t src_access_flags = 0;
if (textures_to_storage[i]->used_in_frame == frames_drawn) {
if (textures_to_storage[i]->used_in_compute) {
src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (textures_to_storage[i]->used_in_raster) {
src_stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (textures_to_storage[i]->used_in_transfer) {
src_stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
}
textures_to_storage[i]->used_in_compute = false;
textures_to_storage[i]->used_in_raster = false;
textures_to_storage[i]->used_in_transfer = false;
} else {
src_access_flags = 0;
textures_to_storage[i]->used_in_compute = false;
textures_to_storage[i]->used_in_raster = false;
textures_to_storage[i]->used_in_transfer = false;
textures_to_storage[i]->used_in_frame = frames_drawn;
}
VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = src_access_flags;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.oldLayout = textures_to_storage[i]->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = textures_to_storage[i]->image;
image_memory_barrier.subresourceRange.aspectMask = textures_to_storage[i]->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = textures_to_storage[i]->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = textures_to_storage[i]->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_storage[i]->base_layer;
image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers;
textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL;
cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); //needs to go back to sampled layout afterwards
}
}
if (texture_barrier_count) {
if (src_stage_flags == 0) {
src_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}
vkCmdPipelineBarrier(cl->command_buffer, src_stage_flags, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, texture_barrier_count, texture_barriers);
}
#if 0
{ //validate that textures bound are not attached as framebuffer bindings
uint32_t attachable_count = uniform_set->attachable_textures.size();
const RID *attachable_ptr = uniform_set->attachable_textures.ptr();
uint32_t bound_count = draw_list_bound_textures.size();
const RID *bound_ptr = draw_list_bound_textures.ptr();
for (uint32_t i = 0; i < attachable_count; i++) {
for (uint32_t j = 0; j < bound_count; j++) {
ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j],
"Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed.");
}
}
}
#endif
}
void RenderingDeviceVulkan::compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size) {
ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
ERR_FAIL_COND(!compute_list);
ComputeList *cl = compute_list;
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
#endif
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(p_data_size != cl->validation.pipeline_push_constant_size,
"This compute pipeline requires (" + itos(cl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")");
#endif
vkCmdPushConstants(cl->command_buffer, cl->state.pipeline_layout, cl->state.pipeline_push_constant_stages, 0, p_data_size, p_data);
#ifdef DEBUG_ENABLED
cl->validation.pipeline_push_constant_supplied = true;
#endif
}
void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
ERR_FAIL_COND(!compute_list);
ComputeList *cl = compute_list;
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(p_x_groups == 0, "Dispatch amount of X compute groups (" + itos(p_x_groups) + ") is zero.");
ERR_FAIL_COND_MSG(p_z_groups == 0, "Dispatch amount of Z compute groups (" + itos(p_z_groups) + ") is zero.");
ERR_FAIL_COND_MSG(p_y_groups == 0, "Dispatch amount of Y compute groups (" + itos(p_y_groups) + ") is zero.");
ERR_FAIL_COND_MSG(p_x_groups > limits.maxComputeWorkGroupCount[0],
"Dispatch amount of X compute groups (" + itos(p_x_groups) + ") is larger than device limit (" + itos(limits.maxComputeWorkGroupCount[0]) + ")");
ERR_FAIL_COND_MSG(p_y_groups > limits.maxComputeWorkGroupCount[1],
"Dispatch amount of Y compute groups (" + itos(p_y_groups) + ") is larger than device limit (" + itos(limits.maxComputeWorkGroupCount[1]) + ")");
ERR_FAIL_COND_MSG(p_z_groups > limits.maxComputeWorkGroupCount[2],
"Dispatch amount of Z compute groups (" + itos(p_z_groups) + ") is larger than device limit (" + itos(limits.maxComputeWorkGroupCount[2]) + ")");
ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
#endif
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");
if (cl->validation.pipeline_push_constant_size > 0) {
//using push constants, check that they were supplied
ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied,
"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
}
#endif
//Bind descriptor sets
for (uint32_t i = 0; i < cl->state.set_count; i++) {
if (cl->state.sets[i].pipeline_expected_format == 0) {
continue; //nothing expected by this pipeline
}
#ifdef DEBUG_ENABLED
if (cl->state.sets[i].pipeline_expected_format != cl->state.sets[i].uniform_set_format) {
if (cl->state.sets[i].uniform_set_format == 0) {
ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
} else if (uniform_set_owner.owns(cl->state.sets[i].uniform_set)) {
UniformSet *us = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set);
ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
} else {
ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
}
}
#endif
if (!cl->state.sets[i].bound) {
//All good, see if this requires re-binding
vkCmdBindDescriptorSets(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, cl->state.pipeline_layout, i, 1, &cl->state.sets[i].descriptor_set, 0, nullptr);
cl->state.sets[i].bound = true;
}
}
vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups);
}
void RenderingDeviceVulkan::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) {
ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
ERR_FAIL_COND(!compute_list);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(p_x_threads == 0, "Dispatch amount of X compute threads (" + itos(p_x_threads) + ") is zero.");
ERR_FAIL_COND_MSG(p_y_threads == 0, "Dispatch amount of Y compute threads (" + itos(p_y_threads) + ") is zero.");
ERR_FAIL_COND_MSG(p_z_threads == 0, "Dispatch amount of Z compute threads (" + itos(p_z_threads) + ") is zero.");
#endif
ComputeList *cl = compute_list;
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");
if (cl->validation.pipeline_push_constant_size > 0) {
//using push constants, check that they were supplied
ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied,
"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
}
#endif
compute_list_dispatch(p_list, (p_x_threads - 1) / cl->state.local_group_size[0] + 1, (p_y_threads - 1) / cl->state.local_group_size[1] + 1, (p_z_threads - 1) / cl->state.local_group_size[2] + 1);
}
void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) {
ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
ERR_FAIL_COND(!compute_list);
ComputeList *cl = compute_list;
Buffer *buffer = storage_buffer_owner.get_or_null(p_buffer);
ERR_FAIL_COND(!buffer);
ERR_FAIL_COND_MSG(!(buffer->usage & STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT), "Buffer provided was not created to do indirect dispatch.");
ERR_FAIL_COND_MSG(p_offset + 12 > buffer->size, "Offset provided (+12) is past the end of buffer.");
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
#endif
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");
if (cl->validation.pipeline_push_constant_size > 0) {
//using push constants, check that they were supplied
ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied,
"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
}
#endif
//Bind descriptor sets
for (uint32_t i = 0; i < cl->state.set_count; i++) {
if (cl->state.sets[i].pipeline_expected_format == 0) {
continue; //nothing expected by this pipeline
}
#ifdef DEBUG_ENABLED
if (cl->state.sets[i].pipeline_expected_format != cl->state.sets[i].uniform_set_format) {
if (cl->state.sets[i].uniform_set_format == 0) {
ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
} else if (uniform_set_owner.owns(cl->state.sets[i].uniform_set)) {
UniformSet *us = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set);
ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
} else {
ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
}
}
#endif
if (!cl->state.sets[i].bound) {
//All good, see if this requires re-binding
vkCmdBindDescriptorSets(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, cl->state.pipeline_layout, i, 1, &cl->state.sets[i].descriptor_set, 0, nullptr);
cl->state.sets[i].bound = true;
}
}
vkCmdDispatchIndirect(cl->command_buffer, buffer->buffer, p_offset);
}
void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) {
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
_memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, true);
#endif
}
void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) {
ERR_FAIL_COND(!compute_list);
uint32_t barrier_flags = 0;
uint32_t access_flags = 0;
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_RASTER) {
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
}
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
}
if (barrier_flags == 0) {
barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
VkImageMemoryBarrier *image_barriers = nullptr;
uint32_t image_barrier_count = compute_list->state.textures_to_sampled_layout.size();
if (image_barrier_count) {
image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count);
}
uint32_t barrier_idx = 0;
for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) {
VkImageMemoryBarrier &image_memory_barrier = image_barriers[barrier_idx++];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = E->get()->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = E->get()->image;
image_memory_barrier.subresourceRange.aspectMask = E->get()->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = E->get()->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = E->get()->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = E->get()->base_layer;
image_memory_barrier.subresourceRange.layerCount = E->get()->layers;
E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
if (E->get()->used_in_frame != frames_drawn) {
E->get()->used_in_transfer = false;
E->get()->used_in_raster = false;
E->get()->used_in_compute = false;
E->get()->used_in_frame = frames_drawn;
}
}
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
VkMemoryBarrier mem_barrier;
mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
mem_barrier.pNext = nullptr;
mem_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
mem_barrier.dstAccessMask = access_flags;
if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) {
vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers);
}
#endif
memdelete(compute_list);
compute_list = nullptr;
// compute_list is no longer active
_THREAD_SAFE_UNLOCK_
}
void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) {
uint32_t src_barrier_flags = 0;
uint32_t src_access_flags = 0;
if (p_from & BARRIER_MASK_COMPUTE) {
src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_from & BARRIER_MASK_RASTER) {
src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
if (p_from & BARRIER_MASK_TRANSFER) {
src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
if (p_from == 0) {
src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}
uint32_t dst_barrier_flags = 0;
uint32_t dst_access_flags = 0;
if (p_to & BARRIER_MASK_COMPUTE) {
dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_to & BARRIER_MASK_RASTER) {
dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
}
if (p_to & BARRIER_MASK_TRANSFER) {
dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
}
if (p_to == 0) {
dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
_memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true);
}
void RenderingDeviceVulkan::full_barrier() {
#ifndef DEBUG_ENABLED
ERR_PRINT("Full barrier is debug-only, should not be used in production");
#endif
_full_barrier(true);
}
#if 0
void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_framebuffer, ID *p_draw_lists, uint32_t p_draw_list_count, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Variant> &p_clear_colors) {
VkCommandBuffer frame_cmdbuf = frames[frame].frame_buffer;
ERR_FAIL_COND(!frame_cmdbuf);
VkRenderPassBeginInfo render_pass_begin;
render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin.pNext = nullptr;
render_pass_begin.renderPass = context->get_render_pass();
render_pass_begin.framebuffer = context->get_frame_framebuffer(frame);
render_pass_begin.renderArea.extent.width = context->get_screen_width(p_screen);
render_pass_begin.renderArea.extent.height = context->get_screen_height(p_screen);
render_pass_begin.renderArea.offset.x = 0;
render_pass_begin.renderArea.offset.y = 0;
render_pass_begin.clearValueCount = 1;
VkClearValue clear_value;
clear_value.color.float32[0] = p_clear_color.r;
clear_value.color.float32[1] = p_clear_color.g;
clear_value.color.float32[2] = p_clear_color.b;
clear_value.color.float32[3] = p_clear_color.a;
render_pass_begin.pClearValues = &clear_value;
vkCmdBeginRenderPass(frame_cmdbuf, &render_pass_begin, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
ID screen_format = screen_get_framebuffer_format();
{
VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * p_draw_list_count);
uint32_t command_buffer_count = 0;
for (uint32_t i = 0; i < p_draw_list_count; i++) {
DrawList *dl = _get_draw_list_ptr(p_draw_lists[i]);
ERR_CONTINUE_MSG(!dl, "Draw list index (" + itos(i) + ") is not a valid draw list ID.");
ERR_CONTINUE_MSG(dl->validation.framebuffer_format != p_format_check,
"Draw list index (" + itos(i) + ") is created with a framebuffer format incompatible with this render pass.");
if (dl->validation.active) {
//needs to be closed, so close it.
vkEndCommandBuffer(dl->command_buffer);
dl->validation.active = false;
}
command_buffers[command_buffer_count++] = dl->command_buffer;
}
print_line("to draw: " + itos(command_buffer_count));
vkCmdExecuteCommands(p_primary, command_buffer_count, command_buffers);
}
vkCmdEndRenderPass(frame_cmdbuf);
}
#endif
void RenderingDeviceVulkan::_free_internal(RID p_id) {
//push everything so it's disposed of next time this frame index is processed (means, it's safe to do it)
if (texture_owner.owns(p_id)) {
Texture *texture = texture_owner.get_or_null(p_id);
frames[frame].textures_to_dispose_of.push_back(*texture);
texture_owner.free(p_id);
} else if (framebuffer_owner.owns(p_id)) {
Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_id);
frames[frame].framebuffers_to_dispose_of.push_back(*framebuffer);
framebuffer_owner.free(p_id);
} else if (sampler_owner.owns(p_id)) {
VkSampler *sampler = sampler_owner.get_or_null(p_id);
frames[frame].samplers_to_dispose_of.push_back(*sampler);
sampler_owner.free(p_id);
} else if (vertex_buffer_owner.owns(p_id)) {
Buffer *vertex_buffer = vertex_buffer_owner.get_or_null(p_id);
frames[frame].buffers_to_dispose_of.push_back(*vertex_buffer);
vertex_buffer_owner.free(p_id);
} else if (vertex_array_owner.owns(p_id)) {
vertex_array_owner.free(p_id);
} else if (index_buffer_owner.owns(p_id)) {
IndexBuffer *index_buffer = index_buffer_owner.get_or_null(p_id);
Buffer b;
b.allocation = index_buffer->allocation;
b.buffer = index_buffer->buffer;
b.size = index_buffer->size;
b.buffer_info = {};
frames[frame].buffers_to_dispose_of.push_back(b);
index_buffer_owner.free(p_id);
} else if (index_array_owner.owns(p_id)) {
index_array_owner.free(p_id);
} else if (shader_owner.owns(p_id)) {
Shader *shader = shader_owner.get_or_null(p_id);
frames[frame].shaders_to_dispose_of.push_back(*shader);
shader_owner.free(p_id);
} else if (uniform_buffer_owner.owns(p_id)) {
Buffer *uniform_buffer = uniform_buffer_owner.get_or_null(p_id);
frames[frame].buffers_to_dispose_of.push_back(*uniform_buffer);
uniform_buffer_owner.free(p_id);
} else if (texture_buffer_owner.owns(p_id)) {
TextureBuffer *texture_buffer = texture_buffer_owner.get_or_null(p_id);
frames[frame].buffers_to_dispose_of.push_back(texture_buffer->buffer);
frames[frame].buffer_views_to_dispose_of.push_back(texture_buffer->view);
texture_buffer_owner.free(p_id);
} else if (storage_buffer_owner.owns(p_id)) {
Buffer *storage_buffer = storage_buffer_owner.get_or_null(p_id);
frames[frame].buffers_to_dispose_of.push_back(*storage_buffer);
storage_buffer_owner.free(p_id);
} else if (uniform_set_owner.owns(p_id)) {
UniformSet *uniform_set = uniform_set_owner.get_or_null(p_id);
frames[frame].uniform_sets_to_dispose_of.push_back(*uniform_set);
uniform_set_owner.free(p_id);
if (uniform_set->invalidated_callback != nullptr) {
uniform_set->invalidated_callback(uniform_set->invalidated_callback_userdata);
}
} else if (render_pipeline_owner.owns(p_id)) {
RenderPipeline *pipeline = render_pipeline_owner.get_or_null(p_id);
frames[frame].render_pipelines_to_dispose_of.push_back(*pipeline);
render_pipeline_owner.free(p_id);
} else if (compute_pipeline_owner.owns(p_id)) {
ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id);
frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline);
compute_pipeline_owner.free(p_id);
} else {
ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()));
}
}
void RenderingDeviceVulkan::free(RID p_id) {
_THREAD_SAFE_METHOD_
_free_dependencies(p_id); //recursively erase dependencies first, to avoid potential API problems
_free_internal(p_id);
}
// The full list of resources that can be named is in the VkObjectType enum
// We just expose the resources that are owned and can be accessed easily.
void RenderingDeviceVulkan::set_resource_name(RID p_id, const String p_name) {
if (texture_owner.owns(p_id)) {
Texture *texture = texture_owner.get_or_null(p_id);
if (texture->owner.is_null()) {
// Don't set the source texture's name when calling on a texture view
context->set_object_name(VK_OBJECT_TYPE_IMAGE, uint64_t(texture->image), p_name);
}
context->set_object_name(VK_OBJECT_TYPE_IMAGE_VIEW, uint64_t(texture->view), p_name + " View");
} else if (framebuffer_owner.owns(p_id)) {
//Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_id);
// Not implemented for now as the relationship between Framebuffer and RenderPass is very complex
} else if (sampler_owner.owns(p_id)) {
VkSampler *sampler = sampler_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_SAMPLER, uint64_t(*sampler), p_name);
} else if (vertex_buffer_owner.owns(p_id)) {
Buffer *vertex_buffer = vertex_buffer_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(vertex_buffer->buffer), p_name);
} else if (index_buffer_owner.owns(p_id)) {
IndexBuffer *index_buffer = index_buffer_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(index_buffer->buffer), p_name);
} else if (shader_owner.owns(p_id)) {
Shader *shader = shader_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(shader->pipeline_layout), p_name + " Pipeline Layout");
for (int i = 0; i < shader->sets.size(); i++) {
context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, uint64_t(shader->sets[i].descriptor_set_layout), p_name);
}
} else if (uniform_buffer_owner.owns(p_id)) {
Buffer *uniform_buffer = uniform_buffer_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(uniform_buffer->buffer), p_name);
} else if (texture_buffer_owner.owns(p_id)) {
TextureBuffer *texture_buffer = texture_buffer_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(texture_buffer->buffer.buffer), p_name);
context->set_object_name(VK_OBJECT_TYPE_BUFFER_VIEW, uint64_t(texture_buffer->view), p_name + " View");
} else if (storage_buffer_owner.owns(p_id)) {
Buffer *storage_buffer = storage_buffer_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(storage_buffer->buffer), p_name);
} else if (uniform_set_owner.owns(p_id)) {
UniformSet *uniform_set = uniform_set_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET, uint64_t(uniform_set->descriptor_set), p_name);
} else if (render_pipeline_owner.owns(p_id)) {
RenderPipeline *pipeline = render_pipeline_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_PIPELINE, uint64_t(pipeline->pipeline), p_name);
context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(pipeline->pipeline_layout), p_name + " Layout");
} else if (compute_pipeline_owner.owns(p_id)) {
ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id);
context->set_object_name(VK_OBJECT_TYPE_PIPELINE, uint64_t(pipeline->pipeline), p_name);
context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(pipeline->pipeline_layout), p_name + " Layout");
} else {
ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id()));
}
}
void RenderingDeviceVulkan::draw_command_begin_label(String p_label_name, const Color p_color) {
context->command_begin_label(frames[frame].draw_command_buffer, p_label_name, p_color);
}
void RenderingDeviceVulkan::draw_command_insert_label(String p_label_name, const Color p_color) {
context->command_insert_label(frames[frame].draw_command_buffer, p_label_name, p_color);
}
void RenderingDeviceVulkan::draw_command_end_label() {
context->command_end_label(frames[frame].draw_command_buffer);
}
String RenderingDeviceVulkan::get_device_vendor_name() const {
return context->get_device_vendor_name();
}
String RenderingDeviceVulkan::get_device_name() const {
return context->get_device_name();
}
RenderingDevice::DeviceType RenderingDeviceVulkan::get_device_type() const {
return context->get_device_type();
}
String RenderingDeviceVulkan::get_device_pipeline_cache_uuid() const {
return context->get_device_pipeline_cache_uuid();
}
void RenderingDeviceVulkan::_finalize_command_bufers() {
if (draw_list) {
ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work).");
}
if (compute_list) {
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
}
{ //complete the setup buffer (that needs to be processed before anything else)
vkEndCommandBuffer(frames[frame].setup_command_buffer);
vkEndCommandBuffer(frames[frame].draw_command_buffer);
}
}
void RenderingDeviceVulkan::_begin_frame() {
//erase pending resources
_free_pending_resources(frame);
//create setup command buffer and set as the setup buffer
{
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkResetCommandBuffer(frames[frame].setup_command_buffer, 0);
ERR_FAIL_COND_MSG(err, "vkResetCommandBuffer failed with error " + itos(err) + ".");
err = vkBeginCommandBuffer(frames[frame].setup_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
err = vkBeginCommandBuffer(frames[frame].draw_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
if (local_device.is_null()) {
context->append_command_buffer(frames[frame].draw_command_buffer);
context->set_setup_buffer(frames[frame].setup_command_buffer); //append now so it's added before everything else
}
}
//advance current frame
frames_drawn++;
//advance staging buffer if used
if (staging_buffer_used) {
staging_buffer_current = (staging_buffer_current + 1) % staging_buffer_blocks.size();
staging_buffer_used = false;
}
if (frames[frame].timestamp_count) {
vkGetQueryPoolResults(device, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count, sizeof(uint64_t) * max_timestamp_query_elements, frames[frame].timestamp_result_values, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
vkCmdResetQueryPool(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count);
SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names);
SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values);
}
frames[frame].timestamp_result_count = frames[frame].timestamp_count;
frames[frame].timestamp_count = 0;
frames[frame].index = Engine::get_singleton()->get_frames_drawn();
}
void RenderingDeviceVulkan::swap_buffers() {
ERR_FAIL_COND_MSG(local_device.is_valid(), "Local devices can't swap buffers.");
_THREAD_SAFE_METHOD_
_finalize_command_bufers();
screen_prepared = false;
//swap buffers
context->swap_buffers();
frame = (frame + 1) % frame_count;
_begin_frame();
}
void RenderingDeviceVulkan::submit() {
ERR_FAIL_COND_MSG(local_device.is_null(), "Only local devices can submit and sync.");
ERR_FAIL_COND_MSG(local_device_processing, "device already submitted, call sync to wait until done.");
_finalize_command_bufers();
VkCommandBuffer command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer };
context->local_device_push_command_buffers(local_device, command_buffers, 2);
local_device_processing = true;
}
void RenderingDeviceVulkan::sync() {
ERR_FAIL_COND_MSG(local_device.is_null(), "Only local devices can submit and sync.");
ERR_FAIL_COND_MSG(!local_device_processing, "sync can only be called after a submit");
context->local_device_sync(local_device);
_begin_frame();
local_device_processing = false;
}
VmaPool RenderingDeviceVulkan::_find_or_create_small_allocs_pool(uint32_t p_mem_type_index) {
if (small_allocs_pools.has(p_mem_type_index)) {
return small_allocs_pools[p_mem_type_index];
}
print_verbose("Creating VMA small objects pool for memory type index " + itos(p_mem_type_index));
VmaPoolCreateInfo pci;
pci.memoryTypeIndex = p_mem_type_index;
pci.flags = 0;
pci.blockSize = 0;
pci.minBlockCount = 0;
pci.maxBlockCount = SIZE_MAX;
pci.priority = 0.5f;
pci.minAllocationAlignment = 0;
pci.pMemoryAllocateNext = nullptr;
VmaPool pool = VK_NULL_HANDLE;
VkResult res = vmaCreatePool(allocator, &pci, &pool);
small_allocs_pools[p_mem_type_index] = pool; // Don't try to create it again if failed the first time
ERR_FAIL_COND_V_MSG(res, pool, "vmaCreatePool failed with error " + itos(res) + ".");
return pool;
}
void RenderingDeviceVulkan::_free_pending_resources(int p_frame) {
//free in dependency usage order, so nothing weird happens
//pipelines
while (frames[p_frame].render_pipelines_to_dispose_of.front()) {
RenderPipeline *pipeline = &frames[p_frame].render_pipelines_to_dispose_of.front()->get();
vkDestroyPipeline(device, pipeline->pipeline, nullptr);
frames[p_frame].render_pipelines_to_dispose_of.pop_front();
}
while (frames[p_frame].compute_pipelines_to_dispose_of.front()) {
ComputePipeline *pipeline = &frames[p_frame].compute_pipelines_to_dispose_of.front()->get();
vkDestroyPipeline(device, pipeline->pipeline, nullptr);
frames[p_frame].compute_pipelines_to_dispose_of.pop_front();
}
//uniform sets
while (frames[p_frame].uniform_sets_to_dispose_of.front()) {
UniformSet *uniform_set = &frames[p_frame].uniform_sets_to_dispose_of.front()->get();
vkFreeDescriptorSets(device, uniform_set->pool->pool, 1, &uniform_set->descriptor_set);
_descriptor_pool_free(uniform_set->pool_key, uniform_set->pool);
frames[p_frame].uniform_sets_to_dispose_of.pop_front();
}
//buffer views
while (frames[p_frame].buffer_views_to_dispose_of.front()) {
VkBufferView buffer_view = frames[p_frame].buffer_views_to_dispose_of.front()->get();
vkDestroyBufferView(device, buffer_view, nullptr);
frames[p_frame].buffer_views_to_dispose_of.pop_front();
}
//shaders
while (frames[p_frame].shaders_to_dispose_of.front()) {
Shader *shader = &frames[p_frame].shaders_to_dispose_of.front()->get();
//descriptor set layout for each set
for (int i = 0; i < shader->sets.size(); i++) {
vkDestroyDescriptorSetLayout(device, shader->sets[i].descriptor_set_layout, nullptr);
}
//pipeline layout
vkDestroyPipelineLayout(device, shader->pipeline_layout, nullptr);
//shaders themselves
for (int i = 0; i < shader->pipeline_stages.size(); i++) {
vkDestroyShaderModule(device, shader->pipeline_stages[i].module, nullptr);
}
frames[p_frame].shaders_to_dispose_of.pop_front();
}
//samplers
while (frames[p_frame].samplers_to_dispose_of.front()) {
VkSampler sampler = frames[p_frame].samplers_to_dispose_of.front()->get();
vkDestroySampler(device, sampler, nullptr);
frames[p_frame].samplers_to_dispose_of.pop_front();
}
//framebuffers
while (frames[p_frame].framebuffers_to_dispose_of.front()) {
Framebuffer *framebuffer = &frames[p_frame].framebuffers_to_dispose_of.front()->get();
for (const KeyValue<Framebuffer::VersionKey, Framebuffer::Version> &E : framebuffer->framebuffers) {
//first framebuffer, then render pass because it depends on it
vkDestroyFramebuffer(device, E.value.framebuffer, nullptr);
vkDestroyRenderPass(device, E.value.render_pass, nullptr);
}
frames[p_frame].framebuffers_to_dispose_of.pop_front();
}
//textures
while (frames[p_frame].textures_to_dispose_of.front()) {
Texture *texture = &frames[p_frame].textures_to_dispose_of.front()->get();
if (texture->bound) {
WARN_PRINT("Deleted a texture while it was bound..");
}
vkDestroyImageView(device, texture->view, nullptr);
if (texture->owner.is_null()) {
//actually owns the image and the allocation too
image_memory -= texture->allocation_info.size;
vmaDestroyImage(allocator, texture->image, texture->allocation);
}
frames[p_frame].textures_to_dispose_of.pop_front();
}
//buffers
while (frames[p_frame].buffers_to_dispose_of.front()) {
_buffer_free(&frames[p_frame].buffers_to_dispose_of.front()->get());
frames[p_frame].buffers_to_dispose_of.pop_front();
}
}
void RenderingDeviceVulkan::prepare_screen_for_drawing() {
_THREAD_SAFE_METHOD_
context->prepare_buffers();
screen_prepared = true;
}
uint32_t RenderingDeviceVulkan::get_frame_delay() const {
return frame_count;
}
uint64_t RenderingDeviceVulkan::get_memory_usage(MemoryType p_type) const {
if (p_type == MEMORY_BUFFERS) {
return buffer_memory;
} else if (p_type == MEMORY_TEXTURES) {
return image_memory;
} else {
VmaTotalStatistics stats;
vmaCalculateStatistics(allocator, &stats);
return stats.total.statistics.allocationBytes;
}
}
void RenderingDeviceVulkan::_flush(bool p_current_frame) {
if (local_device.is_valid() && !p_current_frame) {
return; //flushing previous frames has no effect with local device
}
//not doing this crashes RADV (undefined behavior)
if (p_current_frame) {
vkEndCommandBuffer(frames[frame].setup_command_buffer);
vkEndCommandBuffer(frames[frame].draw_command_buffer);
}
if (local_device.is_valid()) {
VkCommandBuffer command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer };
context->local_device_push_command_buffers(local_device, command_buffers, 2);
context->local_device_sync(local_device);
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkBeginCommandBuffer(frames[frame].setup_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
err = vkBeginCommandBuffer(frames[frame].draw_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
} else {
context->flush(p_current_frame, p_current_frame);
//re-create the setup command
if (p_current_frame) {
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkBeginCommandBuffer(frames[frame].setup_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
context->set_setup_buffer(frames[frame].setup_command_buffer); //append now so it's added before everything else
}
if (p_current_frame) {
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkBeginCommandBuffer(frames[frame].draw_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
context->append_command_buffer(frames[frame].draw_command_buffer);
}
}
}
void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_device) {
// get our device capabilities
{
device_capabilities.version_major = p_context->get_vulkan_major();
device_capabilities.version_minor = p_context->get_vulkan_minor();
// get info about subgroups
VulkanContext::SubgroupCapabilities subgroup_capabilities = p_context->get_subgroup_capabilities();
device_capabilities.subgroup_size = subgroup_capabilities.size;
device_capabilities.subgroup_in_shaders = subgroup_capabilities.supported_stages_flags_rd();
device_capabilities.subgroup_operations = subgroup_capabilities.supported_operations_flags_rd();
// get info about further features
VulkanContext::MultiviewCapabilities multiview_capabilies = p_context->get_multiview_capabilities();
device_capabilities.supports_multiview = multiview_capabilies.is_supported && multiview_capabilies.max_view_count > 1;
device_capabilities.supports_fsr_half_float = p_context->get_shader_capabilities().shader_float16_is_supported && p_context->get_storage_buffer_capabilities().storage_buffer_16_bit_access_is_supported;
}
context = p_context;
device = p_context->get_device();
if (p_local_device) {
frame_count = 1;
local_device = p_context->local_device_create();
device = p_context->local_device_get_vk_device(local_device);
} else {
frame_count = p_context->get_swapchain_image_count() + 1; //always need one extra to ensure it's unused at any time, without having to use a fence for this.
}
limits = p_context->get_device_limits();
max_timestamp_query_elements = 256;
{ //initialize allocator
VmaAllocatorCreateInfo allocatorInfo;
memset(&allocatorInfo, 0, sizeof(VmaAllocatorCreateInfo));
allocatorInfo.physicalDevice = p_context->get_physical_device();
allocatorInfo.device = device;
allocatorInfo.instance = p_context->get_instance();
vmaCreateAllocator(&allocatorInfo, &allocator);
}
frames = memnew_arr(Frame, frame_count);
frame = 0;
//create setup and frame buffers
for (int i = 0; i < frame_count; i++) {
frames[i].index = 0;
{ //create command pool, one per frame is recommended
VkCommandPoolCreateInfo cmd_pool_info;
cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmd_pool_info.pNext = nullptr;
cmd_pool_info.queueFamilyIndex = p_context->get_graphics_queue_family_index();
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &frames[i].command_pool);
ERR_FAIL_COND_MSG(res, "vkCreateCommandPool failed with error " + itos(res) + ".");
}
{ //create command buffers
VkCommandBufferAllocateInfo cmdbuf;
//no command buffer exists, create it.
cmdbuf.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
cmdbuf.pNext = nullptr;
cmdbuf.commandPool = frames[i].command_pool;
cmdbuf.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cmdbuf.commandBufferCount = 1;
VkResult err = vkAllocateCommandBuffers(device, &cmdbuf, &frames[i].setup_command_buffer);
ERR_CONTINUE_MSG(err, "vkAllocateCommandBuffers failed with error " + itos(err) + ".");
err = vkAllocateCommandBuffers(device, &cmdbuf, &frames[i].draw_command_buffer);
ERR_CONTINUE_MSG(err, "vkAllocateCommandBuffers failed with error " + itos(err) + ".");
}
{
//create query pool
VkQueryPoolCreateInfo query_pool_create_info;
query_pool_create_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
query_pool_create_info.flags = 0;
query_pool_create_info.pNext = nullptr;
query_pool_create_info.queryType = VK_QUERY_TYPE_TIMESTAMP;
query_pool_create_info.queryCount = max_timestamp_query_elements;
query_pool_create_info.pipelineStatistics = 0;
vkCreateQueryPool(device, &query_pool_create_info, nullptr, &frames[i].timestamp_pool);
frames[i].timestamp_names = memnew_arr(String, max_timestamp_query_elements);
frames[i].timestamp_cpu_values = memnew_arr(uint64_t, max_timestamp_query_elements);
frames[i].timestamp_count = 0;
frames[i].timestamp_result_names = memnew_arr(String, max_timestamp_query_elements);
frames[i].timestamp_cpu_result_values = memnew_arr(uint64_t, max_timestamp_query_elements);
frames[i].timestamp_result_values = memnew_arr(uint64_t, max_timestamp_query_elements);
frames[i].timestamp_result_count = 0;
}
}
{
//begin the first command buffer for the first frame, so
//setting up things can be done in the meantime until swap_buffers(), which is called before advance.
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkBeginCommandBuffer(frames[0].setup_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
err = vkBeginCommandBuffer(frames[0].draw_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
if (local_device.is_null()) {
context->set_setup_buffer(frames[0].setup_command_buffer); //append now so it's added before everything else
context->append_command_buffer(frames[0].draw_command_buffer);
}
}
// Note: If adding new project settings here, also duplicate their definition in
// rendering_server.cpp for headless doctool.
staging_buffer_block_size = GLOBAL_DEF("rendering/vulkan/staging_buffer/block_size_kb", 256);
staging_buffer_block_size = MAX(4, staging_buffer_block_size);
staging_buffer_block_size *= 1024; //kb -> bytes
staging_buffer_max_size = GLOBAL_DEF("rendering/vulkan/staging_buffer/max_size_mb", 128);
staging_buffer_max_size = MAX(1, staging_buffer_max_size);
staging_buffer_max_size *= 1024 * 1024;
if (staging_buffer_max_size < staging_buffer_block_size * 4) {
//validate enough blocks
staging_buffer_max_size = staging_buffer_block_size * 4;
}
texture_upload_region_size_px = GLOBAL_DEF("rendering/vulkan/staging_buffer/texture_upload_region_size_px", 64);
texture_upload_region_size_px = nearest_power_of_2_templated(texture_upload_region_size_px);
frames_drawn = frame_count; //start from frame count, so everything else is immediately old
//ensure current staging block is valid and at least one per frame exists
staging_buffer_current = 0;
staging_buffer_used = false;
for (int i = 0; i < frame_count; i++) {
//staging was never used, create a block
Error err = _insert_staging_block();
ERR_CONTINUE(err != OK);
}
max_descriptors_per_pool = GLOBAL_DEF("rendering/vulkan/descriptor_pools/max_descriptors_per_pool", 64);
//check to make sure DescriptorPoolKey is good
static_assert(sizeof(uint64_t) * 3 >= UNIFORM_TYPE_MAX * sizeof(uint16_t));
draw_list = nullptr;
draw_list_count = 0;
draw_list_split = false;
compute_list = nullptr;
}
template <class T>
void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) {
List<RID> owned;
p_owner.get_owned_list(&owned);
if (owned.size()) {
if (owned.size() == 1) {
WARN_PRINT(vformat("1 RID of type \"%s\" was leaked.", p_type));
} else {
WARN_PRINT(vformat("%d RIDs of type \"%s\" were leaked.", owned.size(), p_type));
}
for (const RID &E : owned) {
free(E);
}
}
}
void RenderingDeviceVulkan::capture_timestamp(const String &p_name) {
ERR_FAIL_COND_MSG(draw_list != nullptr, "Capturing timestamps during draw list creation is not allowed. Offending timestamp was: " + p_name);
ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements);
//this should be optional for profiling, else it will slow things down
{
VkMemoryBarrier memoryBarrier;
memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
memoryBarrier.pNext = nullptr;
memoryBarrier.srcAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_READ_BIT |
VK_ACCESS_HOST_WRITE_BIT;
memoryBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_READ_BIT |
VK_ACCESS_HOST_WRITE_BIT;
vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr);
}
vkCmdWriteTimestamp(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frames[frame].timestamp_pool, frames[frame].timestamp_count);
frames[frame].timestamp_names[frames[frame].timestamp_count] = p_name;
frames[frame].timestamp_cpu_values[frames[frame].timestamp_count] = OS::get_singleton()->get_ticks_usec();
frames[frame].timestamp_count++;
}
uint64_t RenderingDeviceVulkan::get_driver_resource(DriverResource p_resource, RID p_rid, uint64_t p_index) {
_THREAD_SAFE_METHOD_
switch (p_resource) {
case DRIVER_RESOURCE_VULKAN_DEVICE: {
return (uint64_t)context->get_device();
} break;
case DRIVER_RESOURCE_VULKAN_PHYSICAL_DEVICE: {
return (uint64_t)context->get_physical_device();
} break;
case DRIVER_RESOURCE_VULKAN_INSTANCE: {
return (uint64_t)context->get_instance();
} break;
case DRIVER_RESOURCE_VULKAN_QUEUE: {
return (uint64_t)context->get_graphics_queue();
} break;
case DRIVER_RESOURCE_VULKAN_QUEUE_FAMILY_INDEX: {
return context->get_graphics_queue_family_index();
} break;
case DRIVER_RESOURCE_VULKAN_IMAGE: {
Texture *tex = texture_owner.get_or_null(p_rid);
ERR_FAIL_NULL_V(tex, 0);
return (uint64_t)tex->image;
} break;
case DRIVER_RESOURCE_VULKAN_IMAGE_VIEW: {
Texture *tex = texture_owner.get_or_null(p_rid);
ERR_FAIL_NULL_V(tex, 0);
return (uint64_t)tex->view;
} break;
case DRIVER_RESOURCE_VULKAN_IMAGE_NATIVE_TEXTURE_FORMAT: {
Texture *tex = texture_owner.get_or_null(p_rid);
ERR_FAIL_NULL_V(tex, 0);
return vulkan_formats[tex->format];
} break;
case DRIVER_RESOURCE_VULKAN_SAMPLER: {
VkSampler *sampler = sampler_owner.get_or_null(p_rid);
ERR_FAIL_NULL_V(sampler, 0);
return uint64_t(*sampler);
} break;
case DRIVER_RESOURCE_VULKAN_DESCRIPTOR_SET: {
UniformSet *uniform_set = uniform_set_owner.get_or_null(p_rid);
ERR_FAIL_NULL_V(uniform_set, 0);
return uint64_t(uniform_set->descriptor_set);
} break;
case DRIVER_RESOURCE_VULKAN_BUFFER: {
Buffer *buffer = nullptr;
if (vertex_buffer_owner.owns(p_rid)) {
buffer = vertex_buffer_owner.get_or_null(p_rid);
} else if (index_buffer_owner.owns(p_rid)) {
buffer = index_buffer_owner.get_or_null(p_rid);
} else if (uniform_buffer_owner.owns(p_rid)) {
buffer = uniform_buffer_owner.get_or_null(p_rid);
} else if (texture_buffer_owner.owns(p_rid)) {
buffer = &texture_buffer_owner.get_or_null(p_rid)->buffer;
} else if (storage_buffer_owner.owns(p_rid)) {
buffer = storage_buffer_owner.get_or_null(p_rid);
}
ERR_FAIL_NULL_V(buffer, 0);
return uint64_t(buffer->buffer);
} break;
case DRIVER_RESOURCE_VULKAN_COMPUTE_PIPELINE: {
ComputePipeline *compute_pipeline = compute_pipeline_owner.get_or_null(p_rid);
ERR_FAIL_NULL_V(compute_pipeline, 0);
return uint64_t(compute_pipeline->pipeline);
} break;
case DRIVER_RESOURCE_VULKAN_RENDER_PIPELINE: {
RenderPipeline *render_pipeline = render_pipeline_owner.get_or_null(p_rid);
ERR_FAIL_NULL_V(render_pipeline, 0);
return uint64_t(render_pipeline->pipeline);
} break;
default: {
// not supported for this driver
return 0;
} break;
}
}
uint32_t RenderingDeviceVulkan::get_captured_timestamps_count() const {
return frames[frame].timestamp_result_count;
}
uint64_t RenderingDeviceVulkan::get_captured_timestamps_frame() const {
return frames[frame].index;
}
static void mult64to128(uint64_t u, uint64_t v, uint64_t &h, uint64_t &l) {
uint64_t u1 = (u & 0xffffffff);
uint64_t v1 = (v & 0xffffffff);
uint64_t t = (u1 * v1);
uint64_t w3 = (t & 0xffffffff);
uint64_t k = (t >> 32);
u >>= 32;
t = (u * v1) + k;
k = (t & 0xffffffff);
uint64_t w1 = (t >> 32);
v >>= 32;
t = (u1 * v) + k;
k = (t >> 32);
h = (u * v) + w1 + k;
l = (t << 32) + w3;
}
uint64_t RenderingDeviceVulkan::get_captured_timestamp_gpu_time(uint32_t p_index) const {
ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0);
// this sucks because timestampPeriod multiplier is a float, while the timestamp is 64 bits nanosecs.
// so, in cases like nvidia which give you enormous numbers and 1 as multiplier, multiplying is next to impossible
// need to do 128 bits fixed point multiplication to get the right value
uint64_t shift_bits = 16;
uint64_t h, l;
mult64to128(frames[frame].timestamp_result_values[p_index], uint64_t(double(limits.timestampPeriod) * double(1 << shift_bits)), h, l);
l >>= shift_bits;
l |= h << (64 - shift_bits);
return l;
}
uint64_t RenderingDeviceVulkan::get_captured_timestamp_cpu_time(uint32_t p_index) const {
ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0);
return frames[frame].timestamp_cpu_result_values[p_index];
}
String RenderingDeviceVulkan::get_captured_timestamp_name(uint32_t p_index) const {
ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, String());
return frames[frame].timestamp_result_names[p_index];
}
int RenderingDeviceVulkan::limit_get(Limit p_limit) {
switch (p_limit) {
case LIMIT_MAX_BOUND_UNIFORM_SETS:
return limits.maxBoundDescriptorSets;
case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS:
return limits.maxColorAttachments;
case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET:
return limits.maxDescriptorSetSampledImages;
case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET:
return limits.maxDescriptorSetSamplers;
case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET:
return limits.maxDescriptorSetStorageBuffers;
case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET:
return limits.maxDescriptorSetStorageImages;
case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET:
return limits.maxDescriptorSetUniformBuffers;
case LIMIT_MAX_DRAW_INDEXED_INDEX:
return limits.maxDrawIndexedIndexValue;
case LIMIT_MAX_FRAMEBUFFER_HEIGHT:
return limits.maxFramebufferHeight;
case LIMIT_MAX_FRAMEBUFFER_WIDTH:
return limits.maxFramebufferWidth;
case LIMIT_MAX_TEXTURE_ARRAY_LAYERS:
return limits.maxImageArrayLayers;
case LIMIT_MAX_TEXTURE_SIZE_1D:
return limits.maxImageDimension1D;
case LIMIT_MAX_TEXTURE_SIZE_2D:
return limits.maxImageDimension2D;
case LIMIT_MAX_TEXTURE_SIZE_3D:
return limits.maxImageDimension3D;
case LIMIT_MAX_TEXTURE_SIZE_CUBE:
return limits.maxImageDimensionCube;
case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE:
return limits.maxPerStageDescriptorSampledImages;
case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE:
return limits.maxPerStageDescriptorSamplers;
case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE:
return limits.maxPerStageDescriptorStorageBuffers;
case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE:
return limits.maxPerStageDescriptorStorageImages;
case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE:
return limits.maxPerStageDescriptorUniformBuffers;
case LIMIT_MAX_PUSH_CONSTANT_SIZE:
return limits.maxPushConstantsSize;
case LIMIT_MAX_UNIFORM_BUFFER_SIZE:
return limits.maxUniformBufferRange;
case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET:
return limits.maxVertexInputAttributeOffset;
case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES:
return limits.maxVertexInputAttributes;
case LIMIT_MAX_VERTEX_INPUT_BINDINGS:
return limits.maxVertexInputBindings;
case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE:
return limits.maxVertexInputBindingStride;
case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT:
return limits.minUniformBufferOffsetAlignment;
case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X:
return limits.maxComputeWorkGroupCount[0];
case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y:
return limits.maxComputeWorkGroupCount[1];
case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z:
return limits.maxComputeWorkGroupCount[2];
case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS:
return limits.maxComputeWorkGroupInvocations;
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X:
return limits.maxComputeWorkGroupSize[0];
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y:
return limits.maxComputeWorkGroupSize[1];
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
return limits.maxComputeWorkGroupSize[2];
default:
ERR_FAIL_V(0);
}
return 0;
}
void RenderingDeviceVulkan::finalize() {
//free all resources
_flush(false);
_free_rids(render_pipeline_owner, "Pipeline");
_free_rids(compute_pipeline_owner, "Compute");
_free_rids(uniform_set_owner, "UniformSet");
_free_rids(texture_buffer_owner, "TextureBuffer");
_free_rids(storage_buffer_owner, "StorageBuffer");
_free_rids(uniform_buffer_owner, "UniformBuffer");
_free_rids(shader_owner, "Shader");
_free_rids(index_array_owner, "IndexArray");
_free_rids(index_buffer_owner, "IndexBuffer");
_free_rids(vertex_array_owner, "VertexArray");
_free_rids(vertex_buffer_owner, "VertexBuffer");
_free_rids(framebuffer_owner, "Framebuffer");
_free_rids(sampler_owner, "Sampler");
{
//for textures it's a bit more difficult because they may be shared
List<RID> owned;
texture_owner.get_owned_list(&owned);
if (owned.size()) {
if (owned.size() == 1) {
WARN_PRINT("1 RID of type \"Texture\" was leaked.");
} else {
WARN_PRINT(vformat("%d RIDs of type \"Texture\" were leaked.", owned.size()));
}
//free shared first
for (List<RID>::Element *E = owned.front(); E;) {
List<RID>::Element *N = E->next();
if (texture_is_shared(E->get())) {
free(E->get());
owned.erase(E);
}
E = N;
}
//free non shared second, this will avoid an error trying to free unexisting textures due to dependencies.
for (const RID &E : owned) {
free(E);
}
}
}
//free everything pending
for (int i = 0; i < frame_count; i++) {
int f = (frame + i) % frame_count;
_free_pending_resources(f);
vkDestroyCommandPool(device, frames[i].command_pool, nullptr);
vkDestroyQueryPool(device, frames[i].timestamp_pool, nullptr);
memdelete_arr(frames[i].timestamp_names);
memdelete_arr(frames[i].timestamp_cpu_values);
memdelete_arr(frames[i].timestamp_result_names);
memdelete_arr(frames[i].timestamp_result_values);
memdelete_arr(frames[i].timestamp_cpu_result_values);
}
for (int i = 0; i < split_draw_list_allocators.size(); i++) {
vkDestroyCommandPool(device, split_draw_list_allocators[i].command_pool, nullptr);
}
memdelete_arr(frames);
for (int i = 0; i < staging_buffer_blocks.size(); i++) {
vmaDestroyBuffer(allocator, staging_buffer_blocks[i].buffer, staging_buffer_blocks[i].allocation);
}
while (small_allocs_pools.size()) {
Map<uint32_t, VmaPool>::Element *E = small_allocs_pools.front();
vmaDestroyPool(allocator, E->get());
small_allocs_pools.erase(E);
}
vmaDestroyAllocator(allocator);
while (vertex_formats.size()) {
Map<VertexFormatID, VertexDescriptionCache>::Element *temp = vertex_formats.front();
memdelete_arr(temp->get().bindings);
memdelete_arr(temp->get().attributes);
vertex_formats.erase(temp);
}
for (int i = 0; i < framebuffer_formats.size(); i++) {
vkDestroyRenderPass(device, framebuffer_formats[i].render_pass, nullptr);
}
framebuffer_formats.clear();
//all these should be clear at this point
ERR_FAIL_COND(descriptor_pools.size());
ERR_FAIL_COND(dependency_map.size());
ERR_FAIL_COND(reverse_dependency_map.size());
}
RenderingDevice *RenderingDeviceVulkan::create_local_device() {
RenderingDeviceVulkan *rd = memnew(RenderingDeviceVulkan);
rd->initialize(context, true);
return rd;
}
RenderingDeviceVulkan::RenderingDeviceVulkan() {
device_capabilities.device_family = DEVICE_VULKAN;
}
RenderingDeviceVulkan::~RenderingDeviceVulkan() {
if (local_device.is_valid()) {
finalize();
context->local_device_free(local_device);
}
}