Optimize and fix backbuffer gaussian blur
This commit is contained in:
parent
c24fc415dc
commit
60d8df3fee
|
@ -482,12 +482,11 @@ void EffectsRD::set_color(RID p_dest_texture, const Color &p_color, const Rect2i
|
|||
RD::get_singleton()->compute_list_end();
|
||||
}
|
||||
|
||||
void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst) {
|
||||
void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, const Rect2i &p_region, bool p_8bit_dst) {
|
||||
ERR_FAIL_COND_MSG(prefer_raster_effects, "Can't use the compute version of the gaussian blur with the mobile renderer.");
|
||||
|
||||
memset(&copy.push_constant, 0, sizeof(CopyPushConstant));
|
||||
|
||||
uint32_t base_flags = 0;
|
||||
copy.push_constant.section[0] = p_region.position.x;
|
||||
copy.push_constant.section[1] = p_region.position.y;
|
||||
copy.push_constant.section[2] = p_region.size.width;
|
||||
|
@ -497,23 +496,12 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back
|
|||
RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin();
|
||||
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]);
|
||||
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0);
|
||||
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_back_texture), 3);
|
||||
|
||||
copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL;
|
||||
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
|
||||
|
||||
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1);
|
||||
|
||||
RD::get_singleton()->compute_list_add_barrier(compute_list);
|
||||
|
||||
//VERTICAL
|
||||
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_back_texture), 0);
|
||||
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_texture), 3);
|
||||
|
||||
copy.push_constant.flags = base_flags;
|
||||
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
|
||||
|
||||
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1);
|
||||
|
||||
RD::get_singleton()->compute_list_end();
|
||||
}
|
||||
|
||||
|
@ -2344,8 +2332,8 @@ EffectsRD::EffectsRD(bool p_prefer_raster_effects) {
|
|||
Vector<String> copy_modes;
|
||||
copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n");
|
||||
copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define DST_IMAGE_8BIT\n");
|
||||
copy_modes.push_back("\n#define MODE_GAUSSIAN_GLOW\n");
|
||||
copy_modes.push_back("\n#define MODE_GAUSSIAN_GLOW\n#define GLOW_USE_AUTO_EXPOSURE\n");
|
||||
copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define MODE_GLOW\n");
|
||||
copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define MODE_GLOW\n#define GLOW_USE_AUTO_EXPOSURE\n");
|
||||
copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n");
|
||||
copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n#define DST_IMAGE_8BIT\n");
|
||||
copy_modes.push_back("\n#define MODE_SIMPLE_COPY_DEPTH\n");
|
||||
|
|
|
@ -899,7 +899,7 @@ public:
|
|||
void copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false);
|
||||
void copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far);
|
||||
void copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y = false, bool p_panorama = false);
|
||||
void gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst = false);
|
||||
void gaussian_blur(RID p_source_rd_texture, RID p_texture, const Rect2i &p_region, bool p_8bit_dst = false);
|
||||
void set_color(RID p_dest_texture, const Color &p_color, const Rect2i &p_region, bool p_8bit_dst = false);
|
||||
void gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const Size2i &p_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_threshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0);
|
||||
void gaussian_glow_raster(RID p_source_rd_texture, RID p_framebuffer_half, RID p_rd_texture_half, RID p_dest_framebuffer, const Vector2 &p_pixel_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_threshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0);
|
||||
|
|
|
@ -7516,10 +7516,6 @@ void RendererStorageRD::_clear_render_target(RenderTarget *rt) {
|
|||
if (rt->backbuffer.is_valid()) {
|
||||
RD::get_singleton()->free(rt->backbuffer);
|
||||
rt->backbuffer = RID();
|
||||
for (int i = 0; i < rt->backbuffer_mipmaps.size(); i++) {
|
||||
//just erase copies, since the rest are erased by dependency
|
||||
RD::get_singleton()->free(rt->backbuffer_mipmaps[i].mipmap_copy);
|
||||
}
|
||||
rt->backbuffer_mipmaps.clear();
|
||||
rt->backbuffer_uniform_set = RID(); //chain deleted
|
||||
}
|
||||
|
@ -7636,7 +7632,9 @@ void RendererStorageRD::_create_render_target_backbuffer(RenderTarget *rt) {
|
|||
tf.mipmaps = mipmaps_required;
|
||||
|
||||
rt->backbuffer = RD::get_singleton()->texture_create(tf, RD::TextureView());
|
||||
RD::get_singleton()->set_resource_name(rt->backbuffer, "Render Target Back Buffer");
|
||||
rt->backbuffer_mipmap0 = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, 0);
|
||||
RD::get_singleton()->set_resource_name(rt->backbuffer_mipmap0, "Back Buffer slice mipmap 0");
|
||||
|
||||
{
|
||||
Vector<RID> fb_tex;
|
||||
|
@ -7651,23 +7649,10 @@ void RendererStorageRD::_create_render_target_backbuffer(RenderTarget *rt) {
|
|||
}
|
||||
//create mipmaps
|
||||
for (uint32_t i = 1; i < mipmaps_required; i++) {
|
||||
RenderTarget::BackbufferMipmap mm;
|
||||
{
|
||||
mm.mipmap = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, i);
|
||||
}
|
||||
RID mipmap = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, i);
|
||||
RD::get_singleton()->set_resource_name(mipmap, "Back Buffer slice mip: " + itos(i));
|
||||
|
||||
{
|
||||
Size2 mm_size = Image::get_image_mipmap_size(tf.width, tf.height, Image::FORMAT_RGBA8, i);
|
||||
|
||||
RD::TextureFormat mmtf = tf;
|
||||
mmtf.width = mm_size.width;
|
||||
mmtf.height = mm_size.height;
|
||||
mmtf.mipmaps = 1;
|
||||
|
||||
mm.mipmap_copy = RD::get_singleton()->texture_create(mmtf, RD::TextureView());
|
||||
}
|
||||
|
||||
rt->backbuffer_mipmaps.push_back(mm);
|
||||
rt->backbuffer_mipmaps.push_back(mipmap);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8111,7 +8096,7 @@ void RendererStorageRD::render_target_copy_to_back_buffer(RID p_render_target, c
|
|||
if (!p_gen_mipmaps) {
|
||||
return;
|
||||
}
|
||||
|
||||
RD::get_singleton()->draw_command_begin_label("Gaussian Blur Mipmaps");
|
||||
//then mipmap blur
|
||||
RID prev_texture = rt->color; //use color, not backbuffer, as bb has mipmaps.
|
||||
|
||||
|
@ -8121,10 +8106,11 @@ void RendererStorageRD::render_target_copy_to_back_buffer(RID p_render_target, c
|
|||
region.size.x = MAX(1, region.size.x >> 1);
|
||||
region.size.y = MAX(1, region.size.y >> 1);
|
||||
|
||||
const RenderTarget::BackbufferMipmap &mm = rt->backbuffer_mipmaps[i];
|
||||
effects->gaussian_blur(prev_texture, mm.mipmap, mm.mipmap_copy, region, true);
|
||||
prev_texture = mm.mipmap;
|
||||
RID mipmap = rt->backbuffer_mipmaps[i];
|
||||
effects->gaussian_blur(prev_texture, mipmap, region, true);
|
||||
prev_texture = mipmap;
|
||||
}
|
||||
RD::get_singleton()->draw_command_end_label();
|
||||
}
|
||||
|
||||
void RendererStorageRD::render_target_clear_back_buffer(RID p_render_target, const Rect2i &p_region, const Color &p_color) {
|
||||
|
@ -8164,7 +8150,7 @@ void RendererStorageRD::render_target_gen_back_buffer_mipmaps(RID p_render_targe
|
|||
return; //nothing to do
|
||||
}
|
||||
}
|
||||
|
||||
RD::get_singleton()->draw_command_begin_label("Gaussian Blur Mipmaps2");
|
||||
//then mipmap blur
|
||||
RID prev_texture = rt->backbuffer_mipmap0;
|
||||
|
||||
|
@ -8174,10 +8160,11 @@ void RendererStorageRD::render_target_gen_back_buffer_mipmaps(RID p_render_targe
|
|||
region.size.x = MAX(1, region.size.x >> 1);
|
||||
region.size.y = MAX(1, region.size.y >> 1);
|
||||
|
||||
const RenderTarget::BackbufferMipmap &mm = rt->backbuffer_mipmaps[i];
|
||||
effects->gaussian_blur(prev_texture, mm.mipmap, mm.mipmap_copy, region, true);
|
||||
prev_texture = mm.mipmap;
|
||||
RID mipmap = rt->backbuffer_mipmaps[i];
|
||||
effects->gaussian_blur(prev_texture, mipmap, region, true);
|
||||
prev_texture = mipmap;
|
||||
}
|
||||
RD::get_singleton()->draw_command_end_label();
|
||||
}
|
||||
|
||||
RID RendererStorageRD::render_target_get_framebuffer_uniform_set(RID p_render_target) {
|
||||
|
|
|
@ -1169,12 +1169,7 @@ private:
|
|||
RID backbuffer_fb;
|
||||
RID backbuffer_mipmap0;
|
||||
|
||||
struct BackbufferMipmap {
|
||||
RID mipmap;
|
||||
RID mipmap_copy;
|
||||
};
|
||||
|
||||
Vector<BackbufferMipmap> backbuffer_mipmaps;
|
||||
Vector<RID> backbuffer_mipmaps;
|
||||
|
||||
RID framebuffer_uniform_set;
|
||||
RID backbuffer_uniform_set;
|
||||
|
|
|
@ -61,7 +61,7 @@ layout(rgba8, set = 3, binding = 0) uniform restrict writeonly image2D dest_buff
|
|||
layout(rgba32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer;
|
||||
#endif
|
||||
|
||||
#ifdef MODE_GAUSSIAN_GLOW
|
||||
#ifdef MODE_GAUSSIAN_BLUR
|
||||
shared vec4 local_cache[256];
|
||||
shared vec4 temp_cache[128];
|
||||
#endif
|
||||
|
@ -70,7 +70,7 @@ void main() {
|
|||
// Pixel being shaded
|
||||
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
|
||||
|
||||
#ifndef MODE_GAUSSIAN_GLOW // Glow needs the extra threads
|
||||
#ifndef MODE_GAUSSIAN_BLUR // Gaussian blur needs the extra threads
|
||||
if (any(greaterThanEqual(pos, params.section.zw))) { //too large, do nothing
|
||||
return;
|
||||
}
|
||||
|
@ -92,35 +92,11 @@ void main() {
|
|||
|
||||
#ifdef MODE_GAUSSIAN_BLUR
|
||||
|
||||
//Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect
|
||||
|
||||
if (bool(params.flags & FLAG_HORIZONTAL)) {
|
||||
ivec2 base_pos = (pos + params.section.xy) << 1;
|
||||
vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.214607;
|
||||
color += texelFetch(source_color, base_pos + ivec2(1, 0), 0) * 0.189879;
|
||||
color += texelFetch(source_color, base_pos + ivec2(2, 0), 0) * 0.131514;
|
||||
color += texelFetch(source_color, base_pos + ivec2(3, 0), 0) * 0.071303;
|
||||
color += texelFetch(source_color, base_pos + ivec2(-1, 0), 0) * 0.189879;
|
||||
color += texelFetch(source_color, base_pos + ivec2(-2, 0), 0) * 0.131514;
|
||||
color += texelFetch(source_color, base_pos + ivec2(-3, 0), 0) * 0.071303;
|
||||
imageStore(dest_buffer, pos + params.target, color);
|
||||
} else {
|
||||
ivec2 base_pos = (pos + params.section.xy);
|
||||
vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.38774;
|
||||
color += texelFetch(source_color, base_pos + ivec2(0, 1), 0) * 0.24477;
|
||||
color += texelFetch(source_color, base_pos + ivec2(0, 2), 0) * 0.06136;
|
||||
color += texelFetch(source_color, base_pos + ivec2(0, -1), 0) * 0.24477;
|
||||
color += texelFetch(source_color, base_pos + ivec2(0, -2), 0) * 0.06136;
|
||||
imageStore(dest_buffer, pos + params.target, color);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef MODE_GAUSSIAN_GLOW
|
||||
|
||||
// First pass copy texture into 16x16 local memory for every 8x8 thread block
|
||||
vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw));
|
||||
uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * 16;
|
||||
|
||||
#ifdef MODE_GLOW
|
||||
if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) {
|
||||
vec2 quad_offset_uv = clamp((vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.0)) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw));
|
||||
|
||||
|
@ -128,12 +104,15 @@ void main() {
|
|||
local_cache[dest_index + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.z, 0.0), 0)) * 0.5;
|
||||
local_cache[dest_index + 16] = (textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0) + textureLod(source_color, quad_offset_uv + vec2(0.0, 1.0 / params.section.w), 0)) * 0.5;
|
||||
local_cache[dest_index + 16 + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.zw), 0)) * 0.5;
|
||||
} else {
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
local_cache[dest_index] = textureLod(source_color, quad_center_uv, 0);
|
||||
local_cache[dest_index + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0);
|
||||
local_cache[dest_index + 16] = textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0);
|
||||
local_cache[dest_index + 16 + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0);
|
||||
}
|
||||
#ifdef MODE_GLOW
|
||||
if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) {
|
||||
// Tonemap initial samples to reduce weight of fireflies: https://graphicrants.blogspot.com/2013/12/tone-mapping.html
|
||||
local_cache[dest_index] /= 1.0 + dot(local_cache[dest_index].rgb, vec3(0.299, 0.587, 0.114));
|
||||
|
@ -141,29 +120,33 @@ void main() {
|
|||
local_cache[dest_index + 16] /= 1.0 + dot(local_cache[dest_index + 16].rgb, vec3(0.299, 0.587, 0.114));
|
||||
local_cache[dest_index + 16 + 1] /= 1.0 + dot(local_cache[dest_index + 16 + 1].rgb, vec3(0.299, 0.587, 0.114));
|
||||
}
|
||||
|
||||
const float kernel[4] = { 0.174938, 0.165569, 0.140367, 0.106595 };
|
||||
#else
|
||||
// Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect.
|
||||
const float kernel[4] = { 0.214607, 0.189879, 0.131514, 0.071303 };
|
||||
#endif
|
||||
memoryBarrierShared();
|
||||
barrier();
|
||||
|
||||
// Horizontal pass. Needs to copy into 8x16 chunk of local memory so vertical pass has full resolution
|
||||
uint read_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 32 + 4;
|
||||
vec4 color_top = vec4(0.0);
|
||||
color_top += local_cache[read_index] * 0.174938;
|
||||
color_top += local_cache[read_index + 1] * 0.165569;
|
||||
color_top += local_cache[read_index + 2] * 0.140367;
|
||||
color_top += local_cache[read_index + 3] * 0.106595;
|
||||
color_top += local_cache[read_index - 1] * 0.165569;
|
||||
color_top += local_cache[read_index - 2] * 0.140367;
|
||||
color_top += local_cache[read_index - 3] * 0.106595;
|
||||
color_top += local_cache[read_index] * kernel[0];
|
||||
color_top += local_cache[read_index + 1] * kernel[1];
|
||||
color_top += local_cache[read_index + 2] * kernel[2];
|
||||
color_top += local_cache[read_index + 3] * kernel[3];
|
||||
color_top += local_cache[read_index - 1] * kernel[1];
|
||||
color_top += local_cache[read_index - 2] * kernel[2];
|
||||
color_top += local_cache[read_index - 3] * kernel[3];
|
||||
|
||||
vec4 color_bottom = vec4(0.0);
|
||||
color_bottom += local_cache[read_index + 16] * 0.174938;
|
||||
color_bottom += local_cache[read_index + 1 + 16] * 0.165569;
|
||||
color_bottom += local_cache[read_index + 2 + 16] * 0.140367;
|
||||
color_bottom += local_cache[read_index + 3 + 16] * 0.106595;
|
||||
color_bottom += local_cache[read_index - 1 + 16] * 0.165569;
|
||||
color_bottom += local_cache[read_index - 2 + 16] * 0.140367;
|
||||
color_bottom += local_cache[read_index - 3 + 16] * 0.106595;
|
||||
color_bottom += local_cache[read_index + 16] * kernel[0];
|
||||
color_bottom += local_cache[read_index + 1 + 16] * kernel[1];
|
||||
color_bottom += local_cache[read_index + 2 + 16] * kernel[2];
|
||||
color_bottom += local_cache[read_index + 3 + 16] * kernel[3];
|
||||
color_bottom += local_cache[read_index - 1 + 16] * kernel[1];
|
||||
color_bottom += local_cache[read_index - 2 + 16] * kernel[2];
|
||||
color_bottom += local_cache[read_index - 3 + 16] * kernel[3];
|
||||
|
||||
// rotate samples to take advantage of cache coherency
|
||||
uint write_index = gl_LocalInvocationID.y * 2 + gl_LocalInvocationID.x * 16;
|
||||
|
@ -174,18 +157,24 @@ void main() {
|
|||
memoryBarrierShared();
|
||||
barrier();
|
||||
|
||||
// If destination outside of texture, can stop doing work now
|
||||
if (any(greaterThanEqual(pos, params.section.zw))) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Vertical pass
|
||||
uint index = gl_LocalInvocationID.y + gl_LocalInvocationID.x * 16 + 4;
|
||||
vec4 color = vec4(0.0);
|
||||
|
||||
color += temp_cache[index] * 0.174938;
|
||||
color += temp_cache[index + 1] * 0.165569;
|
||||
color += temp_cache[index + 2] * 0.140367;
|
||||
color += temp_cache[index + 3] * 0.106595;
|
||||
color += temp_cache[index - 1] * 0.165569;
|
||||
color += temp_cache[index - 2] * 0.140367;
|
||||
color += temp_cache[index - 3] * 0.106595;
|
||||
color += temp_cache[index] * kernel[0];
|
||||
color += temp_cache[index + 1] * kernel[1];
|
||||
color += temp_cache[index + 2] * kernel[2];
|
||||
color += temp_cache[index + 3] * kernel[3];
|
||||
color += temp_cache[index - 1] * kernel[1];
|
||||
color += temp_cache[index - 2] * kernel[2];
|
||||
color += temp_cache[index - 3] * kernel[3];
|
||||
|
||||
#ifdef MODE_GLOW
|
||||
if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) {
|
||||
// Undo tonemap to restore range: https://graphicrants.blogspot.com/2013/12/tone-mapping.html
|
||||
color /= 1.0 - dot(color.rgb, vec3(0.299, 0.587, 0.114));
|
||||
|
@ -205,7 +194,7 @@ void main() {
|
|||
|
||||
color = min(color * feedback, vec4(params.glow_luminance_cap));
|
||||
}
|
||||
|
||||
#endif
|
||||
imageStore(dest_buffer, pos + params.target, color);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue