Optimize and fix backbuffer gaussian blur

This commit is contained in:
clayjohn 2022-02-05 15:03:39 -08:00
parent c24fc415dc
commit 60d8df3fee
5 changed files with 61 additions and 102 deletions

View File

@ -482,12 +482,11 @@ void EffectsRD::set_color(RID p_dest_texture, const Color &p_color, const Rect2i
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst) { void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, const Rect2i &p_region, bool p_8bit_dst) {
ERR_FAIL_COND_MSG(prefer_raster_effects, "Can't use the compute version of the gaussian blur with the mobile renderer."); ERR_FAIL_COND_MSG(prefer_raster_effects, "Can't use the compute version of the gaussian blur with the mobile renderer.");
memset(&copy.push_constant, 0, sizeof(CopyPushConstant)); memset(&copy.push_constant, 0, sizeof(CopyPushConstant));
uint32_t base_flags = 0;
copy.push_constant.section[0] = p_region.position.x; copy.push_constant.section[0] = p_region.position.x;
copy.push_constant.section[1] = p_region.position.y; copy.push_constant.section[1] = p_region.position.y;
copy.push_constant.section[2] = p_region.size.width; copy.push_constant.section[2] = p_region.size.width;
@ -497,23 +496,12 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back
RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin(); RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_back_texture), 3);
copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list);
//VERTICAL
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_back_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_texture), 3); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_texture), 3);
copy.push_constant.flags = base_flags;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
@ -2344,8 +2332,8 @@ EffectsRD::EffectsRD(bool p_prefer_raster_effects) {
Vector<String> copy_modes; Vector<String> copy_modes;
copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n"); copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n");
copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define DST_IMAGE_8BIT\n"); copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define DST_IMAGE_8BIT\n");
copy_modes.push_back("\n#define MODE_GAUSSIAN_GLOW\n"); copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define MODE_GLOW\n");
copy_modes.push_back("\n#define MODE_GAUSSIAN_GLOW\n#define GLOW_USE_AUTO_EXPOSURE\n"); copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define MODE_GLOW\n#define GLOW_USE_AUTO_EXPOSURE\n");
copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n"); copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n");
copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n#define DST_IMAGE_8BIT\n"); copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n#define DST_IMAGE_8BIT\n");
copy_modes.push_back("\n#define MODE_SIMPLE_COPY_DEPTH\n"); copy_modes.push_back("\n#define MODE_SIMPLE_COPY_DEPTH\n");

View File

@ -899,7 +899,7 @@ public:
void copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false); void copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false);
void copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far); void copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far);
void copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y = false, bool p_panorama = false); void copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y = false, bool p_panorama = false);
void gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst = false); void gaussian_blur(RID p_source_rd_texture, RID p_texture, const Rect2i &p_region, bool p_8bit_dst = false);
void set_color(RID p_dest_texture, const Color &p_color, const Rect2i &p_region, bool p_8bit_dst = false); void set_color(RID p_dest_texture, const Color &p_color, const Rect2i &p_region, bool p_8bit_dst = false);
void gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const Size2i &p_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_threshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0); void gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const Size2i &p_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_threshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0);
void gaussian_glow_raster(RID p_source_rd_texture, RID p_framebuffer_half, RID p_rd_texture_half, RID p_dest_framebuffer, const Vector2 &p_pixel_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_threshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0); void gaussian_glow_raster(RID p_source_rd_texture, RID p_framebuffer_half, RID p_rd_texture_half, RID p_dest_framebuffer, const Vector2 &p_pixel_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_threshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0);

View File

@ -7516,10 +7516,6 @@ void RendererStorageRD::_clear_render_target(RenderTarget *rt) {
if (rt->backbuffer.is_valid()) { if (rt->backbuffer.is_valid()) {
RD::get_singleton()->free(rt->backbuffer); RD::get_singleton()->free(rt->backbuffer);
rt->backbuffer = RID(); rt->backbuffer = RID();
for (int i = 0; i < rt->backbuffer_mipmaps.size(); i++) {
//just erase copies, since the rest are erased by dependency
RD::get_singleton()->free(rt->backbuffer_mipmaps[i].mipmap_copy);
}
rt->backbuffer_mipmaps.clear(); rt->backbuffer_mipmaps.clear();
rt->backbuffer_uniform_set = RID(); //chain deleted rt->backbuffer_uniform_set = RID(); //chain deleted
} }
@ -7636,7 +7632,9 @@ void RendererStorageRD::_create_render_target_backbuffer(RenderTarget *rt) {
tf.mipmaps = mipmaps_required; tf.mipmaps = mipmaps_required;
rt->backbuffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); rt->backbuffer = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(rt->backbuffer, "Render Target Back Buffer");
rt->backbuffer_mipmap0 = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, 0); rt->backbuffer_mipmap0 = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, 0);
RD::get_singleton()->set_resource_name(rt->backbuffer_mipmap0, "Back Buffer slice mipmap 0");
{ {
Vector<RID> fb_tex; Vector<RID> fb_tex;
@ -7651,23 +7649,10 @@ void RendererStorageRD::_create_render_target_backbuffer(RenderTarget *rt) {
} }
//create mipmaps //create mipmaps
for (uint32_t i = 1; i < mipmaps_required; i++) { for (uint32_t i = 1; i < mipmaps_required; i++) {
RenderTarget::BackbufferMipmap mm; RID mipmap = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, i);
{ RD::get_singleton()->set_resource_name(mipmap, "Back Buffer slice mip: " + itos(i));
mm.mipmap = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, i);
}
{ rt->backbuffer_mipmaps.push_back(mipmap);
Size2 mm_size = Image::get_image_mipmap_size(tf.width, tf.height, Image::FORMAT_RGBA8, i);
RD::TextureFormat mmtf = tf;
mmtf.width = mm_size.width;
mmtf.height = mm_size.height;
mmtf.mipmaps = 1;
mm.mipmap_copy = RD::get_singleton()->texture_create(mmtf, RD::TextureView());
}
rt->backbuffer_mipmaps.push_back(mm);
} }
} }
@ -8111,7 +8096,7 @@ void RendererStorageRD::render_target_copy_to_back_buffer(RID p_render_target, c
if (!p_gen_mipmaps) { if (!p_gen_mipmaps) {
return; return;
} }
RD::get_singleton()->draw_command_begin_label("Gaussian Blur Mipmaps");
//then mipmap blur //then mipmap blur
RID prev_texture = rt->color; //use color, not backbuffer, as bb has mipmaps. RID prev_texture = rt->color; //use color, not backbuffer, as bb has mipmaps.
@ -8121,10 +8106,11 @@ void RendererStorageRD::render_target_copy_to_back_buffer(RID p_render_target, c
region.size.x = MAX(1, region.size.x >> 1); region.size.x = MAX(1, region.size.x >> 1);
region.size.y = MAX(1, region.size.y >> 1); region.size.y = MAX(1, region.size.y >> 1);
const RenderTarget::BackbufferMipmap &mm = rt->backbuffer_mipmaps[i]; RID mipmap = rt->backbuffer_mipmaps[i];
effects->gaussian_blur(prev_texture, mm.mipmap, mm.mipmap_copy, region, true); effects->gaussian_blur(prev_texture, mipmap, region, true);
prev_texture = mm.mipmap; prev_texture = mipmap;
} }
RD::get_singleton()->draw_command_end_label();
} }
void RendererStorageRD::render_target_clear_back_buffer(RID p_render_target, const Rect2i &p_region, const Color &p_color) { void RendererStorageRD::render_target_clear_back_buffer(RID p_render_target, const Rect2i &p_region, const Color &p_color) {
@ -8164,7 +8150,7 @@ void RendererStorageRD::render_target_gen_back_buffer_mipmaps(RID p_render_targe
return; //nothing to do return; //nothing to do
} }
} }
RD::get_singleton()->draw_command_begin_label("Gaussian Blur Mipmaps2");
//then mipmap blur //then mipmap blur
RID prev_texture = rt->backbuffer_mipmap0; RID prev_texture = rt->backbuffer_mipmap0;
@ -8174,10 +8160,11 @@ void RendererStorageRD::render_target_gen_back_buffer_mipmaps(RID p_render_targe
region.size.x = MAX(1, region.size.x >> 1); region.size.x = MAX(1, region.size.x >> 1);
region.size.y = MAX(1, region.size.y >> 1); region.size.y = MAX(1, region.size.y >> 1);
const RenderTarget::BackbufferMipmap &mm = rt->backbuffer_mipmaps[i]; RID mipmap = rt->backbuffer_mipmaps[i];
effects->gaussian_blur(prev_texture, mm.mipmap, mm.mipmap_copy, region, true); effects->gaussian_blur(prev_texture, mipmap, region, true);
prev_texture = mm.mipmap; prev_texture = mipmap;
} }
RD::get_singleton()->draw_command_end_label();
} }
RID RendererStorageRD::render_target_get_framebuffer_uniform_set(RID p_render_target) { RID RendererStorageRD::render_target_get_framebuffer_uniform_set(RID p_render_target) {

View File

@ -1169,12 +1169,7 @@ private:
RID backbuffer_fb; RID backbuffer_fb;
RID backbuffer_mipmap0; RID backbuffer_mipmap0;
struct BackbufferMipmap { Vector<RID> backbuffer_mipmaps;
RID mipmap;
RID mipmap_copy;
};
Vector<BackbufferMipmap> backbuffer_mipmaps;
RID framebuffer_uniform_set; RID framebuffer_uniform_set;
RID backbuffer_uniform_set; RID backbuffer_uniform_set;

View File

@ -61,7 +61,7 @@ layout(rgba8, set = 3, binding = 0) uniform restrict writeonly image2D dest_buff
layout(rgba32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer; layout(rgba32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer;
#endif #endif
#ifdef MODE_GAUSSIAN_GLOW #ifdef MODE_GAUSSIAN_BLUR
shared vec4 local_cache[256]; shared vec4 local_cache[256];
shared vec4 temp_cache[128]; shared vec4 temp_cache[128];
#endif #endif
@ -70,7 +70,7 @@ void main() {
// Pixel being shaded // Pixel being shaded
ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
#ifndef MODE_GAUSSIAN_GLOW // Glow needs the extra threads #ifndef MODE_GAUSSIAN_BLUR // Gaussian blur needs the extra threads
if (any(greaterThanEqual(pos, params.section.zw))) { //too large, do nothing if (any(greaterThanEqual(pos, params.section.zw))) { //too large, do nothing
return; return;
} }
@ -92,35 +92,11 @@ void main() {
#ifdef MODE_GAUSSIAN_BLUR #ifdef MODE_GAUSSIAN_BLUR
//Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect
if (bool(params.flags & FLAG_HORIZONTAL)) {
ivec2 base_pos = (pos + params.section.xy) << 1;
vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.214607;
color += texelFetch(source_color, base_pos + ivec2(1, 0), 0) * 0.189879;
color += texelFetch(source_color, base_pos + ivec2(2, 0), 0) * 0.131514;
color += texelFetch(source_color, base_pos + ivec2(3, 0), 0) * 0.071303;
color += texelFetch(source_color, base_pos + ivec2(-1, 0), 0) * 0.189879;
color += texelFetch(source_color, base_pos + ivec2(-2, 0), 0) * 0.131514;
color += texelFetch(source_color, base_pos + ivec2(-3, 0), 0) * 0.071303;
imageStore(dest_buffer, pos + params.target, color);
} else {
ivec2 base_pos = (pos + params.section.xy);
vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.38774;
color += texelFetch(source_color, base_pos + ivec2(0, 1), 0) * 0.24477;
color += texelFetch(source_color, base_pos + ivec2(0, 2), 0) * 0.06136;
color += texelFetch(source_color, base_pos + ivec2(0, -1), 0) * 0.24477;
color += texelFetch(source_color, base_pos + ivec2(0, -2), 0) * 0.06136;
imageStore(dest_buffer, pos + params.target, color);
}
#endif
#ifdef MODE_GAUSSIAN_GLOW
// First pass copy texture into 16x16 local memory for every 8x8 thread block // First pass copy texture into 16x16 local memory for every 8x8 thread block
vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw)); vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw));
uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * 16; uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * 16;
#ifdef MODE_GLOW
if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) { if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) {
vec2 quad_offset_uv = clamp((vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.0)) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw)); vec2 quad_offset_uv = clamp((vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.0)) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw));
@ -128,12 +104,15 @@ void main() {
local_cache[dest_index + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.z, 0.0), 0)) * 0.5; local_cache[dest_index + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.z, 0.0), 0)) * 0.5;
local_cache[dest_index + 16] = (textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0) + textureLod(source_color, quad_offset_uv + vec2(0.0, 1.0 / params.section.w), 0)) * 0.5; local_cache[dest_index + 16] = (textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0) + textureLod(source_color, quad_offset_uv + vec2(0.0, 1.0 / params.section.w), 0)) * 0.5;
local_cache[dest_index + 16 + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.zw), 0)) * 0.5; local_cache[dest_index + 16 + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.zw), 0)) * 0.5;
} else { } else
#endif
{
local_cache[dest_index] = textureLod(source_color, quad_center_uv, 0); local_cache[dest_index] = textureLod(source_color, quad_center_uv, 0);
local_cache[dest_index + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0); local_cache[dest_index + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0);
local_cache[dest_index + 16] = textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0); local_cache[dest_index + 16] = textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0);
local_cache[dest_index + 16 + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0); local_cache[dest_index + 16 + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0);
} }
#ifdef MODE_GLOW
if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) { if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) {
// Tonemap initial samples to reduce weight of fireflies: https://graphicrants.blogspot.com/2013/12/tone-mapping.html // Tonemap initial samples to reduce weight of fireflies: https://graphicrants.blogspot.com/2013/12/tone-mapping.html
local_cache[dest_index] /= 1.0 + dot(local_cache[dest_index].rgb, vec3(0.299, 0.587, 0.114)); local_cache[dest_index] /= 1.0 + dot(local_cache[dest_index].rgb, vec3(0.299, 0.587, 0.114));
@ -141,29 +120,33 @@ void main() {
local_cache[dest_index + 16] /= 1.0 + dot(local_cache[dest_index + 16].rgb, vec3(0.299, 0.587, 0.114)); local_cache[dest_index + 16] /= 1.0 + dot(local_cache[dest_index + 16].rgb, vec3(0.299, 0.587, 0.114));
local_cache[dest_index + 16 + 1] /= 1.0 + dot(local_cache[dest_index + 16 + 1].rgb, vec3(0.299, 0.587, 0.114)); local_cache[dest_index + 16 + 1] /= 1.0 + dot(local_cache[dest_index + 16 + 1].rgb, vec3(0.299, 0.587, 0.114));
} }
const float kernel[4] = { 0.174938, 0.165569, 0.140367, 0.106595 };
#else
// Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect.
const float kernel[4] = { 0.214607, 0.189879, 0.131514, 0.071303 };
#endif
memoryBarrierShared(); memoryBarrierShared();
barrier(); barrier();
// Horizontal pass. Needs to copy into 8x16 chunk of local memory so vertical pass has full resolution // Horizontal pass. Needs to copy into 8x16 chunk of local memory so vertical pass has full resolution
uint read_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 32 + 4; uint read_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 32 + 4;
vec4 color_top = vec4(0.0); vec4 color_top = vec4(0.0);
color_top += local_cache[read_index] * 0.174938; color_top += local_cache[read_index] * kernel[0];
color_top += local_cache[read_index + 1] * 0.165569; color_top += local_cache[read_index + 1] * kernel[1];
color_top += local_cache[read_index + 2] * 0.140367; color_top += local_cache[read_index + 2] * kernel[2];
color_top += local_cache[read_index + 3] * 0.106595; color_top += local_cache[read_index + 3] * kernel[3];
color_top += local_cache[read_index - 1] * 0.165569; color_top += local_cache[read_index - 1] * kernel[1];
color_top += local_cache[read_index - 2] * 0.140367; color_top += local_cache[read_index - 2] * kernel[2];
color_top += local_cache[read_index - 3] * 0.106595; color_top += local_cache[read_index - 3] * kernel[3];
vec4 color_bottom = vec4(0.0); vec4 color_bottom = vec4(0.0);
color_bottom += local_cache[read_index + 16] * 0.174938; color_bottom += local_cache[read_index + 16] * kernel[0];
color_bottom += local_cache[read_index + 1 + 16] * 0.165569; color_bottom += local_cache[read_index + 1 + 16] * kernel[1];
color_bottom += local_cache[read_index + 2 + 16] * 0.140367; color_bottom += local_cache[read_index + 2 + 16] * kernel[2];
color_bottom += local_cache[read_index + 3 + 16] * 0.106595; color_bottom += local_cache[read_index + 3 + 16] * kernel[3];
color_bottom += local_cache[read_index - 1 + 16] * 0.165569; color_bottom += local_cache[read_index - 1 + 16] * kernel[1];
color_bottom += local_cache[read_index - 2 + 16] * 0.140367; color_bottom += local_cache[read_index - 2 + 16] * kernel[2];
color_bottom += local_cache[read_index - 3 + 16] * 0.106595; color_bottom += local_cache[read_index - 3 + 16] * kernel[3];
// rotate samples to take advantage of cache coherency // rotate samples to take advantage of cache coherency
uint write_index = gl_LocalInvocationID.y * 2 + gl_LocalInvocationID.x * 16; uint write_index = gl_LocalInvocationID.y * 2 + gl_LocalInvocationID.x * 16;
@ -174,18 +157,24 @@ void main() {
memoryBarrierShared(); memoryBarrierShared();
barrier(); barrier();
// If destination outside of texture, can stop doing work now
if (any(greaterThanEqual(pos, params.section.zw))) {
return;
}
// Vertical pass // Vertical pass
uint index = gl_LocalInvocationID.y + gl_LocalInvocationID.x * 16 + 4; uint index = gl_LocalInvocationID.y + gl_LocalInvocationID.x * 16 + 4;
vec4 color = vec4(0.0); vec4 color = vec4(0.0);
color += temp_cache[index] * 0.174938; color += temp_cache[index] * kernel[0];
color += temp_cache[index + 1] * 0.165569; color += temp_cache[index + 1] * kernel[1];
color += temp_cache[index + 2] * 0.140367; color += temp_cache[index + 2] * kernel[2];
color += temp_cache[index + 3] * 0.106595; color += temp_cache[index + 3] * kernel[3];
color += temp_cache[index - 1] * 0.165569; color += temp_cache[index - 1] * kernel[1];
color += temp_cache[index - 2] * 0.140367; color += temp_cache[index - 2] * kernel[2];
color += temp_cache[index - 3] * 0.106595; color += temp_cache[index - 3] * kernel[3];
#ifdef MODE_GLOW
if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) { if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) {
// Undo tonemap to restore range: https://graphicrants.blogspot.com/2013/12/tone-mapping.html // Undo tonemap to restore range: https://graphicrants.blogspot.com/2013/12/tone-mapping.html
color /= 1.0 - dot(color.rgb, vec3(0.299, 0.587, 0.114)); color /= 1.0 - dot(color.rgb, vec3(0.299, 0.587, 0.114));
@ -205,7 +194,7 @@ void main() {
color = min(color * feedback, vec4(params.glow_luminance_cap)); color = min(color * feedback, vec4(params.glow_luminance_cap));
} }
#endif
imageStore(dest_buffer, pos + params.target, color); imageStore(dest_buffer, pos + params.target, color);
#endif #endif