godot/drivers/gles2/shaders/tonemap.glsl
lawnjelly ed3d029f5d GLES2 / GLES3 - Use gl_FragColor temporary
On some hardware, modifying gl_FragColor multiple times can cause large performance drops. This PR writes to a standard temporary variable instead, and copies across to gl_FragColor once only at the end of the fragment shader.

This could potentially lead to large gains in performance on affected hardware.
2023-11-07 11:53:25 +00:00

385 lines
11 KiB
GLSL

/* clang-format off */
[vertex]
#ifdef USE_GLES_OVER_GL
#define lowp
#define mediump
#define highp
#else
precision highp float;
precision highp int;
#endif
attribute vec2 vertex_attrib; // attrib:0
/* clang-format on */
attribute vec2 uv_in; // attrib:4
varying vec2 uv_interp;
void main() {
gl_Position = vec4(vertex_attrib, 0.0, 1.0);
uv_interp = uv_in;
}
/* clang-format off */
[fragment]
// texture2DLodEXT and textureCubeLodEXT are fragment shader specific.
// Do not copy these defines in the vertex section.
#ifndef USE_GLES_OVER_GL
#ifdef GL_EXT_shader_texture_lod
#extension GL_EXT_shader_texture_lod : enable
#define texture2DLod(img, coord, lod) texture2DLodEXT(img, coord, lod)
#define textureCubeLod(img, coord, lod) textureCubeLodEXT(img, coord, lod)
#endif
#endif // !USE_GLES_OVER_GL
#ifdef GL_ARB_shader_texture_lod
#extension GL_ARB_shader_texture_lod : enable
#endif
#if !defined(GL_EXT_shader_texture_lod) && !defined(GL_ARB_shader_texture_lod)
#define texture2DLod(img, coord, lod) texture2D(img, coord, lod)
#define textureCubeLod(img, coord, lod) textureCube(img, coord, lod)
#endif
// Allows the use of bitshift operators for bicubic upscale
#ifdef GL_EXT_gpu_shader4
#extension GL_EXT_gpu_shader4 : enable
#endif
#ifdef USE_GLES_OVER_GL
#define lowp
#define mediump
#define highp
#else
#if defined(USE_HIGHP_PRECISION)
precision highp float;
precision highp int;
#else
precision mediump float;
precision mediump int;
#endif
#endif
#include "stdlib.glsl"
varying vec2 uv_interp;
/* clang-format on */
uniform highp sampler2D source; //texunit:0
#if defined(USE_GLOW_LEVEL1) || defined(USE_GLOW_LEVEL2) || defined(USE_GLOW_LEVEL3) || defined(USE_GLOW_LEVEL4) || defined(USE_GLOW_LEVEL5) || defined(USE_GLOW_LEVEL6) || defined(USE_GLOW_LEVEL7)
#define USING_GLOW // only use glow when at least one glow level is selected
#ifdef USE_MULTI_TEXTURE_GLOW
uniform highp sampler2D source_glow1; //texunit:2
uniform highp sampler2D source_glow2; //texunit:3
uniform highp sampler2D source_glow3; //texunit:4
uniform highp sampler2D source_glow4; //texunit:5
uniform highp sampler2D source_glow5; //texunit:6
uniform highp sampler2D source_glow6; //texunit:7
#ifdef USE_GLOW_LEVEL7
uniform highp sampler2D source_glow7; //texunit:8
#endif
#else
uniform highp sampler2D source_glow; //texunit:2
#endif
uniform highp float glow_intensity;
#endif
#ifdef USE_BCS
uniform vec3 bcs;
#endif
#ifdef USE_FXAA
uniform vec2 pixel_size;
#endif
#ifdef USE_SHARPENING
uniform float sharpen_intensity;
#endif
#ifdef USE_COLOR_CORRECTION
uniform sampler2D color_correction; //texunit:1
#endif
#ifdef GL_EXT_gpu_shader4
#ifdef USE_GLOW_FILTER_BICUBIC
// w0, w1, w2, and w3 are the four cubic B-spline basis functions
float w0(float a) {
return (1.0 / 6.0) * (a * (a * (-a + 3.0) - 3.0) + 1.0);
}
float w1(float a) {
return (1.0 / 6.0) * (a * a * (3.0 * a - 6.0) + 4.0);
}
float w2(float a) {
return (1.0 / 6.0) * (a * (a * (-3.0 * a + 3.0) + 3.0) + 1.0);
}
float w3(float a) {
return (1.0 / 6.0) * (a * a * a);
}
// g0 and g1 are the two amplitude functions
float g0(float a) {
return w0(a) + w1(a);
}
float g1(float a) {
return w2(a) + w3(a);
}
// h0 and h1 are the two offset functions
float h0(float a) {
return -1.0 + w1(a) / (w0(a) + w1(a));
}
float h1(float a) {
return 1.0 + w3(a) / (w2(a) + w3(a));
}
uniform ivec2 glow_texture_size;
vec4 texture2D_bicubic(sampler2D tex, vec2 uv, int p_lod) {
float lod = float(p_lod);
vec2 tex_size = vec2(glow_texture_size >> p_lod);
vec2 texel_size = vec2(1.0) / tex_size;
uv = uv * tex_size + vec2(0.5);
vec2 iuv = floor(uv);
vec2 fuv = fract(uv);
float g0x = g0(fuv.x);
float g1x = g1(fuv.x);
float h0x = h0(fuv.x);
float h1x = h1(fuv.x);
float h0y = h0(fuv.y);
float h1y = h1(fuv.y);
vec2 p0 = (vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5)) * texel_size;
vec2 p1 = (vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5)) * texel_size;
vec2 p2 = (vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5)) * texel_size;
vec2 p3 = (vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5)) * texel_size;
return (g0(fuv.y) * (g0x * texture2DLod(tex, p0, lod) + g1x * texture2DLod(tex, p1, lod))) +
(g1(fuv.y) * (g0x * texture2DLod(tex, p2, lod) + g1x * texture2DLod(tex, p3, lod)));
}
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2D_bicubic(m_tex, m_uv, m_lod)
#else //!USE_GLOW_FILTER_BICUBIC
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2DLod(m_tex, m_uv, float(m_lod))
#endif //USE_GLOW_FILTER_BICUBIC
#else //!GL_EXT_gpu_shader4
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2DLod(m_tex, m_uv, float(m_lod))
#endif //GL_EXT_gpu_shader4
vec4 apply_glow(vec4 color, vec3 glow) { // apply glow using the selected blending mode
#ifdef USE_GLOW_REPLACE
color.rgb = glow;
#endif
#ifdef USE_GLOW_SCREEN
color.rgb = max((color.rgb + glow) - (color.rgb * glow), vec3(0.0));
#endif
#ifdef USE_GLOW_SOFTLIGHT
glow = glow * vec3(0.5) + vec3(0.5);
color.r = (glow.r <= 0.5) ? (color.r - (1.0 - 2.0 * glow.r) * color.r * (1.0 - color.r)) : (((glow.r > 0.5) && (color.r <= 0.25)) ? (color.r + (2.0 * glow.r - 1.0) * (4.0 * color.r * (4.0 * color.r + 1.0) * (color.r - 1.0) + 7.0 * color.r)) : (color.r + (2.0 * glow.r - 1.0) * (sqrt(color.r) - color.r)));
color.g = (glow.g <= 0.5) ? (color.g - (1.0 - 2.0 * glow.g) * color.g * (1.0 - color.g)) : (((glow.g > 0.5) && (color.g <= 0.25)) ? (color.g + (2.0 * glow.g - 1.0) * (4.0 * color.g * (4.0 * color.g + 1.0) * (color.g - 1.0) + 7.0 * color.g)) : (color.g + (2.0 * glow.g - 1.0) * (sqrt(color.g) - color.g)));
color.b = (glow.b <= 0.5) ? (color.b - (1.0 - 2.0 * glow.b) * color.b * (1.0 - color.b)) : (((glow.b > 0.5) && (color.b <= 0.25)) ? (color.b + (2.0 * glow.b - 1.0) * (4.0 * color.b * (4.0 * color.b + 1.0) * (color.b - 1.0) + 7.0 * color.b)) : (color.b + (2.0 * glow.b - 1.0) * (sqrt(color.b) - color.b)));
#endif
#if !defined(USE_GLOW_SCREEN) && !defined(USE_GLOW_SOFTLIGHT) && !defined(USE_GLOW_REPLACE) // no other selected -> additive
color.rgb += glow;
#endif
#ifndef USE_GLOW_SOFTLIGHT // softlight has no effect on black color
// compute the alpha from glow
float a = max(max(glow.r, glow.g), glow.b);
color.a = a + color.a * (1.0 - a);
if (color.a == 0.0) {
color.rgb = vec3(0.0);
} else if (color.a < 1.0) {
color.rgb /= color.a;
}
#endif
return color;
}
vec3 apply_bcs(vec3 color, vec3 bcs) {
color = mix(vec3(0.0), color, bcs.x);
color = mix(vec3(0.5), color, bcs.y);
color = mix(vec3(dot(vec3(1.0), color) * 0.33333), color, bcs.z);
return color;
}
vec3 apply_color_correction(vec3 color, sampler2D correction_tex) {
color.r = texture2D(correction_tex, vec2(color.r, 0.0)).r;
color.g = texture2D(correction_tex, vec2(color.g, 0.0)).g;
color.b = texture2D(correction_tex, vec2(color.b, 0.0)).b;
return color;
}
vec4 apply_fxaa(vec4 color, vec2 uv_interp, vec2 pixel_size) {
const float FXAA_REDUCE_MIN = (1.0 / 128.0);
const float FXAA_REDUCE_MUL = (1.0 / 8.0);
const float FXAA_SPAN_MAX = 8.0;
const vec3 luma = vec3(0.299, 0.587, 0.114);
vec4 rgbNW = texture2DLod(source, uv_interp + vec2(-0.5, -0.5) * pixel_size, 0.0);
vec4 rgbNE = texture2DLod(source, uv_interp + vec2(0.5, -0.5) * pixel_size, 0.0);
vec4 rgbSW = texture2DLod(source, uv_interp + vec2(-0.5, 0.5) * pixel_size, 0.0);
vec4 rgbSE = texture2DLod(source, uv_interp + vec2(0.5, 0.5) * pixel_size, 0.0);
vec3 rgbM = color.rgb;
#ifdef DISABLE_ALPHA
float lumaNW = dot(rgbNW.rgb, luma);
float lumaNE = dot(rgbNE.rgb, luma);
float lumaSW = dot(rgbSW.rgb, luma);
float lumaSE = dot(rgbSE.rgb, luma);
float lumaM = dot(rgbM, luma);
#else
float lumaNW = dot(rgbNW.rgb, luma) - ((1.0 - rgbNW.a) / 8.0);
float lumaNE = dot(rgbNE.rgb, luma) - ((1.0 - rgbNE.a) / 8.0);
float lumaSW = dot(rgbSW.rgb, luma) - ((1.0 - rgbSW.a) / 8.0);
float lumaSE = dot(rgbSE.rgb, luma) - ((1.0 - rgbSE.a) / 8.0);
float lumaM = dot(rgbM, luma) - (color.a / 8.0);
#endif
float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE)));
float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE)));
vec2 dir;
dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE));
dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE));
float dirReduce = max((lumaNW + lumaNE + lumaSW + lumaSE) *
(0.25 * FXAA_REDUCE_MUL),
FXAA_REDUCE_MIN);
float rcpDirMin = 1.0 / (min(abs(dir.x), abs(dir.y)) + dirReduce);
dir = min(vec2(FXAA_SPAN_MAX, FXAA_SPAN_MAX),
max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX),
dir * rcpDirMin)) *
pixel_size;
vec4 rgbA = 0.5 * (texture2DLod(source, uv_interp + dir * (1.0 / 3.0 - 0.5), 0.0) + texture2DLod(source, uv_interp + dir * (2.0 / 3.0 - 0.5), 0.0));
vec4 rgbB = rgbA * 0.5 + 0.25 * (texture2DLod(source, uv_interp + dir * -0.5, 0.0) + texture2DLod(source, uv_interp + dir * 0.5, 0.0));
#ifdef DISABLE_ALPHA
float lumaB = dot(rgbB.rgb, luma);
vec4 color_output = ((lumaB < lumaMin) || (lumaB > lumaMax)) ? rgbA : rgbB;
return vec4(color_output.rgb, 1.0);
#else
float lumaB = dot(rgbB.rgb, luma) - ((1.0 - rgbB.a) / 8.0);
vec4 color_output = ((lumaB < lumaMin) || (lumaB > lumaMax)) ? rgbA : rgbB;
if (color_output.a == 0.0) {
color_output.rgb = vec3(0.0);
} else if (color_output.a < 1.0) {
color_output.rgb /= color_output.a;
}
return color_output;
#endif
}
void main() {
vec4 color = texture2DLod(source, uv_interp, 0.0);
#ifdef DISABLE_ALPHA
color.a = 1.0;
#endif
#ifdef USE_FXAA
color = apply_fxaa(color, uv_interp, pixel_size);
#endif
// Glow
#ifdef USING_GLOW
vec3 glow = vec3(0.0);
#ifdef USE_MULTI_TEXTURE_GLOW
#ifdef USE_GLOW_LEVEL1
glow += GLOW_TEXTURE_SAMPLE(source_glow1, uv_interp, 0).rgb;
#ifdef USE_GLOW_LEVEL2
glow += GLOW_TEXTURE_SAMPLE(source_glow2, uv_interp, 0).rgb;
#ifdef USE_GLOW_LEVEL3
glow += GLOW_TEXTURE_SAMPLE(source_glow3, uv_interp, 0).rgb;
#ifdef USE_GLOW_LEVEL4
glow += GLOW_TEXTURE_SAMPLE(source_glow4, uv_interp, 0).rgb;
#ifdef USE_GLOW_LEVEL5
glow += GLOW_TEXTURE_SAMPLE(source_glow5, uv_interp, 0).rgb;
#ifdef USE_GLOW_LEVEL6
glow += GLOW_TEXTURE_SAMPLE(source_glow6, uv_interp, 0).rgb;
#ifdef USE_GLOW_LEVEL7
glow += GLOW_TEXTURE_SAMPLE(source_glow7, uv_interp, 0).rgb;
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#else
#ifdef USE_GLOW_LEVEL1
glow += GLOW_TEXTURE_SAMPLE(source_glow, uv_interp, 1).rgb;
#endif
#ifdef USE_GLOW_LEVEL2
glow += GLOW_TEXTURE_SAMPLE(source_glow, uv_interp, 2).rgb;
#endif
#ifdef USE_GLOW_LEVEL3
glow += GLOW_TEXTURE_SAMPLE(source_glow, uv_interp, 3).rgb;
#endif
#ifdef USE_GLOW_LEVEL4
glow += GLOW_TEXTURE_SAMPLE(source_glow, uv_interp, 4).rgb;
#endif
#ifdef USE_GLOW_LEVEL5
glow += GLOW_TEXTURE_SAMPLE(source_glow, uv_interp, 5).rgb;
#endif
#ifdef USE_GLOW_LEVEL6
glow += GLOW_TEXTURE_SAMPLE(source_glow, uv_interp, 6).rgb;
#endif
#ifdef USE_GLOW_LEVEL7
glow += GLOW_TEXTURE_SAMPLE(source_glow, uv_interp, 7).rgb;
#endif
#endif //USE_MULTI_TEXTURE_GLOW
glow *= glow_intensity;
color = apply_glow(color, glow);
#endif
// Additional effects
#ifdef USE_BCS
color.rgb = apply_bcs(color.rgb, bcs);
#endif
#ifdef USE_COLOR_CORRECTION
color.rgb = apply_color_correction(color.rgb, color_correction);
#endif
#ifdef DISABLE_ALPHA
color.a = 1.0;
#endif
gl_FragColor = color;
}