From 65a83785fd0eded2a8b4811f27b9c965f8af5294 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= <pedrojrulez@gmail.com>
Date: Sat, 2 Jul 2022 13:19:12 +0200
Subject: [PATCH] Avoid GL undefined behavior in ubershaders

---
 drivers/gles3/rasterizer_scene_gles3.cpp   |  2 +-
 drivers/gles3/rasterizer_storage_gles3.cpp | 11 ++++++++-
 drivers/gles3/rasterizer_storage_gles3.h   |  1 +
 drivers/gles3/shader_gles3.cpp             | 28 ++++++++++++++++++++--
 drivers/gles3/shader_gles3.h               |  7 ++++--
 gles_builders.py                           | 16 +++++++++++++
 6 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp
index 391bbf0d8e9..87a629a0144 100644
--- a/drivers/gles3/rasterizer_scene_gles3.cpp
+++ b/drivers/gles3/rasterizer_scene_gles3.cpp
@@ -5297,7 +5297,7 @@ void RasterizerSceneGLES3::initialize() {
 	glFrontFace(GL_CW);
 
 	if (storage->config.async_compilation_enabled) {
-		state.scene_shader.init_async_compilation();
+		state.scene_shader.init_async_compilation(storage->resources.depth_tex);
 	}
 }
 
diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp
index b7b5ae03ed9..ca05e0f83b6 100644
--- a/drivers/gles3/rasterizer_storage_gles3.cpp
+++ b/drivers/gles3/rasterizer_storage_gles3.cpp
@@ -8241,6 +8241,14 @@ void RasterizerStorageGLES3::initialize() {
 		glGenerateMipmap(GL_TEXTURE_2D);
 		glBindTexture(GL_TEXTURE_2D, 0);
 
+		glGenTextures(1, &resources.depth_tex);
+		unsigned char depthtexdata[8 * 8 * 2] = {};
+
+		glActiveTexture(GL_TEXTURE0);
+		glBindTexture(GL_TEXTURE_2D, resources.depth_tex);
+		glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, 8, 8, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthtexdata);
+		glBindTexture(GL_TEXTURE_2D, 0);
+
 		glGenTextures(1, &resources.white_tex_3d);
 
 		glActiveTexture(GL_TEXTURE0);
@@ -8331,7 +8339,7 @@ void RasterizerStorageGLES3::initialize() {
 	shaders.cubemap_filter.set_conditional(CubemapFilterShaderGLES3::LOW_QUALITY, !ggx_hq);
 	shaders.particles.init();
 	if (config.async_compilation_enabled) {
-		shaders.particles.init_async_compilation();
+		shaders.particles.init_async_compilation(resources.depth_tex);
 	}
 
 #ifdef GLES_OVER_GL
@@ -8389,6 +8397,7 @@ void RasterizerStorageGLES3::finalize() {
 	glDeleteTextures(1, &resources.white_tex);
 	glDeleteTextures(1, &resources.black_tex);
 	glDeleteTextures(1, &resources.normal_tex);
+	glDeleteTextures(1, &resources.depth_tex);
 }
 
 void RasterizerStorageGLES3::update_dirty_resources() {
diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h
index a1c5e4774ae..a40285b5586 100644
--- a/drivers/gles3/rasterizer_storage_gles3.h
+++ b/drivers/gles3/rasterizer_storage_gles3.h
@@ -142,6 +142,7 @@ public:
 		GLuint black_tex;
 		GLuint normal_tex;
 		GLuint aniso_tex;
+		GLuint depth_tex;
 
 		GLuint white_tex_3d;
 		GLuint white_tex_array;
diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp
index 0bf4585d8c2..388c3762ffc 100644
--- a/drivers/gles3/shader_gles3.cpp
+++ b/drivers/gles3/shader_gles3.cpp
@@ -195,6 +195,27 @@ bool ShaderGLES3::_bind_ubershader() {
 	CRASH_COND(new_conditional_version.version >= 0x80000000);
 #endif
 	glUniform1i(conditionals_uniform, new_conditional_version.version);
+
+	// This is done to avoid running into the GL UB message id 131222. Long explanation:
+	// If an ubershader has shadow samplers, they are generally not used if the current material has shadowing disabled,
+	// but that also implies the rasterizer won't do any preparation to the relevant shadow samplers (which won't really exist,
+	// so that's the correct way in a conditioned shader).
+	// However, in the case of the ubershader those shadow samplers are unconditionally declared, although potentially unused and
+	// thus "uninitialized". Sampling in that situation (compare disabled, no depth texture bound) is undefined behavior for GL.
+	// And that's a problem for us because, even if dynamic branching will serve to avoid using the unprepared sampler when shadowing
+	// is not enabled, the GPU may still run the other branch. And it's not just that the results from the sampling are undefined
+	// (that wouldn't be a problem and we could just ignore the warning); the problem is that sampling in that state is fully UB.
+	for (int i = 0; i < shadow_texunit_count; i++) {
+		int unit = shadow_texunits[i];
+		if (unit >= 0) {
+			glActiveTexture(GL_TEXTURE0 + unit);
+		} else {
+			glActiveTexture(GL_TEXTURE0 + max_image_units + unit);
+		}
+		glBindTexture(GL_TEXTURE_2D, depth_tex);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
+	}
+
 	return bound;
 }
 
@@ -1107,7 +1128,7 @@ GLint ShaderGLES3::get_uniform_location(const String &p_name) const {
 	return glGetUniformLocation(version->ids.main, p_name.ascii().get_data());
 }
 
-void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) {
+void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const int *p_shadow_texunits, int p_shadow_texunit_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) {
 	ERR_FAIL_COND(version);
 	conditional_version.key = 0;
 	new_conditional_version.key = 0;
@@ -1119,6 +1140,8 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co
 	fragment_code = p_fragment_code;
 	texunit_pairs = p_texunit_pairs;
 	texunit_pair_count = p_texunit_pair_count;
+	shadow_texunits = p_shadow_texunits;
+	shadow_texunit_count = p_shadow_texunit_count;
 	vertex_code_start = p_vertex_code_start;
 	fragment_code_start = p_fragment_code_start;
 	attribute_pairs = p_attribute_pairs;
@@ -1209,7 +1232,8 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co
 	glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &max_image_units);
 }
 
-void ShaderGLES3::init_async_compilation() {
+void ShaderGLES3::init_async_compilation(GLuint p_depth_tex) {
+	depth_tex = p_depth_tex;
 	if (is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) {
 		// Warm up the ubershader for the case of no custom code
 		new_conditional_version.code_version = 0;
diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h
index 490a95c9621..1823ce06dde 100644
--- a/drivers/gles3/shader_gles3.h
+++ b/drivers/gles3/shader_gles3.h
@@ -96,6 +96,7 @@ private:
 	//@TODO Optimize to a fixed set of shader pools and use a LRU
 	int uniform_count;
 	int texunit_pair_count;
+	int shadow_texunit_count;
 	int conditional_count;
 	int ubo_count;
 	int feedback_count;
@@ -245,6 +246,7 @@ private:
 	const char **uniform_names;
 	const AttributePair *attribute_pairs;
 	const TexUnitPair *texunit_pairs;
+	const int *shadow_texunits;
 	const UBOPair *ubo_pairs;
 	const Feedback *feedbacks;
 	const char *vertex_code;
@@ -278,6 +280,7 @@ private:
 	static ShaderGLES3 *active;
 
 	int max_image_units;
+	GLuint depth_tex = 0;
 
 	_FORCE_INLINE_ void _set_uniform_variant(GLint p_uniform, const Variant &p_value) {
 		if (p_uniform < 0) {
@@ -375,7 +378,7 @@ protected:
 	_FORCE_INLINE_ int _get_uniform(int p_which) const;
 	_FORCE_INLINE_ void _set_conditional(int p_which, bool p_value);
 
-	void setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start);
+	void setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const int *p_shadow_texunits, int p_shadow_texunit_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start);
 
 	ShaderGLES3();
 
@@ -404,7 +407,7 @@ public:
 	_FORCE_INLINE_ bool is_version_valid() const { return version && version->compile_status == Version::COMPILE_STATUS_OK; }
 
 	virtual void init() = 0;
-	void init_async_compilation();
+	void init_async_compilation(GLuint p_depth_tex);
 	bool is_async_compilation_supported();
 	void finish();
 
diff --git a/gles_builders.py b/gles_builders.py
index ef0cb811f57..8afc9a49929 100644
--- a/gles_builders.py
+++ b/gles_builders.py
@@ -19,6 +19,7 @@ class LegacyGLHeaderStruct:
         self.enums = {}
         self.texunits = []
         self.texunit_names = []
+        self.shadow_texunits = []
         self.ubos = []
         self.ubo_names = []
 
@@ -106,6 +107,8 @@ def include_file_in_legacygl_header(filename, header_data, depth):
 
                 if not x in header_data.texunit_names:
                     header_data.texunits += [(x, texunit)]
+                    if line.find("sampler2DShadow") != -1:
+                        header_data.shadow_texunits += [texunit]
                     header_data.texunit_names += [x]
 
         elif line.find("uniform") != -1 and line.lower().find("ubo:") != -1:
@@ -483,6 +486,15 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2
     else:
         fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n")
 
+    if not gles2:
+        if header_data.shadow_texunits:
+            fd.write("\t\tstatic int _shadow_texunits[]={")
+            for x in header_data.shadow_texunits:
+                fd.write(str(x) + ',')
+            fd.write("};\n\n")
+        else:
+            fd.write("\t\tstatic int *_shadow_texunits=NULL;\n")
+
     if not gles2 and header_data.ubos:
         fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n")
         for x in header_data.ubos:
@@ -537,6 +549,8 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2
                 + str(len(header_data.attributes))
                 + ", _texunit_pairs,"
                 + str(len(header_data.texunits))
+                + ", _shadow_texunits,"
+                + str(len(header_data.shadow_texunits))
                 + ",_ubo_pairs,"
                 + str(len(header_data.ubos))
                 + ",_feedbacks,"
@@ -566,6 +580,8 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2
                 + str(len(header_data.uniforms))
                 + ",_texunit_pairs,"
                 + str(len(header_data.texunits))
+                + ",_shadow_texunits,"
+                + str(len(header_data.shadow_texunits))
                 + ",_enums,"
                 + str(len(header_data.enums))
                 + ",_enum_values,"