Merge pull request #69944 from timothyqiu/3.5-cherrypicks

Cherry-picks for the 3.5 branch (future 3.5.2) - 2nd batch
2022-12-12 08:59:33 +01:00 · 2022-12-12 08:59:33 +01:00 · a94af65daa
commit a94af65daa
parent c76479727e 87947110b8
78 changed files with 2824 additions and 1734 deletions
--- a/.github/workflows/linux_builds.yml
+++ b/.github/workflows/linux_builds.yml
@ -71,6 +71,9 @@ jobs:
      - name: Setup python and scons
        uses: ./.github/actions/godot-deps

+      - name: Setup GCC problem matcher
+        uses: ammaraskar/gcc-problem-matcher@master
+
      - name: Compilation
        uses: ./.github/actions/godot-build
        with:
--- a/.github/workflows/windows_builds.yml
+++ b/.github/workflows/windows_builds.yml
@ -45,6 +45,9 @@ jobs:
      - name: Setup python and scons
        uses: ./.github/actions/godot-deps

+      - name: Setup MSVC problem matcher
+        uses: ammaraskar/msvc-problem-matcher@master
+
      - name: Compilation
        uses: ./.github/actions/godot-build
        with:
--- a/.gitignore
+++ b/.gitignore
@ -154,6 +154,7 @@ gmon.out

 # Jetbrains IDEs
 .idea/
+.fleet/

 # Kate
 *.kate-swp
@ -240,6 +241,8 @@ xcuserdata/
 [Rr]eleases/
 x64/
 x86/
+# Not build results, this is Theora source code.
+!thirdparty/libtheora/x86/
 [Ww][Ii][Nn]32/
 [Aa][Rr][Mm]/
 [Aa][Rr][Mm]64/
@ -316,6 +319,7 @@ _ReSharper*/

 # Others
 ClientBin/
+enc_temp_folder/
 ~$*
 *.dbmdl
 *.dbproj.schemaview
--- a/doc/classes/Array.xml
+++ b/doc/classes/Array.xml
@ -343,6 +343,7 @@
 		<method name="sort">
 			<description>
 				Sorts the array.
+				[b]Note:[/b] The sorting algorithm used is not [url=https://en.wikipedia.org/wiki/Sorting_algorithm#Stability]stable[/url]. This means that values considered equal may have their order changed when using [method sort].
 				[b]Note:[/b] Strings are sorted in alphabetical order (as opposed to natural order). This may lead to unexpected behavior when sorting an array of strings ending with a sequence of numbers. Consider the following example:
 				[codeblock]
 				var strings = ["string1", "string2", "string10", "string11"]
@ -357,7 +358,8 @@
 			<description>
 				Sorts the array using a custom method. The arguments are an object that holds the method and the name of such method. The custom method receives two arguments (a pair of elements from the array) and must return either [code]true[/code] or [code]false[/code].
 				For two elements [code]a[/code] and [code]b[/code], if the given method returns [code]true[/code], element [code]b[/code] will be after element [code]a[/code] in the array.
-				[b]Note:[/b] You cannot randomize the return value as the heapsort algorithm expects a deterministic result. Doing so will result in unexpected behavior.
+				[b]Note:[/b] The sorting algorithm used is not [url=https://en.wikipedia.org/wiki/Sorting_algorithm#Stability]stable[/url]. This means that values considered equal may have their order changed when using [method sort_custom].
+				[b]Note:[/b] You cannot randomize the return value as the heapsort algorithm expects a deterministic result. Randomizing the return value will result in unexpected behavior.
 				[codeblock]
 				class MyCustomSorter:
 				    static func sort_ascending(a, b):
--- a/doc/classes/BackBufferCopy.xml
+++ b/doc/classes/BackBufferCopy.xml
@ -4,8 +4,8 @@
 		Copies a region of the screen (or the whole screen) to a buffer so it can be accessed in your shader scripts through the [code]texture(SCREEN_TEXTURE, ...)[/code] function.
 	</brief_description>
 	<description>
-		Node for back-buffering the currently-displayed screen. The region defined in the BackBufferCopy node is buffered with the content of the screen it covers, or the entire screen according to the copy mode set. Use the [code]texture(SCREEN_TEXTURE, ...)[/code] function in your shader scripts to access the buffer.
-		[b]Note:[/b] Since this node inherits from [Node2D] (and not [Control]), anchors and margins won't apply to child [Control]-derived nodes. This can be problematic when resizing the window. To avoid this, add [Control]-derived nodes as [i]siblings[/i] to the BackBufferCopy node instead of adding them as children.
+		Node for back-buffering the currently-displayed screen. The region defined in the [BackBufferCopy] node is buffered with the content of the screen it covers, or the entire screen according to the copy mode set. Use the [code]texture(SCREEN_TEXTURE, ...)[/code] function in your shader scripts to access the buffer.
+		[b]Note:[/b] Since this node inherits from [Node2D] (and not [Control]), anchors and margins won't apply to child [Control]-derived nodes. This can be problematic when resizing the window. To avoid this, add [Control]-derived nodes as [i]siblings[/i] to the [BackBufferCopy] node instead of adding them as children.
 	</description>
 	<tutorials>
 	</tutorials>
@ -16,18 +16,18 @@
 			Buffer mode. See [enum CopyMode] constants.
 		</member>
 		<member name="rect" type="Rect2" setter="set_rect" getter="get_rect" default="Rect2( -100, -100, 200, 200 )">
-			The area covered by the BackBufferCopy. Only used if [member copy_mode] is [constant COPY_MODE_RECT].
+			The area covered by the [BackBufferCopy]. Only used if [member copy_mode] is [constant COPY_MODE_RECT].
 		</member>
 	</members>
 	<constants>
 		<constant name="COPY_MODE_DISABLED" value="0" enum="CopyMode">
-			Disables the buffering mode. This means the BackBufferCopy node will directly use the portion of screen it covers.
+			Disables the buffering mode. This means the [BackBufferCopy] node will directly use the portion of screen it covers.
 		</constant>
 		<constant name="COPY_MODE_RECT" value="1" enum="CopyMode">
-			BackBufferCopy buffers a rectangular region.
+			[BackBufferCopy] buffers a rectangular region.
 		</constant>
 		<constant name="COPY_MODE_VIEWPORT" value="2" enum="CopyMode">
-			BackBufferCopy buffers the entire screen.
+			[BackBufferCopy] buffers the entire screen.
 		</constant>
 	</constants>
 </class>
--- a/doc/classes/EditorScript.xml
+++ b/doc/classes/EditorScript.xml
@ -15,6 +15,7 @@
 		    print("Hello from the Godot Editor!")
 		[/codeblock]
 		[b]Note:[/b] The script is run in the Editor context, which means the output is visible in the console window started with the Editor (stdout) instead of the usual Godot [b]Output[/b] dock.
+		[b]Note:[/b] EditorScript is reference counted, meaning it is destroyed when nothing references it. This can cause errors during asynchronous operations if there are no references to the script.
 	</description>
 	<tutorials>
 	</tutorials>
--- a/doc/classes/Particles2D.xml
+++ b/doc/classes/Particles2D.xml
@ -13,7 +13,8 @@
 	</description>
 	<tutorials>
 		<link title="Particle systems (2D)">$DOCS_URL/tutorials/2d/particle_systems_2d.html</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
+		<link title="2D Particles Demo">https://godotengine.org/asset-library/asset/118</link>
+		<link title="2D Dodge The Creeps Demo (uses GPUParticles2D for the trail behind the player)">https://godotengine.org/asset-library/asset/515</link>
 	</tutorials>
 	<methods>
 		<method name="capture_rect" qualifiers="const">
--- a/doc/classes/SceneTree.xml
+++ b/doc/classes/SceneTree.xml
@ -375,6 +375,7 @@
 		</constant>
 		<constant name="GROUP_CALL_UNIQUE" value="4" enum="GroupCallFlags">
 			Call a group only once even if the call is executed many times.
+			[b]Note:[/b] Arguments are not taken into account when deciding whether the call is unique or not. Therefore when the same method is called with different arguments, only the first call will be performed.
 		</constant>
 		<constant name="STRETCH_MODE_DISABLED" value="0" enum="StretchMode">
 			No stretching.
--- a/doc/classes/VisualInstance.xml
+++ b/doc/classes/VisualInstance.xml
@ -62,6 +62,13 @@
 			The render layer(s) this [VisualInstance] is drawn on.
 			This object will only be visible for [Camera]s whose cull mask includes the render object this [VisualInstance] is set to.
 		</member>
+		<member name="sorting_offset" type="float" setter="set_sorting_offset" getter="get_sorting_offset" default="0.0">
+			The sorting offset used by this [VisualInstance]. Adjusting it to a higher value will make the [VisualInstance] reliably draw on top of other [VisualInstance]s that are otherwise positioned at the same spot.
+		</member>
+		<member name="sorting_use_aabb_center" type="bool" setter="set_sorting_use_aabb_center" getter="is_sorting_use_aabb_center" default="true">
+			If [code]true[/code], the object is sorted based on the [AABB] center. Sorted based on the global position otherwise.
+			The [AABB] center based sorting is generally more accurate for 3D models. The position based sorting instead allows to better control the drawing order when working with [Particles] and [CPUParticles].
+		</member>
 	</members>
 	<constants>
 	</constants>
--- a/editor/import/resource_importer_wav.cpp
+++ b/editor/import/resource_importer_wav.cpp
@ -104,9 +104,10 @@ Error ResourceImporterWAV::import(const String &p_source_file, const String &p_s
 	file->get_buffer((uint8_t *)&riff, 4); //RIFF

 	if (riff[0] != 'R' || riff[1] != 'I' || riff[2] != 'F' || riff[3] != 'F') {
+		uint64_t length = file->get_len();
 		file->close();
 		memdelete(file);
-		ERR_FAIL_V(ERR_FILE_UNRECOGNIZED);
+		ERR_FAIL_V_MSG(ERR_FILE_UNRECOGNIZED, vformat("Not a WAV file. File should start with 'RIFF', but found '%s', in file of size %d bytes", riff, length));
 	}

 	/* GET FILESIZE */
@ -114,14 +115,15 @@ Error ResourceImporterWAV::import(const String &p_source_file, const String &p_s

 	/* CHECK WAVE */

-	char wave[4];
-
-	file->get_buffer((uint8_t *)&wave, 4); //RIFF
+	char wave[5];
+	wave[4] = 0;
+	file->get_buffer((uint8_t *)&wave, 4); //WAVE

 	if (wave[0] != 'W' || wave[1] != 'A' || wave[2] != 'V' || wave[3] != 'E') {
+		uint64_t length = file->get_len();
 		file->close();
 		memdelete(file);
-		ERR_FAIL_V_MSG(ERR_FILE_UNRECOGNIZED, "Not a WAV file (no WAVE RIFF header).");
+		ERR_FAIL_V_MSG(ERR_FILE_UNRECOGNIZED, vformat("Not a WAV file. Header should contain 'WAVE', but found '%s', in file of size %d bytes", wave, length));
 	}

 	// Let users override potential loop points from the WAV.
--- a/modules/bullet/rigid_body_bullet.cpp
+++ b/modules/bullet/rigid_body_bullet.cpp
@ -49,17 +49,15 @@
 */

 Vector3 BulletPhysicsDirectBodyState::get_total_gravity() const {
-	Vector3 gVec;
-	B_TO_G(body->btBody->getGravity(), gVec);
-	return gVec;
+	return body->total_gravity;
 }

 float BulletPhysicsDirectBodyState::get_total_angular_damp() const {
-	return body->btBody->getAngularDamping();
+	return body->total_angular_damp;
 }

 float BulletPhysicsDirectBodyState::get_total_linear_damp() const {
-	return body->btBody->getLinearDamping();
+	return body->total_linear_damp;
 }

 Vector3 BulletPhysicsDirectBodyState::get_center_of_mass() const {
@ -917,16 +915,9 @@ void RigidBodyBullet::reload_space_override_modificator() {
 		return;
 	}

-	if (omit_forces_integration) {
-		// Custom behaviour.
-		btBody->setGravity(btVector3(0, 0, 0));
-		btBody->setDamping(0, 0);
-		return;
-	}
-
-	Vector3 newGravity(0.0, 0.0, 0.0);
-	real_t newLinearDamp = MAX(0.0, linearDamp);
-	real_t newAngularDamp = MAX(0.0, angularDamp);
+	total_gravity = Vector3(0.0, 0.0, 0.0);
+	total_linear_damp = MAX(0.0, linearDamp);
+	total_angular_damp = MAX(0.0, angularDamp);

 	AreaBullet *currentArea;
 	// Variable used to calculate new gravity for gravity point areas, it is pointed by currentGravity pointer
@ -977,49 +968,54 @@ void RigidBodyBullet::reload_space_override_modificator() {
 				/// This area adds its gravity/damp values to whatever has been
 				/// calculated so far. This way, many overlapping areas can combine
 				/// their physics to make interesting
-				newGravity += support_gravity;
-				newLinearDamp += currentArea->get_spOv_linearDamp();
-				newAngularDamp += currentArea->get_spOv_angularDamp();
+				total_gravity += support_gravity;
+				total_linear_damp += currentArea->get_spOv_linearDamp();
+				total_angular_damp += currentArea->get_spOv_angularDamp();
 				break;
 			case PhysicsServer::AREA_SPACE_OVERRIDE_COMBINE_REPLACE:
 				/// This area adds its gravity/damp values to whatever has been calculated
 				/// so far. Then stops taking into account the rest of the areas, even the
 				/// default one.
-				newGravity += support_gravity;
-				newLinearDamp += currentArea->get_spOv_linearDamp();
-				newAngularDamp += currentArea->get_spOv_angularDamp();
+				total_gravity += support_gravity;
+				total_linear_damp += currentArea->get_spOv_linearDamp();
+				total_angular_damp += currentArea->get_spOv_angularDamp();
 				stopped = true;
 				break;
 			case PhysicsServer::AREA_SPACE_OVERRIDE_REPLACE:
 				/// This area replaces any gravity/damp, even the default one, and
 				/// stops taking into account the rest of the areas.
-				newGravity = support_gravity;
-				newLinearDamp = currentArea->get_spOv_linearDamp();
-				newAngularDamp = currentArea->get_spOv_angularDamp();
+				total_gravity = support_gravity;
+				total_linear_damp = currentArea->get_spOv_linearDamp();
+				total_angular_damp = currentArea->get_spOv_angularDamp();
 				stopped = true;
 				break;
 			case PhysicsServer::AREA_SPACE_OVERRIDE_REPLACE_COMBINE:
 				/// This area replaces any gravity/damp calculated so far, but keeps
 				/// calculating the rest of the areas, down to the default one.
-				newGravity = support_gravity;
-				newLinearDamp = currentArea->get_spOv_linearDamp();
-				newAngularDamp = currentArea->get_spOv_angularDamp();
+				total_gravity = support_gravity;
+				total_linear_damp = currentArea->get_spOv_linearDamp();
+				total_angular_damp = currentArea->get_spOv_angularDamp();
 				break;
 		}
 	}

 	// Add default gravity and damping from space.
 	if (!stopped) {
-		newGravity += space->get_gravity_direction() * space->get_gravity_magnitude();
-		newLinearDamp += space->get_linear_damp();
-		newAngularDamp += space->get_angular_damp();
+		total_gravity += space->get_gravity_direction() * space->get_gravity_magnitude();
+		total_linear_damp += space->get_linear_damp();
+		total_angular_damp += space->get_angular_damp();
 	}

-	btVector3 newBtGravity;
-	G_TO_B(newGravity * gravity_scale, newBtGravity);
-
-	btBody->setGravity(newBtGravity);
-	btBody->setDamping(newLinearDamp, newAngularDamp);
+	if (omit_forces_integration) {
+		// Don't apply gravity or damping.
+		btBody->setGravity(btVector3(0, 0, 0));
+		btBody->setDamping(0, 0);
+	} else {
+		btVector3 newBtGravity;
+		G_TO_B(total_gravity * gravity_scale, newBtGravity);
+		btBody->setGravity(newBtGravity);
+		btBody->setDamping(total_linear_damp, total_angular_damp);
+	}
 }

 void RigidBodyBullet::reload_kinematic_shapes() {
--- a/modules/bullet/rigid_body_bullet.h
+++ b/modules/bullet/rigid_body_bullet.h
@ -168,6 +168,9 @@ private:
 	real_t gravity_scale;
 	real_t linearDamp;
 	real_t angularDamp;
+	Vector3 total_gravity;
+	real_t total_linear_damp;
+	real_t total_angular_damp;
 	bool can_sleep;
 	bool omit_forces_integration;
 	bool can_integrate_forces;
--- a/modules/gdscript/doc_classes/@GDScript.xml
+++ b/modules/gdscript/doc_classes/@GDScript.xml
@ -359,7 +359,7 @@
 		<method name="get_stack">
 			<return type="Array" />
 			<description>
-				Returns an array of dictionaries representing the current call stack.
+				Returns an array of dictionaries representing the current call stack. See also [method print_stack].
 				[codeblock]
 				func _ready():
 				    foo()
@ -374,6 +374,7 @@
 				[codeblock]
 				[{function:bar, line:12, source:res://script.gd}, {function:foo, line:9, source:res://script.gd}, {function:_ready, line:6, source:res://script.gd}]
 				[/codeblock]
+				[b]Note:[/b] [method get_stack] only works if the running instance is connected to a debugging server (i.e. an editor instance). [method get_stack] will not work in projects exported in release mode, or in projects exported in debug mode if not connected to a debugging server.
 			</description>
 		</method>
 		<method name="hash">
@ -727,11 +728,12 @@
 		<method name="print_stack">
 			<return type="void" />
 			<description>
-				Prints a stack track at code location, only works when running with debugger turned on.
+				Prints a stack trace at the current code location. See also [method get_stack].
 				Output in the console would look something like this:
 				[codeblock]
 				Frame 0 - res://test.gd:16 in function '_process'
 				[/codeblock]
+				[b]Note:[/b] [method print_stack] only works if the running instance is connected to a debugging server (i.e. an editor instance). [method print_stack] will not work in projects exported in release mode, or in projects exported in debug mode if not connected to a debugging server.
 			</description>
 		</method>
 		<method name="printerr" qualifiers="vararg">
--- a/modules/regex/SCsub
+++ b/modules/regex/SCsub
@ -58,10 +58,10 @@ if env["builtin_pcre2"]:
        env_pcre2["OBJSUFFIX"] = "_" + width + env_pcre2["OBJSUFFIX"]
        env_pcre2.Append(CPPDEFINES=[("PCRE2_CODE_UNIT_WIDTH", width)])
        env_pcre2.add_source_files(thirdparty_obj, thirdparty_sources)
-        env.modules_sources += thirdparty_obj

    pcre2_builtin("16")
    pcre2_builtin("32")
+    env.modules_sources += thirdparty_obj


 # Godot source files
--- a/modules/webp/SCsub
+++ b/modules/webp/SCsub
@ -12,123 +12,129 @@ thirdparty_obj = []
 if env["builtin_libwebp"]:
    thirdparty_dir = "#thirdparty/libwebp/"
    thirdparty_sources = [
-        "dec/alpha_dec.c",
-        "dec/buffer_dec.c",
-        "dec/frame_dec.c",
-        "dec/idec_dec.c",
-        "dec/io_dec.c",
-        "dec/quant_dec.c",
-        "dec/tree_dec.c",
-        "dec/vp8_dec.c",
-        "dec/vp8l_dec.c",
-        "dec/webp_dec.c",
-        "demux/anim_decode.c",
-        "demux/demux.c",
-        "dsp/alpha_processing.c",
-        "dsp/alpha_processing_mips_dsp_r2.c",
-        "dsp/alpha_processing_neon.c",
-        "dsp/alpha_processing_sse2.c",
-        "dsp/alpha_processing_sse41.c",
-        "dsp/cost.c",
-        "dsp/cost_mips32.c",
-        "dsp/cost_mips_dsp_r2.c",
-        "dsp/cost_neon.c",
-        "dsp/cost_sse2.c",
-        "dsp/cpu.c",
-        "dsp/dec.c",
-        "dsp/dec_clip_tables.c",
-        "dsp/dec_mips32.c",
-        "dsp/dec_mips_dsp_r2.c",
-        "dsp/dec_msa.c",
-        "dsp/dec_neon.c",
-        "dsp/dec_sse2.c",
-        "dsp/dec_sse41.c",
-        "dsp/enc.c",
-        "dsp/enc_mips32.c",
-        "dsp/enc_mips_dsp_r2.c",
-        "dsp/enc_msa.c",
-        "dsp/enc_neon.c",
-        "dsp/enc_sse2.c",
-        "dsp/enc_sse41.c",
-        "dsp/filters.c",
-        "dsp/filters_mips_dsp_r2.c",
-        "dsp/filters_msa.c",
-        "dsp/filters_neon.c",
-        "dsp/filters_sse2.c",
-        "dsp/lossless.c",
-        "dsp/lossless_enc.c",
-        "dsp/lossless_enc_mips32.c",
-        "dsp/lossless_enc_mips_dsp_r2.c",
-        "dsp/lossless_enc_msa.c",
-        "dsp/lossless_enc_neon.c",
-        "dsp/lossless_enc_sse2.c",
-        "dsp/lossless_enc_sse41.c",
-        "dsp/lossless_mips_dsp_r2.c",
-        "dsp/lossless_msa.c",
-        "dsp/lossless_neon.c",
-        "dsp/lossless_sse2.c",
-        "dsp/lossless_sse41.c",
-        "dsp/rescaler.c",
-        "dsp/rescaler_mips32.c",
-        "dsp/rescaler_mips_dsp_r2.c",
-        "dsp/rescaler_msa.c",
-        "dsp/rescaler_neon.c",
-        "dsp/rescaler_sse2.c",
-        "dsp/ssim.c",
-        "dsp/ssim_sse2.c",
-        "dsp/upsampling.c",
-        "dsp/upsampling_mips_dsp_r2.c",
-        "dsp/upsampling_msa.c",
-        "dsp/upsampling_neon.c",
-        "dsp/upsampling_sse2.c",
-        "dsp/upsampling_sse41.c",
-        "dsp/yuv.c",
-        "dsp/yuv_mips32.c",
-        "dsp/yuv_mips_dsp_r2.c",
-        "dsp/yuv_neon.c",
-        "dsp/yuv_sse2.c",
-        "dsp/yuv_sse41.c",
-        "enc/alpha_enc.c",
-        "enc/analysis_enc.c",
-        "enc/backward_references_cost_enc.c",
-        "enc/backward_references_enc.c",
-        "enc/config_enc.c",
-        "enc/cost_enc.c",
-        "enc/filter_enc.c",
-        "enc/frame_enc.c",
-        "enc/histogram_enc.c",
-        "enc/iterator_enc.c",
-        "enc/near_lossless_enc.c",
-        "enc/picture_csp_enc.c",
-        "enc/picture_enc.c",
-        "enc/picture_psnr_enc.c",
-        "enc/picture_rescale_enc.c",
-        "enc/picture_tools_enc.c",
-        "enc/predictor_enc.c",
-        "enc/quant_enc.c",
-        "enc/syntax_enc.c",
-        "enc/token_enc.c",
-        "enc/tree_enc.c",
-        "enc/vp8l_enc.c",
-        "enc/webp_enc.c",
-        "mux/anim_encode.c",
-        "mux/muxedit.c",
-        "mux/muxinternal.c",
-        "mux/muxread.c",
-        "utils/bit_reader_utils.c",
-        "utils/bit_writer_utils.c",
-        "utils/color_cache_utils.c",
-        "utils/filters_utils.c",
-        "utils/huffman_encode_utils.c",
-        "utils/huffman_utils.c",
-        "utils/quant_levels_dec_utils.c",
-        "utils/quant_levels_utils.c",
-        "utils/random_utils.c",
-        "utils/rescaler_utils.c",
-        "utils/thread_utils.c",
-        "utils/utils.c",
+        "sharpyuv/sharpyuv.c",
+        "sharpyuv/sharpyuv_csp.c",
+        "sharpyuv/sharpyuv_dsp.c",
+        "sharpyuv/sharpyuv_gamma.c",
+        "sharpyuv/sharpyuv_neon.c",
+        "sharpyuv/sharpyuv_sse2.c",
+        "src/dec/alpha_dec.c",
+        "src/dec/buffer_dec.c",
+        "src/dec/frame_dec.c",
+        "src/dec/idec_dec.c",
+        "src/dec/io_dec.c",
+        "src/dec/quant_dec.c",
+        "src/dec/tree_dec.c",
+        "src/dec/vp8_dec.c",
+        "src/dec/vp8l_dec.c",
+        "src/dec/webp_dec.c",
+        "src/demux/anim_decode.c",
+        "src/demux/demux.c",
+        "src/dsp/alpha_processing.c",
+        "src/dsp/alpha_processing_mips_dsp_r2.c",
+        "src/dsp/alpha_processing_neon.c",
+        "src/dsp/alpha_processing_sse2.c",
+        "src/dsp/alpha_processing_sse41.c",
+        "src/dsp/cost.c",
+        "src/dsp/cost_mips32.c",
+        "src/dsp/cost_mips_dsp_r2.c",
+        "src/dsp/cost_neon.c",
+        "src/dsp/cost_sse2.c",
+        "src/dsp/cpu.c",
+        "src/dsp/dec.c",
+        "src/dsp/dec_clip_tables.c",
+        "src/dsp/dec_mips32.c",
+        "src/dsp/dec_mips_dsp_r2.c",
+        "src/dsp/dec_msa.c",
+        "src/dsp/dec_neon.c",
+        "src/dsp/dec_sse2.c",
+        "src/dsp/dec_sse41.c",
+        "src/dsp/enc.c",
+        "src/dsp/enc_mips32.c",
+        "src/dsp/enc_mips_dsp_r2.c",
+        "src/dsp/enc_msa.c",
+        "src/dsp/enc_neon.c",
+        "src/dsp/enc_sse2.c",
+        "src/dsp/enc_sse41.c",
+        "src/dsp/filters.c",
+        "src/dsp/filters_mips_dsp_r2.c",
+        "src/dsp/filters_msa.c",
+        "src/dsp/filters_neon.c",
+        "src/dsp/filters_sse2.c",
+        "src/dsp/lossless.c",
+        "src/dsp/lossless_enc.c",
+        "src/dsp/lossless_enc_mips32.c",
+        "src/dsp/lossless_enc_mips_dsp_r2.c",
+        "src/dsp/lossless_enc_msa.c",
+        "src/dsp/lossless_enc_neon.c",
+        "src/dsp/lossless_enc_sse2.c",
+        "src/dsp/lossless_enc_sse41.c",
+        "src/dsp/lossless_mips_dsp_r2.c",
+        "src/dsp/lossless_msa.c",
+        "src/dsp/lossless_neon.c",
+        "src/dsp/lossless_sse2.c",
+        "src/dsp/lossless_sse41.c",
+        "src/dsp/rescaler.c",
+        "src/dsp/rescaler_mips32.c",
+        "src/dsp/rescaler_mips_dsp_r2.c",
+        "src/dsp/rescaler_msa.c",
+        "src/dsp/rescaler_neon.c",
+        "src/dsp/rescaler_sse2.c",
+        "src/dsp/ssim.c",
+        "src/dsp/ssim_sse2.c",
+        "src/dsp/upsampling.c",
+        "src/dsp/upsampling_mips_dsp_r2.c",
+        "src/dsp/upsampling_msa.c",
+        "src/dsp/upsampling_neon.c",
+        "src/dsp/upsampling_sse2.c",
+        "src/dsp/upsampling_sse41.c",
+        "src/dsp/yuv.c",
+        "src/dsp/yuv_mips32.c",
+        "src/dsp/yuv_mips_dsp_r2.c",
+        "src/dsp/yuv_neon.c",
+        "src/dsp/yuv_sse2.c",
+        "src/dsp/yuv_sse41.c",
+        "src/enc/alpha_enc.c",
+        "src/enc/analysis_enc.c",
+        "src/enc/backward_references_cost_enc.c",
+        "src/enc/backward_references_enc.c",
+        "src/enc/config_enc.c",
+        "src/enc/cost_enc.c",
+        "src/enc/filter_enc.c",
+        "src/enc/frame_enc.c",
+        "src/enc/histogram_enc.c",
+        "src/enc/iterator_enc.c",
+        "src/enc/near_lossless_enc.c",
+        "src/enc/picture_csp_enc.c",
+        "src/enc/picture_enc.c",
+        "src/enc/picture_psnr_enc.c",
+        "src/enc/picture_rescale_enc.c",
+        "src/enc/picture_tools_enc.c",
+        "src/enc/predictor_enc.c",
+        "src/enc/quant_enc.c",
+        "src/enc/syntax_enc.c",
+        "src/enc/token_enc.c",
+        "src/enc/tree_enc.c",
+        "src/enc/vp8l_enc.c",
+        "src/enc/webp_enc.c",
+        "src/mux/anim_encode.c",
+        "src/mux/muxedit.c",
+        "src/mux/muxinternal.c",
+        "src/mux/muxread.c",
+        "src/utils/bit_reader_utils.c",
+        "src/utils/bit_writer_utils.c",
+        "src/utils/color_cache_utils.c",
+        "src/utils/filters_utils.c",
+        "src/utils/huffman_encode_utils.c",
+        "src/utils/huffman_utils.c",
+        "src/utils/quant_levels_dec_utils.c",
+        "src/utils/quant_levels_utils.c",
+        "src/utils/random_utils.c",
+        "src/utils/rescaler_utils.c",
+        "src/utils/thread_utils.c",
+        "src/utils/utils.c",
    ]
-    thirdparty_sources = [thirdparty_dir + "src/" + file for file in thirdparty_sources]
+    thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]

    env_webp.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "src/"])

--- a/platform/windows/crash_handler_windows.cpp
+++ b/platform/windows/crash_handler_windows.cpp
@ -156,7 +156,7 @@ DWORD CrashHandlerException(EXCEPTION_POINTERS *ep) {
 		return EXCEPTION_CONTINUE_SEARCH;
 	}

-	SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_UNDNAME);
+	SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_UNDNAME | SYMOPT_EXACT_SYMBOLS);
 	EnumProcessModules(process, &module_handles[0], module_handles.size() * sizeof(HMODULE), &cbNeeded);
 	module_handles.resize(cbNeeded / sizeof(HMODULE));
 	EnumProcessModules(process, &module_handles[0], module_handles.size() * sizeof(HMODULE), &cbNeeded);
--- a/platform/windows/os_windows.cpp
+++ b/platform/windows/os_windows.cpp
@ -3950,6 +3950,10 @@ OS_Windows::OS_Windows(HINSTANCE _hInstance) {
 	AudioDriverManager::add_driver(&driver_xaudio2);
 #endif

+	HANDLE stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE);
+	DWORD outMode = ENABLE_PROCESSED_OUTPUT;
+	SetConsoleMode(stdoutHandle, outMode);
+
 	Vector<Logger *> loggers;
 	loggers.push_back(memnew(WindowsTerminalLogger));
 	_set_logger(memnew(CompositeLogger(loggers)));
--- a/platform/x11/joypad_linux.cpp
+++ b/platform/x11/joypad_linux.cpp
@ -222,8 +222,8 @@ void JoypadLinux::monitor_joypads() {
 			}
 		}
 		closedir(input_directory);
+		usleep(1000000); // 1s
 	}
-	usleep(1000000); // 1s
 }

 void JoypadLinux::close_joypads() {
--- a/scene/2d/back_buffer_copy.cpp
+++ b/scene/2d/back_buffer_copy.cpp
@ -71,11 +71,19 @@ Rect2 BackBufferCopy::get_rect() const {
 void BackBufferCopy::set_copy_mode(CopyMode p_mode) {
 	copy_mode = p_mode;
 	_update_copy_mode();
+	_change_notify();
 }
+
 BackBufferCopy::CopyMode BackBufferCopy::get_copy_mode() const {
 	return copy_mode;
 }

+void BackBufferCopy::_validate_property(PropertyInfo &p_property) const {
+	if (copy_mode != COPY_MODE_RECT && p_property.name == "rect") {
+		p_property.usage = PROPERTY_USAGE_NOEDITOR;
+	}
+}
+
 void BackBufferCopy::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_rect", "rect"), &BackBufferCopy::set_rect);
 	ClassDB::bind_method(D_METHOD("get_rect"), &BackBufferCopy::get_rect);
--- a/scene/2d/back_buffer_copy.h
+++ b/scene/2d/back_buffer_copy.h
@ -51,6 +51,7 @@ private:

 protected:
 	static void _bind_methods();
+	void _validate_property(PropertyInfo &p_property) const;

 public:
 #ifdef TOOLS_ENABLED
--- a/scene/2d/navigation_polygon.cpp
+++ b/scene/2d/navigation_polygon.cpp
@ -295,7 +295,9 @@ void NavigationPolygon::make_polygons_from_outlines() {

 	TriangulatorPartition tpart;
 	if (tpart.ConvexPartition_HM(&in_poly, &out_poly) == 0) { //failed!
-		ERR_PRINT("NavigationPolygon: Convex partition failed!");
+		ERR_PRINT("NavigationPolygon: Convex partition failed! Failed to convert outlines to a valid NavigationMesh."
+				  "\nNavigationPolygon outlines can not overlap vertices or edges inside same outline or with other outlines or have any intersections."
+				  "\nAdd the outmost and largest outline first. To add holes inside this outline add the smaller outlines with opposite winding order.");
 		return;
 	}

--- a/scene/3d/visual_instance.cpp
+++ b/scene/3d/visual_instance.cpp
@ -180,6 +180,24 @@ bool VisualInstance::get_layer_mask_bit(int p_layer) const {
 	return (layers & (1 << p_layer));
 }

+void VisualInstance::set_sorting_offset(float p_offset) {
+	sorting_offset = p_offset;
+	VisualServer::get_singleton()->instance_set_pivot_data(instance, sorting_offset, sorting_use_aabb_center);
+}
+
+float VisualInstance::get_sorting_offset() {
+	return sorting_offset;
+}
+
+void VisualInstance::set_sorting_use_aabb_center(bool p_enabled) {
+	sorting_use_aabb_center = p_enabled;
+	VisualServer::get_singleton()->instance_set_pivot_data(instance, sorting_offset, sorting_use_aabb_center);
+}
+
+bool VisualInstance::is_sorting_use_aabb_center() {
+	return sorting_use_aabb_center;
+}
+
 void VisualInstance::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("_get_visual_instance_rid"), &VisualInstance::_get_visual_instance_rid);
 	ClassDB::bind_method(D_METHOD("set_base", "base"), &VisualInstance::set_base);
@ -190,8 +208,16 @@ void VisualInstance::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_layer_mask_bit", "layer", "enabled"), &VisualInstance::set_layer_mask_bit);
 	ClassDB::bind_method(D_METHOD("get_layer_mask_bit", "layer"), &VisualInstance::get_layer_mask_bit);
 	ClassDB::bind_method(D_METHOD("get_transformed_aabb"), &VisualInstance::get_transformed_aabb);
+	ClassDB::bind_method(D_METHOD("set_sorting_offset", "offset"), &VisualInstance::set_sorting_offset);
+	ClassDB::bind_method(D_METHOD("get_sorting_offset"), &VisualInstance::get_sorting_offset);
+	ClassDB::bind_method(D_METHOD("set_sorting_use_aabb_center", "enabled"), &VisualInstance::set_sorting_use_aabb_center);
+	ClassDB::bind_method(D_METHOD("is_sorting_use_aabb_center"), &VisualInstance::is_sorting_use_aabb_center);

 	ADD_PROPERTY(PropertyInfo(Variant::INT, "layers", PROPERTY_HINT_LAYERS_3D_RENDER), "set_layer_mask", "get_layer_mask");
+
+	ADD_GROUP("Sorting", "sorting_");
+	ADD_PROPERTY(PropertyInfo(Variant::REAL, "sorting_offset"), "set_sorting_offset", "get_sorting_offset");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "sorting_use_aabb_center"), "set_sorting_use_aabb_center", "is_sorting_use_aabb_center");
 }

 void VisualInstance::set_base(const RID &p_base) {
@ -207,6 +233,8 @@ VisualInstance::VisualInstance() {
 	instance = RID_PRIME(VisualServer::get_singleton()->instance_create());
 	VisualServer::get_singleton()->instance_attach_object_instance_id(instance, get_instance_id());
 	layers = 1;
+	sorting_offset = 0.0f;
+	sorting_use_aabb_center = true;
 	set_notify_transform(true);
 }

--- a/scene/3d/visual_instance.h
+++ b/scene/3d/visual_instance.h
@ -43,6 +43,8 @@ class VisualInstance : public CullInstance {
 	RID base;
 	RID instance;
 	uint32_t layers;
+	float sorting_offset;
+	bool sorting_use_aabb_center;

 	RID _get_visual_instance_rid() const;

@ -77,6 +79,12 @@ public:
 	void set_layer_mask_bit(int p_layer, bool p_enable);
 	bool get_layer_mask_bit(int p_layer) const;

+	void set_sorting_offset(float p_offset);
+	float get_sorting_offset();
+
+	void set_sorting_use_aabb_center(bool p_enabled);
+	bool is_sorting_use_aabb_center();
+
 	VisualInstance();
 	~VisualInstance();
 };
--- a/scene/animation/scene_tree_tween.cpp
+++ b/scene/animation/scene_tree_tween.cpp
@ -65,10 +65,17 @@ Ref<PropertyTweener> SceneTreeTween::tween_property(Object *p_target, NodePath p
 	ERR_FAIL_COND_V_MSG(!valid, nullptr, "SceneTreeTween invalid. Either finished or created outside scene tree.");
 	ERR_FAIL_COND_V_MSG(started, nullptr, "Can't append to a SceneTreeTween that has started. Use stop() first.");

-#ifdef DEBUG_ENABLED
 	Variant::Type property_type = p_target->get_indexed(p_property.get_as_property_path().get_subnames()).get_type();
-	ERR_FAIL_COND_V_MSG(property_type != p_to.get_type(), Ref<PropertyTweener>(), "Type mismatch between property and final value: " + Variant::get_type_name(property_type) + " and " + Variant::get_type_name(p_to.get_type()));
-#endif
+	if (property_type != p_to.get_type()) {
+		// Cast p_to between floats and ints to avoid minor annoyances.
+		if (property_type == Variant::REAL && p_to.get_type() == Variant::INT) {
+			p_to = float(p_to);
+		} else if (property_type == Variant::INT && p_to.get_type() == Variant::REAL) {
+			p_to = int(p_to);
+		} else {
+			ERR_FAIL_V_MSG(Ref<PropertyTweener>(), "Type mismatch between property and final value: " + Variant::get_type_name(property_type) + " and " + Variant::get_type_name(p_to.get_type()));
+		}
+	}

 	Ref<PropertyTweener> tweener = memnew(PropertyTweener(p_target, p_property, p_to, p_duration));
 	append(tweener);
--- a/scene/main/canvas_layer.cpp
+++ b/scene/main/canvas_layer.cpp
@ -54,7 +54,7 @@ void CanvasLayer::set_visible(bool p_visible) {
 	// For CanvasItems that is explicitly top level or has non-CanvasItem parents.
 	if (is_inside_tree()) {
 		const String group = "root_canvas" + itos(canvas.get_id());
-		get_tree()->call_group_flags(SceneTree::GROUP_CALL_UNIQUE, group, "_toplevel_visibility_changed", p_visible);
+		get_tree()->call_group(group, "_toplevel_visibility_changed", p_visible);
 	}
 }

--- a/servers/physics/shape_sw.cpp
+++ b/servers/physics/shape_sw.cpp
@ -1967,7 +1967,7 @@ void HeightMapShapeSW::_get_cell(const Vector3 &p_point, int &r_x, int &r_y, int
 	Vector3 clamped_point(p_point);
 	clamped_point.x = CLAMP(p_point.x, pos_local.x, pos_local.x + aabb.size.x);
 	clamped_point.y = CLAMP(p_point.y, pos_local.y, pos_local.y + aabb.size.y);
-	clamped_point.z = CLAMP(p_point.z, pos_local.z, pos_local.x + aabb.size.z);
+	clamped_point.z = CLAMP(p_point.z, pos_local.z, pos_local.z + aabb.size.z);

 	r_x = (clamped_point.x < 0.0) ? (clamped_point.x - 0.5) : (clamped_point.x + 0.5);
 	r_y = (clamped_point.y < 0.0) ? (clamped_point.y - 0.5) : (clamped_point.y + 0.5);
--- a/servers/visual/visual_server_raster.h
+++ b/servers/visual/visual_server_raster.h
@ -579,6 +579,7 @@ public:
 	BIND2(instance_set_base, RID, RID)
 	BIND2(instance_set_scenario, RID, RID)
 	BIND2(instance_set_layer_mask, RID, uint32_t)
+	BIND3(instance_set_pivot_data, RID, float, bool)
 	BIND2(instance_set_transform, RID, const Transform &)
 	BIND2(instance_set_interpolated, RID, bool)
 	BIND1(instance_reset_physics_interpolation, RID)
--- a/servers/visual/visual_server_scene.cpp
+++ b/servers/visual/visual_server_scene.cpp
@ -792,6 +792,14 @@ void VisualServerScene::instance_set_layer_mask(RID p_instance, uint32_t p_mask)
 	instance->layer_mask = p_mask;
 }

+void VisualServerScene::instance_set_pivot_data(RID p_instance, float p_sorting_offset, bool p_use_aabb_center) {
+	Instance *instance = instance_owner.get(p_instance);
+	ERR_FAIL_COND(!instance);
+
+	instance->sorting_offset = p_sorting_offset;
+	instance->use_aabb_center = p_use_aabb_center;
+}
+
 void VisualServerScene::instance_reset_physics_interpolation(RID p_instance) {
 	Instance *instance = instance_owner.get(p_instance);
 	ERR_FAIL_COND(!instance);
@ -3267,11 +3275,14 @@ void VisualServerScene::_prepare_scene(const Transform p_cam_transform, const Ca
 		Instance *ins = instance_cull_result[i];

 		if (((1 << ins->base_type) & VS::INSTANCE_GEOMETRY_MASK) && ins->visible && ins->cast_shadows != VS::SHADOW_CASTING_SETTING_SHADOWS_ONLY) {
-			Vector3 aabb_center = ins->transformed_aabb.position + (ins->transformed_aabb.size * 0.5);
+			Vector3 center = ins->transform.origin;
+			if (ins->use_aabb_center) {
+				center = ins->transformed_aabb.position + (ins->transformed_aabb.size * 0.5);
+			}
 			if (p_cam_orthogonal) {
-				ins->depth = near_plane.distance_to(aabb_center);
+				ins->depth = near_plane.distance_to(center) - ins->sorting_offset;
 			} else {
-				ins->depth = p_cam_transform.origin.distance_to(aabb_center);
+				ins->depth = p_cam_transform.origin.distance_to(center) - ins->sorting_offset;
 			}
 			ins->depth_layer = CLAMP(int(ins->depth * 16 / z_far), 0, 15);
 		}
--- a/servers/visual/visual_server_scene.h
+++ b/servers/visual/visual_server_scene.h
@ -311,6 +311,8 @@ public:
 		AABB aabb;
 		AABB transformed_aabb;
 		AABB *custom_aabb; // <Zylann> would using aabb directly with a bool be better?
+		float sorting_offset;
+		bool use_aabb_center;
 		float extra_margin;
 		uint32_t object_id;

@ -371,6 +373,8 @@ public:
 			base_data = nullptr;

 			custom_aabb = nullptr;
+			sorting_offset = 0.0f;
+			use_aabb_center = false;
 		}

 		~Instance() {
@ -614,6 +618,7 @@ public:
 	virtual void instance_set_base(RID p_instance, RID p_base);
 	virtual void instance_set_scenario(RID p_instance, RID p_scenario);
 	virtual void instance_set_layer_mask(RID p_instance, uint32_t p_mask);
+	virtual void instance_set_pivot_data(RID p_instance, float p_sorting_offset, bool p_use_aabb_center);
 	virtual void instance_set_transform(RID p_instance, const Transform &p_transform);
 	virtual void instance_set_interpolated(RID p_instance, bool p_interpolated);
 	virtual void instance_reset_physics_interpolation(RID p_instance);
--- a/servers/visual/visual_server_wrap_mt.h
+++ b/servers/visual/visual_server_wrap_mt.h
@ -485,6 +485,7 @@ public:
 	FUNC2(instance_set_base, RID, RID)
 	FUNC2(instance_set_scenario, RID, RID)
 	FUNC2(instance_set_layer_mask, RID, uint32_t)
+	FUNC3(instance_set_pivot_data, RID, float, bool)
 	FUNC2(instance_set_transform, RID, const Transform &)
 	FUNC2(instance_set_interpolated, RID, bool)
 	FUNC1(instance_reset_physics_interpolation, RID)
--- a/servers/visual_server.h
+++ b/servers/visual_server.h
@ -866,6 +866,7 @@ public:
 	virtual void instance_set_base(RID p_instance, RID p_base) = 0;
 	virtual void instance_set_scenario(RID p_instance, RID p_scenario) = 0;
 	virtual void instance_set_layer_mask(RID p_instance, uint32_t p_mask) = 0;
+	virtual void instance_set_pivot_data(RID p_instance, float p_sorting_offset, bool p_use_aabb_center) = 0;
 	virtual void instance_set_transform(RID p_instance, const Transform &p_transform) = 0;
 	virtual void instance_set_interpolated(RID p_instance, bool p_interpolated) = 0;
 	virtual void instance_reset_physics_interpolation(RID p_instance) = 0;
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@ -35,7 +35,7 @@ Includes some patches in the `patches` folder which have been sent upstream.
 ## certs

 - Upstream: Mozilla, via https://github.com/bagder/ca-bundle
- Version: git (7f33e7eb8472dbcf31fdcf50cd216c89a282825d, 2022)
+- Version: git (b2f7415648411b6fd7c298c6c92d6552f0165f60, 2022)
 - License: MPL 2.0


@ -266,12 +266,12 @@ from the Android NDK r18.
 ## libwebp

 - Upstream: https://chromium.googlesource.com/webm/libwebp/
- Version: 1.2.2 (b0a860891dcd4c0c2d7c6149e5cccb6eb881cc21, 2022)
+- Version: 1.2.4 (0d1f12546bd803099a60c070517a552483f3790e, 2022)
 - License: BSD-3-Clause

 Files extracted from upstream source:

- `src/*` except from: `.am`, `.rc` and `.in` files
+- `src/` and `sharpyuv/` except from: `.am`, `.rc` and `.in` files
 - `AUTHORS`, `COPYING`, `PATENTS`


--- a/thirdparty/certs/ca-certificates.crt
+++ b/thirdparty/certs/ca-certificates.crt
@ -1,7 +1,7 @@
 ##
 ## Bundle of CA Root Certificates
 ##
-## Certificate data from Mozilla as of: Tue Jul 19 14:20:01 2022 GMT
+## Certificate data from Mozilla as of: Fri Oct 21 16:14:43 2022 GMT
 ##
 ## This is a bundle of X.509 certificates of public Certificate Authorities
 ## (CA). These were automatically extracted from Mozilla's root certificates
@ -14,7 +14,7 @@
 ## Just configure this file as the SSLCACertificateFile.
 ##
 ## Conversion done with mk-ca-bundle.pl version 1.29.
-## SHA256: 9bf3799611fb58197f61d45e71ce3dc19f30e7dd73731915872ce5108a7bb066
+## SHA256: 3ff8bd209b5f2e739b9f2b96eacb694a774114685b02978257824f37ff528f71
 ##


@ -3458,3 +3458,49 @@ zPUwHQYDVR0OBBYEFP+CMXI++cRmbK04ntGwUYilkMz1MA4GA1UdDwEB/wQEAwIBBjAKBggqhkjO
 PQQDAwNpADBmAjEA5gVYaWHlLcoNy/EZCL3W/VGSGn5jVASQkZo1kTmZ+gepZpO6yGjUij/67W4W
 Aie3AjEA3VoXK3YdZUKWpqxdinlW2Iob35reX8dQj7FbcQwm32pAAOwzkSFxvmjkI6TZraE3
 -----END CERTIFICATE-----
+
+Security Communication RootCA3
+==============================
+-----BEGIN CERTIFICATE-----
+MIIFfzCCA2egAwIBAgIJAOF8N0D9G/5nMA0GCSqGSIb3DQEBDAUAMF0xCzAJBgNVBAYTAkpQMSUw
+IwYDVQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixMVEQuMScwJQYDVQQDEx5TZWN1cml0eSBD
+b21tdW5pY2F0aW9uIFJvb3RDQTMwHhcNMTYwNjE2MDYxNzE2WhcNMzgwMTE4MDYxNzE2WjBdMQsw
+CQYDVQQGEwJKUDElMCMGA1UEChMcU0VDT00gVHJ1c3QgU3lzdGVtcyBDTy4sTFRELjEnMCUGA1UE
+AxMeU2VjdXJpdHkgQ29tbXVuaWNhdGlvbiBSb290Q0EzMIICIjANBgkqhkiG9w0BAQEFAAOCAg8A
+MIICCgKCAgEA48lySfcw3gl8qUCBWNO0Ot26YQ+TUG5pPDXC7ltzkBtnTCHsXzW7OT4rCmDvu20r
+hvtxosis5FaU+cmvsXLUIKx00rgVrVH+hXShuRD+BYD5UpOzQD11EKzAlrenfna84xtSGc4RHwsE
+NPXY9Wk8d/Nk9A2qhd7gCVAEF5aEt8iKvE1y/By7z/MGTfmfZPd+pmaGNXHIEYBMwXFAWB6+oHP2
+/D5Q4eAvJj1+XCO1eXDe+uDRpdYMQXF79+qMHIjH7Iv10S9VlkZ8WjtYO/u62C21Jdp6Ts9EriGm
+npjKIG58u4iFW/vAEGK78vknR+/RiTlDxN/e4UG/VHMgly1s2vPUB6PmudhvrvyMGS7TZ2crldtY
+XLVqAvO4g160a75BflcJdURQVc1aEWEhCmHCqYj9E7wtiS/NYeCVvsq1e+F7NGcLH7YMx3weGVPK
+p7FKFSBWFHA9K4IsD50VHUeAR/94mQ4xr28+j+2GaR57GIgUssL8gjMunEst+3A7caoreyYn8xrC
+3PsXuKHqy6C0rtOUfnrQq8PsOC0RLoi/1D+tEjtCrI8Cbn3M0V9hvqG8OmpI6iZVIhZdXw3/JzOf
+GAN0iltSIEdrRU0id4xVJ/CvHozJgyJUt5rQT9nO/NkuHJYosQLTA70lUhw0Zk8jq/R3gpYd0Vcw
+CBEF/VfR2ccCAwEAAaNCMEAwHQYDVR0OBBYEFGQUfPxYchamCik0FW8qy7z8r6irMA4GA1UdDwEB
+/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBDAUAA4ICAQDcAiMI4u8hOscNtybS
+YpOnpSNyByCCYN8Y11StaSWSntkUz5m5UoHPrmyKO1o5yGwBQ8IibQLwYs1OY0PAFNr0Y/Dq9HHu
+Tofjcan0yVflLl8cebsjqodEV+m9NU1Bu0soo5iyG9kLFwfl9+qd9XbXv8S2gVj/yP9kaWJ5rW4O
+H3/uHWnlt3Jxs/6lATWUVCvAUm2PVcTJ0rjLyjQIUYWg9by0F1jqClx6vWPGOi//lkkZhOpn2ASx
+YfQAW0q3nHE3GYV5v4GwxxMOdnE+OoAGrgYWp421wsTL/0ClXI2lyTrtcoHKXJg80jQDdwj98ClZ
+XSEIx2C/pHF7uNkegr4Jr2VvKKu/S7XuPghHJ6APbw+LP6yVGPO5DtxnVW5inkYO0QR4ynKudtml
+LLfiAlhi+8kTtFZP1rUPcmTPCtk9YENFpb3ksP+MW/oKjJ0DvRMmEoYDjBU1cXrvMUVnuiZIesn
+KwkK2/HmcBhWuwzkvvnoEKQTkrgc4NtnHVMDpCKn3F2SEDzq//wbEBrD2NCcnWXL0CsnMQMeNuE9
+dnUM/0Umud1RvCPHX9jYhxBAEg09ODfnRDwYwFMJZI//1ZqmfHAuc1Uh6N//g7kdPjIe1qZ9LPFm
+6Vwdp6POXiUyK+OVrCoHzrQoeIY8LaadTdJ0MN1kURXbg4NR16/9M51NZg==
+-----END CERTIFICATE-----
+
+Security Communication ECC RootCA1
+==================================
+-----BEGIN CERTIFICATE-----
+MIICODCCAb6gAwIBAgIJANZdm7N4gS7rMAoGCCqGSM49BAMDMGExCzAJBgNVBAYTAkpQMSUwIwYD
+VQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixMVEQuMSswKQYDVQQDEyJTZWN1cml0eSBDb21t
+dW5pY2F0aW9uIEVDQyBSb290Q0ExMB4XDTE2MDYxNjA1MTUyOFoXDTM4MDExODA1MTUyOFowYTEL
+MAkGA1UEBhMCSlAxJTAjBgNVBAoTHFNFQ09NIFRydXN0IFN5c3RlbXMgQ08uLExURC4xKzApBgNV
+BAMTIlNlY3VyaXR5IENvbW11bmljYXRpb24gRUNDIFJvb3RDQTEwdjAQBgcqhkjOPQIBBgUrgQQA
+IgNiAASkpW9gAwPDvTH00xecK4R1rOX9PVdu12O/5gSJko6BnOPpR27KkBLIE+CnnfdldB9sELLo
+5OnvbYUymUSxXv3MdhDYW72ixvnWQuRXdtyQwjWpS4g8EkdtXP9JTxpKULGjQjBAMB0GA1UdDgQW
+BBSGHOf+LaVKiwj+KBH6vqNm+GBZLzAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAK
+BggqhkjOPQQDAwNoADBlAjAVXUI9/Lbu9zuxNuie9sRGKEkz0FhDKmMpzE2xtHqiuQ04pV1IKv3L
+snNdo4gIxwwCMQDAqy0Obe0YottT6SXbVQjgUMzfRGEWgqtJsLKB7HOHeLRMsmIbEvoWTSVLY70e
+N9k=
+-----END CERTIFICATE-----
--- a/thirdparty/libwebp/AUTHORS
+++ b/thirdparty/libwebp/AUTHORS
@ -1,12 +1,15 @@
 Contributors:
 - Aidan O'Loan (aidanol at gmail dot com)
 - Alan Browning (browning at google dot com)
+- Alexandru Ardelean (ardeleanalex at gmail dot com)
+- Brian Ledger (brianpl at google dot com)
 - Charles Munger (clm at google dot com)
 - Cheng Yi (cyi at google dot com)
 - Christian Duvivier (cduvivier at google dot com)
 - Christopher Degawa (ccom at randomderp dot com)
 - Clement Courbet (courbet at google dot com)
 - Djordje Pesut (djordje dot pesut at imgtec dot com)
+- Frank Barchard (fbarchard at google dot com)
 - Hui Su (huisu at google dot com)
 - Ilya Kurdyukov (jpegqs at gmail dot com)
 - Ingvar Stepanyan (rreverser at google dot com)
@ -22,6 +25,7 @@ Contributors:
 - Mans Rullgard (mans at mansr dot com)
 - Marcin Kowalczyk (qrczak at google dot com)
 - Martin Olsson (mnemo at minimum dot se)
+- Maryla Ustarroz-Calonge (maryla at google dot com)
 - Mikołaj Zalewski (mikolajz at google dot com)
 - Mislav Bradac (mislavm at google dot com)
 - Nico Weber (thakis at chromium dot org)
--- a/thirdparty/libwebp/sharpyuv/sharpyuv.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv.c
@ -0,0 +1,498 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Sharp RGB to YUV conversion.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "sharpyuv/sharpyuv.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "src/webp/types.h"
+#include "src/dsp/cpu.h"
+#include "sharpyuv/sharpyuv_dsp.h"
+#include "sharpyuv/sharpyuv_gamma.h"
+
+//------------------------------------------------------------------------------
+// Sharp RGB->YUV conversion
+
+static const int kNumIterations = 4;
+
+#define YUV_FIX 16  // fixed-point precision for RGB->YUV
+static const int kYuvHalf = 1 << (YUV_FIX - 1);
+
+// Max bit depth so that intermediate calculations fit in 16 bits.
+static const int kMaxBitDepth = 14;
+
+// Returns the precision shift to use based on the input rgb_bit_depth.
+static int GetPrecisionShift(int rgb_bit_depth) {
+  // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
+  // bits if needed.
+  return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
+                                               : (kMaxBitDepth - rgb_bit_depth);
+}
+
+typedef int16_t fixed_t;      // signed type with extra precision for UV
+typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W
+
+//------------------------------------------------------------------------------
+
+static uint8_t clip_8b(fixed_t v) {
+  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
+}
+
+static uint16_t clip(fixed_t v, int max) {
+  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
+}
+
+static fixed_y_t clip_bit_depth(int y, int bit_depth) {
+  const int max = (1 << bit_depth) - 1;
+  return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
+}
+
+//------------------------------------------------------------------------------
+
+static int RGBToGray(int64_t r, int64_t g, int64_t b) {
+  const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
+  return (int)(luma >> YUV_FIX);
+}
+
+static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
+                          int rgb_bit_depth) {
+  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
+  const uint32_t A = SharpYuvGammaToLinear(a, bit_depth);
+  const uint32_t B = SharpYuvGammaToLinear(b, bit_depth);
+  const uint32_t C = SharpYuvGammaToLinear(c, bit_depth);
+  const uint32_t D = SharpYuvGammaToLinear(d, bit_depth);
+  return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
+}
+
+static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
+                                int rgb_bit_depth) {
+  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
+  int i;
+  for (i = 0; i < w; ++i) {
+    const uint32_t R = SharpYuvGammaToLinear(src[0 * w + i], bit_depth);
+    const uint32_t G = SharpYuvGammaToLinear(src[1 * w + i], bit_depth);
+    const uint32_t B = SharpYuvGammaToLinear(src[2 * w + i], bit_depth);
+    const uint32_t Y = RGBToGray(R, G, B);
+    dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth);
+  }
+}
+
+static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
+                         fixed_t* dst, int uv_w, int rgb_bit_depth) {
+  int i;
+  for (i = 0; i < uv_w; ++i) {
+    const int r =
+        ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
+                  src2[0 * uv_w + 1], rgb_bit_depth);
+    const int g =
+        ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
+                  src2[2 * uv_w + 1], rgb_bit_depth);
+    const int b =
+        ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
+                  src2[4 * uv_w + 1], rgb_bit_depth);
+    const int W = RGBToGray(r, g, b);
+    dst[0 * uv_w] = (fixed_t)(r - W);
+    dst[1 * uv_w] = (fixed_t)(g - W);
+    dst[2 * uv_w] = (fixed_t)(b - W);
+    dst  += 1;
+    src1 += 2;
+    src2 += 2;
+  }
+}
+
+static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
+  int i;
+  assert(w > 0);
+  for (i = 0; i < w; ++i) {
+    y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
+  const int v0 = (A * 3 + B + 2) >> 2;
+  return clip_bit_depth(v0 + W0, bit_depth);
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int Shift(int v, int shift) {
+  return (shift >= 0) ? (v << shift) : (v >> -shift);
+}
+
+static void ImportOneRow(const uint8_t* const r_ptr,
+                         const uint8_t* const g_ptr,
+                         const uint8_t* const b_ptr,
+                         int rgb_step,
+                         int rgb_bit_depth,
+                         int pic_width,
+                         fixed_y_t* const dst) {
+  // Convert the rgb_step from a number of bytes to a number of uint8_t or
+  // uint16_t values depending the bit depth.
+  const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
+  int i;
+  const int w = (pic_width + 1) & ~1;
+  for (i = 0; i < pic_width; ++i) {
+    const int off = i * step;
+    const int shift = GetPrecisionShift(rgb_bit_depth);
+    if (rgb_bit_depth == 8) {
+      dst[i + 0 * w] = Shift(r_ptr[off], shift);
+      dst[i + 1 * w] = Shift(g_ptr[off], shift);
+      dst[i + 2 * w] = Shift(b_ptr[off], shift);
+    } else {
+      dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
+      dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
+      dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
+    }
+  }
+  if (pic_width & 1) {  // replicate rightmost pixel
+    dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
+    dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
+    dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
+  }
+}
+
+static void InterpolateTwoRows(const fixed_y_t* const best_y,
+                               const fixed_t* prev_uv,
+                               const fixed_t* cur_uv,
+                               const fixed_t* next_uv,
+                               int w,
+                               fixed_y_t* out1,
+                               fixed_y_t* out2,
+                               int rgb_bit_depth) {
+  const int uv_w = w >> 1;
+  const int len = (w - 1) >> 1;   // length to filter
+  int k = 3;
+  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
+  while (k-- > 0) {   // process each R/G/B segments in turn
+    // special boundary case for i==0
+    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
+    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
+
+    SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
+                      bit_depth);
+    SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
+                      bit_depth);
+
+    // special boundary case for i == w - 1 when w is even
+    if (!(w & 1)) {
+      out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
+                            best_y[w - 1 + 0], bit_depth);
+      out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
+                            best_y[w - 1 + w], bit_depth);
+    }
+    out1 += w;
+    out2 += w;
+    prev_uv += uv_w;
+    cur_uv  += uv_w;
+    next_uv += uv_w;
+  }
+}
+
+static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
+                                         const int coeffs[4], int sfix) {
+  const int srounder = 1 << (YUV_FIX + sfix - 1);
+  const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
+                   coeffs[3] + srounder;
+  return (luma >> (YUV_FIX + sfix));
+}
+
+static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
+                            uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
+                            int u_stride, uint8_t* v_ptr, int v_stride,
+                            int rgb_bit_depth,
+                            int yuv_bit_depth, int width, int height,
+                            const SharpYuvConversionMatrix* yuv_matrix) {
+  int i, j;
+  const fixed_t* const best_uv_base = best_uv;
+  const int w = (width + 1) & ~1;
+  const int h = (height + 1) & ~1;
+  const int uv_w = w >> 1;
+  const int uv_h = h >> 1;
+  const int sfix = GetPrecisionShift(rgb_bit_depth);
+  const int yuv_max = (1 << yuv_bit_depth) - 1;
+
+  for (best_uv = best_uv_base, j = 0; j < height; ++j) {
+    for (i = 0; i < width; ++i) {
+      const int off = (i >> 1);
+      const int W = best_y[i];
+      const int r = best_uv[off + 0 * uv_w] + W;
+      const int g = best_uv[off + 1 * uv_w] + W;
+      const int b = best_uv[off + 2 * uv_w] + W;
+      const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
+      if (yuv_bit_depth <= 8) {
+        y_ptr[i] = clip_8b(y);
+      } else {
+        ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
+      }
+    }
+    best_y += w;
+    best_uv += (j & 1) * 3 * uv_w;
+    y_ptr += y_stride;
+  }
+  for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
+    for (i = 0; i < uv_w; ++i) {
+      const int off = i;
+      // Note r, g and b values here are off by W, but a constant offset on all
+      // 3 components doesn't change the value of u and v with a YCbCr matrix.
+      const int r = best_uv[off + 0 * uv_w];
+      const int g = best_uv[off + 1 * uv_w];
+      const int b = best_uv[off + 2 * uv_w];
+      const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
+      const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
+      if (yuv_bit_depth <= 8) {
+        u_ptr[i] = clip_8b(u);
+        v_ptr[i] = clip_8b(v);
+      } else {
+        ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
+        ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
+      }
+    }
+    best_uv += 3 * uv_w;
+    u_ptr += u_stride;
+    v_ptr += v_stride;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main function
+
+static void* SafeMalloc(uint64_t nmemb, size_t size) {
+  const uint64_t total_size = nmemb * (uint64_t)size;
+  if (total_size != (size_t)total_size) return NULL;
+  return malloc((size_t)total_size);
+}
+
+#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T)))
+
+static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
+                            const uint8_t* b_ptr, int rgb_step, int rgb_stride,
+                            int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
+                            uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
+                            int v_stride, int yuv_bit_depth, int width,
+                            int height,
+                            const SharpYuvConversionMatrix* yuv_matrix) {
+  // we expand the right/bottom border if needed
+  const int w = (width + 1) & ~1;
+  const int h = (height + 1) & ~1;
+  const int uv_w = w >> 1;
+  const int uv_h = h >> 1;
+  uint64_t prev_diff_y_sum = ~0;
+  int j, iter;
+
+  // TODO(skal): allocate one big memory chunk. But for now, it's easier
+  // for valgrind debugging to have several chunks.
+  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
+  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
+  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
+  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
+  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
+  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
+  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
+  fixed_y_t* best_y = best_y_base;
+  fixed_y_t* target_y = target_y_base;
+  fixed_t* best_uv = best_uv_base;
+  fixed_t* target_uv = target_uv_base;
+  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
+  int ok;
+  assert(w > 0);
+  assert(h > 0);
+
+  if (best_y_base == NULL || best_uv_base == NULL ||
+      target_y_base == NULL || target_uv_base == NULL ||
+      best_rgb_y == NULL || best_rgb_uv == NULL ||
+      tmp_buffer == NULL) {
+    ok = 0;
+    goto End;
+  }
+
+  // Import RGB samples to W/RGB representation.
+  for (j = 0; j < height; j += 2) {
+    const int is_last_row = (j == height - 1);
+    fixed_y_t* const src1 = tmp_buffer + 0 * w;
+    fixed_y_t* const src2 = tmp_buffer + 3 * w;
+
+    // prepare two rows of input
+    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
+                 src1);
+    if (!is_last_row) {
+      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
+                   rgb_step, rgb_bit_depth, width, src2);
+    } else {
+      memcpy(src2, src1, 3 * w * sizeof(*src2));
+    }
+    StoreGray(src1, best_y + 0, w);
+    StoreGray(src2, best_y + w, w);
+
+    UpdateW(src1, target_y, w, rgb_bit_depth);
+    UpdateW(src2, target_y + w, w, rgb_bit_depth);
+    UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth);
+    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
+    best_y += 2 * w;
+    best_uv += 3 * uv_w;
+    target_y += 2 * w;
+    target_uv += 3 * uv_w;
+    r_ptr += 2 * rgb_stride;
+    g_ptr += 2 * rgb_stride;
+    b_ptr += 2 * rgb_stride;
+  }
+
+  // Iterate and resolve clipping conflicts.
+  for (iter = 0; iter < kNumIterations; ++iter) {
+    const fixed_t* cur_uv = best_uv_base;
+    const fixed_t* prev_uv = best_uv_base;
+    uint64_t diff_y_sum = 0;
+
+    best_y = best_y_base;
+    best_uv = best_uv_base;
+    target_y = target_y_base;
+    target_uv = target_uv_base;
+    for (j = 0; j < h; j += 2) {
+      fixed_y_t* const src1 = tmp_buffer + 0 * w;
+      fixed_y_t* const src2 = tmp_buffer + 3 * w;
+      {
+        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
+        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
+                           src1, src2, rgb_bit_depth);
+        prev_uv = cur_uv;
+        cur_uv = next_uv;
+      }
+
+      UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth);
+      UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth);
+      UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth);
+
+      // update two rows of Y and one row of RGB
+      diff_y_sum +=
+          SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
+                          rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
+      SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
+
+      best_y += 2 * w;
+      best_uv += 3 * uv_w;
+      target_y += 2 * w;
+      target_uv += 3 * uv_w;
+    }
+    // test exit condition
+    if (iter > 0) {
+      if (diff_y_sum < diff_y_threshold) break;
+      if (diff_y_sum > prev_diff_y_sum) break;
+    }
+    prev_diff_y_sum = diff_y_sum;
+  }
+
+  // final reconstruction
+  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
+                        u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
+                        width, height, yuv_matrix);
+
+ End:
+  free(best_y_base);
+  free(best_uv_base);
+  free(target_y_base);
+  free(target_uv_base);
+  free(best_rgb_y);
+  free(best_rgb_uv);
+  free(tmp_buffer);
+  return ok;
+}
+#undef SAFE_ALLOC
+
+// Hidden exported init function.
+// By default SharpYuvConvert calls it with NULL. If needed, users can declare
+// it as extern and call it with a VP8CPUInfo function.
+extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
+void SharpYuvInit(VP8CPUInfo cpu_info_func) {
+  static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
+      (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
+  const int initialized =
+      (sharpyuv_last_cpuinfo_used != (VP8CPUInfo)&sharpyuv_last_cpuinfo_used);
+  if (cpu_info_func == NULL && initialized) return;
+  if (sharpyuv_last_cpuinfo_used == cpu_info_func) return;
+
+  SharpYuvInitDsp(cpu_info_func);
+  if (!initialized) {
+    SharpYuvInitGammaTables();
+  }
+
+  sharpyuv_last_cpuinfo_used = cpu_info_func;
+}
+
+int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
+                    const void* b_ptr, int rgb_step, int rgb_stride,
+                    int rgb_bit_depth, void* y_ptr, int y_stride,
+                    void* u_ptr, int u_stride, void* v_ptr,
+                    int v_stride, int yuv_bit_depth, int width,
+                    int height, const SharpYuvConversionMatrix* yuv_matrix) {
+  SharpYuvConversionMatrix scaled_matrix;
+  const int rgb_max = (1 << rgb_bit_depth) - 1;
+  const int rgb_round = 1 << (rgb_bit_depth - 1);
+  const int yuv_max = (1 << yuv_bit_depth) - 1;
+  const int sfix = GetPrecisionShift(rgb_bit_depth);
+
+  if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
+      r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
+      u_ptr == NULL || v_ptr == NULL) {
+    return 0;
+  }
+  if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
+      rgb_bit_depth != 16) {
+    return 0;
+  }
+  if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
+    return 0;
+  }
+  if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) {
+    // Step/stride should be even for uint16_t buffers.
+    return 0;
+  }
+  if (yuv_bit_depth > 8 &&
+      (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
+    // Stride should be even for uint16_t buffers.
+    return 0;
+  }
+  SharpYuvInit(NULL);
+
+  // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
+  // rgb->yuv conversion matrix.
+  if (rgb_bit_depth == yuv_bit_depth) {
+    memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
+  } else {
+    int i;
+    for (i = 0; i < 3; ++i) {
+      scaled_matrix.rgb_to_y[i] =
+          (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
+      scaled_matrix.rgb_to_u[i] =
+          (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
+      scaled_matrix.rgb_to_v[i] =
+          (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
+    }
+  }
+  // Also incorporate precision change scaling.
+  scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
+  scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
+  scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
+
+  return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
+                          rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
+                          v_ptr, v_stride, yuv_bit_depth, width, height,
+                          &scaled_matrix);
+}
+
+//------------------------------------------------------------------------------
--- a/thirdparty/libwebp/sharpyuv/sharpyuv.h
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv.h
@ -0,0 +1,81 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Sharp RGB to YUV conversion.
+
+#ifndef WEBP_SHARPYUV_SHARPYUV_H_
+#define WEBP_SHARPYUV_SHARPYUV_H_
+
+#include <inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// SharpYUV API version following the convention from semver.org
+#define SHARPYUV_VERSION_MAJOR 0
+#define SHARPYUV_VERSION_MINOR 1
+#define SHARPYUV_VERSION_PATCH 0
+// Version as a uint32_t. The major number is the high 8 bits.
+// The minor number is the middle 8 bits. The patch number is the low 16 bits.
+#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
+  (((MAJOR) << 24) | ((MINOR) << 16) | (PATCH))
+#define SHARPYUV_VERSION                                                \
+  SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \
+                        SHARPYUV_VERSION_PATCH)
+
+// RGB to YUV conversion matrix, in 16 bit fixed point.
+// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
+// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
+// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
+// Then y, u and v values are divided by 1<<16 and rounded.
+typedef struct {
+  int rgb_to_y[4];
+  int rgb_to_u[4];
+  int rgb_to_v[4];
+} SharpYuvConversionMatrix;
+
+// Converts RGB to YUV420 using a downsampling algorithm that minimizes
+// artefacts caused by chroma subsampling.
+// This is slower than standard downsampling (averaging of 4 UV values).
+// Assumes that the image will be upsampled using a bilinear filter. If nearest
+// neighbor is used instead, the upsampled image might look worse than with
+// standard downsampling.
+// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point
+//     to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise.
+// rgb_step: distance in bytes between two horizontally adjacent pixels on the
+//     r, g and b channels. If rgb_bit_depth is > 8, it should be a
+//     multiple of 2.
+// rgb_stride: distance in bytes between two vertically adjacent pixels on the
+//     r, g, and b channels. If rgb_bit_depth is > 8, it should be a
+//     multiple of 2.
+// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16.
+//     Note: 16 bit input is truncated to 14 bits before conversion to yuv.
+// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12.
+// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels.  Should
+//     point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers
+//     otherwise.
+// y_stride, u_stride, v_stride: distance in bytes between two vertically
+//     adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
+//     should be multiples of 2.
+// width, height: width and height of the image in pixels
+int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
+                    int rgb_step, int rgb_stride, int rgb_bit_depth,
+                    void* y_ptr, int y_stride, void* u_ptr, int u_stride,
+                    void* v_ptr, int v_stride, int yuv_bit_depth, int width,
+                    int height, const SharpYuvConversionMatrix* yuv_matrix);
+
+// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422
+// support (it's rarely used in practice, especially for images).
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // WEBP_SHARPYUV_SHARPYUV_H_
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_csp.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_csp.c
@ -0,0 +1,110 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Colorspace utilities.
+
+#include "sharpyuv/sharpyuv_csp.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }
+
+void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
+                                     SharpYuvConversionMatrix* matrix) {
+  const float kr = yuv_color_space->kr;
+  const float kb = yuv_color_space->kb;
+  const float kg = 1.0f - kr - kb;
+  const float cr = 0.5f / (1.0f - kb);
+  const float cb = 0.5f / (1.0f - kr);
+
+  const int shift = yuv_color_space->bit_depth - 8;
+
+  const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
+  float scale_y = 1.0f;
+  float add_y = 0.0f;
+  float scale_u = cr;
+  float scale_v = cb;
+  float add_uv = (float)(128 << shift);
+  assert(yuv_color_space->bit_depth >= 8);
+
+  if (yuv_color_space->range == kSharpYuvRangeLimited) {
+    scale_y *= (219 << shift) / denom;
+    scale_u *= (224 << shift) / denom;
+    scale_v *= (224 << shift) / denom;
+    add_y = (float)(16 << shift);
+  }
+
+  matrix->rgb_to_y[0] = ToFixed16(kr * scale_y);
+  matrix->rgb_to_y[1] = ToFixed16(kg * scale_y);
+  matrix->rgb_to_y[2] = ToFixed16(kb * scale_y);
+  matrix->rgb_to_y[3] = ToFixed16(add_y);
+
+  matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u);
+  matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u);
+  matrix->rgb_to_u[2] = ToFixed16((1 - kb) * scale_u);
+  matrix->rgb_to_u[3] = ToFixed16(add_uv);
+
+  matrix->rgb_to_v[0] = ToFixed16((1 - kr) * scale_v);
+  matrix->rgb_to_v[1] = ToFixed16(-kg * scale_v);
+  matrix->rgb_to_v[2] = ToFixed16(-kb * scale_v);
+  matrix->rgb_to_v[3] = ToFixed16(add_uv);
+}
+
+// Matrices are in YUV_FIX fixed point precision.
+// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
+static const SharpYuvConversionMatrix kWebpMatrix = {
+  {16839, 33059, 6420, 16 << 16},
+  {-9719, -19081, 28800, 128 << 16},
+  {28800, -24116, -4684, 128 << 16},
+};
+// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
+static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
+  {16829, 33039, 6416, 16 << 16},
+  {-9714, -19071, 28784, 128 << 16},
+  {28784, -24103, -4681, 128 << 16},
+};
+// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
+static const SharpYuvConversionMatrix kRec601FullMatrix = {
+  {19595, 38470, 7471, 0},
+  {-11058, -21710, 32768, 128 << 16},
+  {32768, -27439, -5329, 128 << 16},
+};
+// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
+static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
+  {11966, 40254, 4064, 16 << 16},
+  {-6596, -22189, 28784, 128 << 16},
+  {28784, -26145, -2639, 128 << 16},
+};
+// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
+static const SharpYuvConversionMatrix kRec709FullMatrix = {
+  {13933, 46871, 4732, 0},
+  {-7509, -25259, 32768, 128 << 16},
+  {32768, -29763, -3005, 128 << 16},
+};
+
+const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
+    SharpYuvMatrixType matrix_type) {
+  switch (matrix_type) {
+    case kSharpYuvMatrixWebp:
+      return &kWebpMatrix;
+    case kSharpYuvMatrixRec601Limited:
+      return &kRec601LimitedMatrix;
+    case kSharpYuvMatrixRec601Full:
+      return &kRec601FullMatrix;
+    case kSharpYuvMatrixRec709Limited:
+      return &kRec709LimitedMatrix;
+    case kSharpYuvMatrixRec709Full:
+      return &kRec709FullMatrix;
+    case kSharpYuvMatrixNum:
+      return NULL;
+  }
+  return NULL;
+}
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_csp.h
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_csp.h
@ -0,0 +1,59 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Colorspace utilities.
+
+#ifndef WEBP_SHARPYUV_SHARPYUV_CSP_H_
+#define WEBP_SHARPYUV_SHARPYUV_CSP_H_
+
+#include "sharpyuv/sharpyuv.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Range of YUV values.
+typedef enum {
+  kSharpYuvRangeFull,     // YUV values between [0;255] (for 8 bit)
+  kSharpYuvRangeLimited   // Y in [16;235], YUV in [16;240] (for 8 bit)
+} SharpYuvRange;
+
+// Constants that define a YUV color space.
+typedef struct {
+  // Kr and Kb are defined such that:
+  // Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb.
+  float kr;
+  float kb;
+  int bit_depth;  // 8, 10 or 12
+  SharpYuvRange range;
+} SharpYuvColorSpace;
+
+// Fills in 'matrix' for the given YUVColorSpace.
+void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
+                                     SharpYuvConversionMatrix* matrix);
+
+// Enums for precomputed conversion matrices.
+typedef enum {
+  kSharpYuvMatrixWebp = 0,
+  kSharpYuvMatrixRec601Limited,
+  kSharpYuvMatrixRec601Full,
+  kSharpYuvMatrixRec709Limited,
+  kSharpYuvMatrixRec709Full,
+  kSharpYuvMatrixNum
+} SharpYuvMatrixType;
+
+// Returns a pointer to a matrix for one of the predefined colorspaces.
+const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
+    SharpYuvMatrixType matrix_type);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // WEBP_SHARPYUV_SHARPYUV_CSP_H_
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c
@ -0,0 +1,102 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Speed-critical functions for Sharp YUV.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "sharpyuv/sharpyuv_dsp.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "src/dsp/cpu.h"
+
+//-----------------------------------------------------------------------------
+
+#if !WEBP_NEON_OMIT_C_CODE
+static uint16_t clip(int v, int max) {
+  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
+}
+
+static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src,
+                                  uint16_t* dst, int len, int bit_depth) {
+  uint64_t diff = 0;
+  int i;
+  const int max_y = (1 << bit_depth) - 1;
+  for (i = 0; i < len; ++i) {
+    const int diff_y = ref[i] - src[i];
+    const int new_y = (int)dst[i] + diff_y;
+    dst[i] = clip(new_y, max_y);
+    diff += (uint64_t)abs(diff_y);
+  }
+  return diff;
+}
+
+static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src,
+                                int16_t* dst, int len) {
+  int i;
+  for (i = 0; i < len; ++i) {
+    const int diff_uv = ref[i] - src[i];
+    dst[i] += diff_uv;
+  }
+}
+
+static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len,
+                                const uint16_t* best_y, uint16_t* out,
+                                int bit_depth) {
+  int i;
+  const int max_y = (1 << bit_depth) - 1;
+  for (i = 0; i < len; ++i, ++A, ++B) {
+    const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
+    const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
+    out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y);
+    out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y);
+  }
+}
+#endif  // !WEBP_NEON_OMIT_C_CODE
+
+//-----------------------------------------------------------------------------
+
+uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
+                            uint16_t* dst, int len, int bit_depth);
+void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst,
+                          int len);
+void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
+                          const uint16_t* best_y, uint16_t* out,
+                          int bit_depth);
+
+extern void InitSharpYuvSSE2(void);
+extern void InitSharpYuvNEON(void);
+
+void SharpYuvInitDsp(VP8CPUInfo cpu_info_func) {
+  (void)cpu_info_func;
+
+#if !WEBP_NEON_OMIT_C_CODE
+  SharpYuvUpdateY = SharpYuvUpdateY_C;
+  SharpYuvUpdateRGB = SharpYuvUpdateRGB_C;
+  SharpYuvFilterRow = SharpYuvFilterRow_C;
+#endif
+
+#if defined(WEBP_HAVE_SSE2)
+  if (cpu_info_func == NULL || cpu_info_func(kSSE2)) {
+    InitSharpYuvSSE2();
+  }
+#endif  // WEBP_HAVE_SSE2
+
+#if defined(WEBP_HAVE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE || cpu_info_func == NULL || cpu_info_func(kNEON)) {
+    InitSharpYuvNEON();
+  }
+#endif  // WEBP_HAVE_NEON
+
+  assert(SharpYuvUpdateY != NULL);
+  assert(SharpYuvUpdateRGB != NULL);
+  assert(SharpYuvFilterRow != NULL);
+}
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.h
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.h
@ -0,0 +1,29 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Speed-critical functions for Sharp YUV.
+
+#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_
+#define WEBP_SHARPYUV_SHARPYUV_DSP_H_
+
+#include <stdint.h>
+
+#include "src/dsp/cpu.h"
+
+extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
+                                   uint16_t* dst, int len, int bit_depth);
+extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref,
+                                 int16_t* dst, int len);
+extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
+                                 const uint16_t* best_y, uint16_t* out,
+                                 int bit_depth);
+
+void SharpYuvInitDsp(VP8CPUInfo cpu_info_func);
+
+#endif  // WEBP_SHARPYUV_SHARPYUV_DSP_H_
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c
@ -0,0 +1,114 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Gamma correction utilities.
+
+#include "sharpyuv/sharpyuv_gamma.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+
+#include "src/webp/types.h"
+
+// Gamma correction compensates loss of resolution during chroma subsampling.
+// Size of pre-computed table for converting from gamma to linear.
+#define GAMMA_TO_LINEAR_TAB_BITS 10
+#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
+static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
+#define LINEAR_TO_GAMMA_TAB_BITS 9
+#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
+static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
+
+static const double kGammaF = 1. / 0.45;
+#define GAMMA_TO_LINEAR_BITS 16
+
+static volatile int kGammaTablesSOk = 0;
+void SharpYuvInitGammaTables(void) {
+  assert(GAMMA_TO_LINEAR_BITS <= 16);
+  if (!kGammaTablesSOk) {
+    int v;
+    const double a = 0.09929682680944;
+    const double thresh = 0.018053968510807;
+    const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
+    // Precompute gamma to linear table.
+    {
+      const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
+      const double a_rec = 1. / (1. + a);
+      for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
+        const double g = norm * v;
+        double value;
+        if (g <= thresh * 4.5) {
+          value = g / 4.5;
+        } else {
+          value = pow(a_rec * (g + a), kGammaF);
+        }
+        kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
+      }
+      // to prevent small rounding errors to cause read-overflow:
+      kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
+          kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
+    }
+    // Precompute linear to gamma table.
+    {
+      const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
+      for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
+        const double g = scale * v;
+        double value;
+        if (g <= thresh) {
+          value = 4.5 * g;
+        } else {
+          value = (1. + a) * pow(g, 1. / kGammaF) - a;
+        }
+        kLinearToGammaTabS[v] =
+            (uint32_t)(final_scale * value + 0.5);
+      }
+      // to prevent small rounding errors to cause read-overflow:
+      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
+          kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
+    }
+    kGammaTablesSOk = 1;
+  }
+}
+
+static WEBP_INLINE int Shift(int v, int shift) {
+  return (shift >= 0) ? (v << shift) : (v >> -shift);
+}
+
+static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
+                                                    int tab_pos_shift_right,
+                                                    int tab_value_shift) {
+  const uint32_t tab_pos = Shift(v, -tab_pos_shift_right);
+  // fractional part, in 'tab_pos_shift' fixed-point precision
+  const uint32_t x = v - (tab_pos << tab_pos_shift_right);  // fractional part
+  // v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1])
+  const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift);
+  const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift);
+  // Final interpolation.
+  const uint32_t v2 = (v1 - v0) * x;  // note: v1 >= v0.
+  const int half =
+      (tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0;
+  const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right);
+  return result;
+}
+
+uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth) {
+  const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
+  if (shift > 0) {
+    return kGammaToLinearTabS[v << shift];
+  }
+  return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
+}
+
+uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth) {
+  return FixedPointInterpolation(
+      value, kLinearToGammaTabS,
+      (GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS),
+      bit_depth - GAMMA_TO_LINEAR_BITS);
+}
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.h
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.h
@ -0,0 +1,35 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Gamma correction utilities.
+
+#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
+#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Initializes precomputed tables. Must be called once before calling
+// SharpYuvGammaToLinear or SharpYuvLinearToGamma.
+void SharpYuvInitGammaTables(void);
+
+// Converts a gamma color value on 'bit_depth' bits to a 16 bit linear value.
+uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth);
+
+// Converts a 16 bit linear color value to a gamma value on 'bit_depth' bits.
+uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_neon.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_neon.c
@ -0,0 +1,182 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Speed-critical functions for Sharp YUV.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "sharpyuv/sharpyuv_dsp.h"
+
+#if defined(WEBP_USE_NEON)
+#include <assert.h>
+#include <stdlib.h>
+#include <arm_neon.h>
+#endif
+
+extern void InitSharpYuvNEON(void);
+
+#if defined(WEBP_USE_NEON)
+
+static uint16_t clip_NEON(int v, int max) {
+  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
+}
+
+static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
+                                     uint16_t* dst, int len, int bit_depth) {
+  const int max_y = (1 << bit_depth) - 1;
+  int i;
+  const int16x8_t zero = vdupq_n_s16(0);
+  const int16x8_t max = vdupq_n_s16(max_y);
+  uint64x2_t sum = vdupq_n_u64(0);
+  uint64_t diff;
+
+  for (i = 0; i + 8 <= len; i += 8) {
+    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
+    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
+    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
+    const int16x8_t D = vsubq_s16(A, B);       // diff_y
+    const int16x8_t F = vaddq_s16(C, D);       // new_y
+    const uint16x8_t H =
+        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
+    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
+    vst1q_u16(dst + i, H);
+    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
+  }
+  diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
+  for (; i < len; ++i) {
+    const int diff_y = ref[i] - src[i];
+    const int new_y = (int)(dst[i]) + diff_y;
+    dst[i] = clip_NEON(new_y, max_y);
+    diff += (uint64_t)(abs(diff_y));
+  }
+  return diff;
+}
+
+static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
+                                   int16_t* dst, int len) {
+  int i;
+  for (i = 0; i + 8 <= len; i += 8) {
+    const int16x8_t A = vld1q_s16(ref + i);
+    const int16x8_t B = vld1q_s16(src + i);
+    const int16x8_t C = vld1q_s16(dst + i);
+    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
+    const int16x8_t E = vaddq_s16(C, D);   // new_uv
+    vst1q_s16(dst + i, E);
+  }
+  for (; i < len; ++i) {
+    const int diff_uv = ref[i] - src[i];
+    dst[i] += diff_uv;
+  }
+}
+
+static void SharpYuvFilterRow16_NEON(const int16_t* A, const int16_t* B,
+                                     int len, const uint16_t* best_y,
+                                     uint16_t* out, int bit_depth) {
+  const int max_y = (1 << bit_depth) - 1;
+  int i;
+  const int16x8_t max = vdupq_n_s16(max_y);
+  const int16x8_t zero = vdupq_n_s16(0);
+  for (i = 0; i + 8 <= len; i += 8) {
+    const int16x8_t a0 = vld1q_s16(A + i + 0);
+    const int16x8_t a1 = vld1q_s16(A + i + 1);
+    const int16x8_t b0 = vld1q_s16(B + i + 0);
+    const int16x8_t b1 = vld1q_s16(B + i + 1);
+    const int16x8_t a0b1 = vaddq_s16(a0, b1);
+    const int16x8_t a1b0 = vaddq_s16(a1, b0);
+    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
+    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
+    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
+    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
+    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
+    const int16x8_t e0 = vrhaddq_s16(c1, a0);
+    const int16x8_t e1 = vrhaddq_s16(c0, a1);
+    const int16x8x2_t f = vzipq_s16(e0, e1);
+    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
+    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
+    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
+    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
+    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
+    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
+    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
+    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
+  }
+  for (; i < len; ++i) {
+    const int a0b1 = A[i + 0] + B[i + 1];
+    const int a1b0 = A[i + 1] + B[i + 0];
+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
+    out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
+    out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
+  }
+}
+
+static void SharpYuvFilterRow32_NEON(const int16_t* A, const int16_t* B,
+                                     int len, const uint16_t* best_y,
+                                     uint16_t* out, int bit_depth) {
+  const int max_y = (1 << bit_depth) - 1;
+  int i;
+  const uint16x8_t max = vdupq_n_u16(max_y);
+  for (i = 0; i + 4 <= len; i += 4) {
+    const int16x4_t a0 = vld1_s16(A + i + 0);
+    const int16x4_t a1 = vld1_s16(A + i + 1);
+    const int16x4_t b0 = vld1_s16(B + i + 0);
+    const int16x4_t b1 = vld1_s16(B + i + 1);
+    const int32x4_t a0b1 = vaddl_s16(a0, b1);
+    const int32x4_t a1b0 = vaddl_s16(a1, b0);
+    const int32x4_t a0a1b0b1 = vaddq_s32(a0b1, a1b0);  // A0+A1+B0+B1
+    const int32x4_t a0b1_2 = vaddq_s32(a0b1, a0b1);    // 2*(A0+B1)
+    const int32x4_t a1b0_2 = vaddq_s32(a1b0, a1b0);    // 2*(A1+B0)
+    const int32x4_t c0 = vshrq_n_s32(vaddq_s32(a0b1_2, a0a1b0b1), 3);
+    const int32x4_t c1 = vshrq_n_s32(vaddq_s32(a1b0_2, a0a1b0b1), 3);
+    const int32x4_t e0 = vrhaddq_s32(c1, vmovl_s16(a0));
+    const int32x4_t e1 = vrhaddq_s32(c0, vmovl_s16(a1));
+    const int32x4x2_t f = vzipq_s32(e0, e1);
+
+    const int16x8_t g = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i));
+    const int32x4_t h0 = vaddw_s16(f.val[0], vget_low_s16(g));
+    const int32x4_t h1 = vaddw_s16(f.val[1], vget_high_s16(g));
+    const uint16x8_t i_16 = vcombine_u16(vqmovun_s32(h0), vqmovun_s32(h1));
+    const uint16x8_t i_clamped = vminq_u16(i_16, max);
+    vst1q_u16(out + 2 * i + 0, i_clamped);
+  }
+  for (; i < len; ++i) {
+    const int a0b1 = A[i + 0] + B[i + 1];
+    const int a1b0 = A[i + 1] + B[i + 0];
+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
+    out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
+    out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
+  }
+}
+
+static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
+                                   const uint16_t* best_y, uint16_t* out,
+                                   int bit_depth) {
+  if (bit_depth <= 10) {
+    SharpYuvFilterRow16_NEON(A, B, len, best_y, out, bit_depth);
+  } else {
+    SharpYuvFilterRow32_NEON(A, B, len, best_y, out, bit_depth);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) {
+  SharpYuvUpdateY = SharpYuvUpdateY_NEON;
+  SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON;
+  SharpYuvFilterRow = SharpYuvFilterRow_NEON;
+}
+
+#else  // !WEBP_USE_NEON
+
+void InitSharpYuvNEON(void) {}
+
+#endif  // WEBP_USE_NEON
--- a/thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c
@ -0,0 +1,204 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Speed-critical functions for Sharp YUV.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "sharpyuv/sharpyuv_dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <stdlib.h>
+#include <emmintrin.h>
+#endif
+
+extern void InitSharpYuvSSE2(void);
+
+#if defined(WEBP_USE_SSE2)
+
+static uint16_t clip_SSE2(int v, int max) {
+  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
+}
+
+static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
+                                     uint16_t* dst, int len, int bit_depth) {
+  const int max_y = (1 << bit_depth) - 1;
+  uint64_t diff = 0;
+  uint32_t tmp[4];
+  int i;
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i max = _mm_set1_epi16(max_y);
+  const __m128i one = _mm_set1_epi16(1);
+  __m128i sum = zero;
+
+  for (i = 0; i + 8 <= len; i += 8) {
+    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
+    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
+    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
+    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
+    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
+    const __m128i F = _mm_add_epi16(C, D);       // new_y
+    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
+    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
+    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
+    _mm_storeu_si128((__m128i*)(dst + i), H);
+    sum = _mm_add_epi32(sum, I);
+  }
+  _mm_storeu_si128((__m128i*)tmp, sum);
+  diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
+  for (; i < len; ++i) {
+    const int diff_y = ref[i] - src[i];
+    const int new_y = (int)dst[i] + diff_y;
+    dst[i] = clip_SSE2(new_y, max_y);
+    diff += (uint64_t)abs(diff_y);
+  }
+  return diff;
+}
+
+static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
+                                   int16_t* dst, int len) {
+  int i = 0;
+  for (i = 0; i + 8 <= len; i += 8) {
+    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
+    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
+    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
+    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
+    const __m128i E = _mm_add_epi16(C, D);   // new_uv
+    _mm_storeu_si128((__m128i*)(dst + i), E);
+  }
+  for (; i < len; ++i) {
+    const int diff_uv = ref[i] - src[i];
+    dst[i] += diff_uv;
+  }
+}
+
+static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B,
+                                     int len, const uint16_t* best_y,
+                                     uint16_t* out, int bit_depth) {
+  const int max_y = (1 << bit_depth) - 1;
+  int i;
+  const __m128i kCst8 = _mm_set1_epi16(8);
+  const __m128i max = _mm_set1_epi16(max_y);
+  const __m128i zero = _mm_setzero_si128();
+  for (i = 0; i + 8 <= len; i += 8) {
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
+    const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
+    const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
+    const __m128i a0b1 = _mm_add_epi16(a0, b1);
+    const __m128i a1b0 = _mm_add_epi16(a1, b0);
+    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
+    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
+    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
+    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
+    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
+    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
+    const __m128i d0 = _mm_add_epi16(c1, a0);
+    const __m128i d1 = _mm_add_epi16(c0, a1);
+    const __m128i e0 = _mm_srai_epi16(d0, 1);
+    const __m128i e1 = _mm_srai_epi16(d1, 1);
+    const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
+    const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
+    const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
+    const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
+    const __m128i h0 = _mm_add_epi16(g0, f0);
+    const __m128i h1 = _mm_add_epi16(g1, f1);
+    const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
+    const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
+    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
+    _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
+  }
+  for (; i < len; ++i) {
+    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
+    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
+    // We reuse the common sub-expressions.
+    const int a0b1 = A[i + 0] + B[i + 1];
+    const int a1b0 = A[i + 1] + B[i + 0];
+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
+    out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
+    out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
+  }
+}
+
+static WEBP_INLINE __m128i s16_to_s32(__m128i in) {
+  return _mm_srai_epi32(_mm_unpacklo_epi16(in, in), 16);
+}
+
+static void SharpYuvFilterRow32_SSE2(const int16_t* A, const int16_t* B,
+                                     int len, const uint16_t* best_y,
+                                     uint16_t* out, int bit_depth) {
+  const int max_y = (1 << bit_depth) - 1;
+  int i;
+  const __m128i kCst8 = _mm_set1_epi32(8);
+  const __m128i max = _mm_set1_epi16(max_y);
+  const __m128i zero = _mm_setzero_si128();
+  for (i = 0; i + 4 <= len; i += 4) {
+    const __m128i a0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 0)));
+    const __m128i a1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 1)));
+    const __m128i b0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 0)));
+    const __m128i b1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 1)));
+    const __m128i a0b1 = _mm_add_epi32(a0, b1);
+    const __m128i a1b0 = _mm_add_epi32(a1, b0);
+    const __m128i a0a1b0b1 = _mm_add_epi32(a0b1, a1b0);  // A0+A1+B0+B1
+    const __m128i a0a1b0b1_8 = _mm_add_epi32(a0a1b0b1, kCst8);
+    const __m128i a0b1_2 = _mm_add_epi32(a0b1, a0b1);  // 2*(A0+B1)
+    const __m128i a1b0_2 = _mm_add_epi32(a1b0, a1b0);  // 2*(A1+B0)
+    const __m128i c0 = _mm_srai_epi32(_mm_add_epi32(a0b1_2, a0a1b0b1_8), 3);
+    const __m128i c1 = _mm_srai_epi32(_mm_add_epi32(a1b0_2, a0a1b0b1_8), 3);
+    const __m128i d0 = _mm_add_epi32(c1, a0);
+    const __m128i d1 = _mm_add_epi32(c0, a1);
+    const __m128i e0 = _mm_srai_epi32(d0, 1);
+    const __m128i e1 = _mm_srai_epi32(d1, 1);
+    const __m128i f0 = _mm_unpacklo_epi32(e0, e1);
+    const __m128i f1 = _mm_unpackhi_epi32(e0, e1);
+    const __m128i g = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
+    const __m128i h_16 = _mm_add_epi16(g, _mm_packs_epi32(f0, f1));
+    const __m128i final = _mm_max_epi16(_mm_min_epi16(h_16, max), zero);
+    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), final);
+  }
+  for (; i < len; ++i) {
+    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
+    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
+    // We reuse the common sub-expressions.
+    const int a0b1 = A[i + 0] + B[i + 1];
+    const int a1b0 = A[i + 1] + B[i + 0];
+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
+    out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
+    out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
+  }
+}
+
+static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
+                                   const uint16_t* best_y, uint16_t* out,
+                                   int bit_depth) {
+  if (bit_depth <= 10) {
+    SharpYuvFilterRow16_SSE2(A, B, len, best_y, out, bit_depth);
+  } else {
+    SharpYuvFilterRow32_SSE2(A, B, len, best_y, out, bit_depth);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+extern void InitSharpYuvSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) {
+  SharpYuvUpdateY = SharpYuvUpdateY_SSE2;
+  SharpYuvUpdateRGB = SharpYuvUpdateRGB_SSE2;
+  SharpYuvFilterRow = SharpYuvFilterRow_SSE2;
+}
+#else  // !WEBP_USE_SSE2
+
+void InitSharpYuvSSE2(void) {}
+
+#endif  // WEBP_USE_SSE2
--- a/thirdparty/libwebp/src/dec/vp8i_dec.h
+++ b/thirdparty/libwebp/src/dec/vp8i_dec.h
@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define DEC_MAJ_VERSION 1
 #define DEC_MIN_VERSION 2
-#define DEC_REV_VERSION 2
+#define DEC_REV_VERSION 4

 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
 // Constraints are: We need to store one 16x16 block of luma samples (y),
--- a/thirdparty/libwebp/src/dec/vp8l_dec.c
+++ b/thirdparty/libwebp/src/dec/vp8l_dec.c
@ -178,7 +178,7 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {

 //------------------------------------------------------------------------------
 // Decodes the next Huffman code from bit-stream.
-// FillBitWindow(br) needs to be called at minimum every second call
+// VP8LFillBitWindow(br) needs to be called at minimum every second call
 // to ReadSymbol, in order to pre-fetch enough bits.
 static WEBP_INLINE int ReadSymbol(const HuffmanCode* table,
                                  VP8LBitReader* const br) {
@ -321,7 +321,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
    // The first code is either 1 bit or 8 bit code.
    int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
    code_lengths[symbol] = 1;
-    // The second code (if present), is always 8 bit long.
+    // The second code (if present), is always 8 bits long.
    if (num_symbols == 2) {
      symbol = VP8LReadBits(br, 8);
      code_lengths[symbol] = 1;
@ -1281,7 +1281,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
    uint8_t* const new_data = (uint8_t*)new_color_map;
    new_color_map[0] = transform->data_[0];
    for (i = 4; i < 4 * num_colors; ++i) {
-      // Equivalent to AddPixelEq(), on a byte-basis.
+      // Equivalent to VP8LAddPixels(), on a byte-basis.
      new_data[i] = (data[i] + new_data[i - 4]) & 0xff;
    }
    for (; i < 4 * final_num_colors; ++i) {
--- a/thirdparty/libwebp/src/demux/demux.c
+++ b/thirdparty/libwebp/src/demux/demux.c
@ -25,7 +25,7 @@

 #define DMUX_MAJ_VERSION 1
 #define DMUX_MIN_VERSION 2
-#define DMUX_REV_VERSION 2
+#define DMUX_REV_VERSION 4

 typedef struct {
  size_t start_;        // start location of the data
@ -614,7 +614,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {

  while (f != NULL) {
    const int cur_frame_set = f->frame_num_;
-    int frame_count = 0;

    // Check frame properties.
    for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
@ -649,8 +648,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
                            dmux->canvas_width_, dmux->canvas_height_)) {
        return 0;
      }
-
-      ++frame_count;
    }
  }
  return 1;
--- a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
+++ b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c
@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
 static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
                              int alpha_stride, int width, int height,
                              uint8_t* WEBP_RESTRICT dst, int dst_stride) {
-  uint32_t alpha_mask = 0xffffffffu;
+  uint32_t alpha_mask = 0xffu;
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;
@ -107,6 +107,7 @@ static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
    dst += dst_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
+  alpha_mask *= 0x01010101;
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask != 0xffffffffu);
@ -135,7 +136,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
 static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
                             int width, int height,
                             uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
-  uint32_t alpha_mask = 0xffffffffu;
+  uint32_t alpha_mask = 0xffu;
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;
@ -157,6 +158,7 @@ static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
    alpha += alpha_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
+  alpha_mask *= 0x01010101;
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask == 0xffffffffu);
--- a/thirdparty/libwebp/src/dsp/cpu.c
+++ b/thirdparty/libwebp/src/dsp/cpu.c
@ -11,7 +11,7 @@
 //
 // Author: Christian Duvivier (cduvivier@google.com)

-#include "src/dsp/dsp.h"
+#include "src/dsp/cpu.h"

 #if defined(WEBP_HAVE_NEON_RTCD)
 #include <stdio.h>
--- a/thirdparty/libwebp/src/dsp/cpu.h
+++ b/thirdparty/libwebp/src/dsp/cpu.h
@ -0,0 +1,254 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   CPU detection functions and macros.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_CPU_H_
+#define WEBP_DSP_CPU_H_
+
+#ifdef HAVE_CONFIG_H
+#include "src/webp/config.h"
+#endif
+
+#include "src/webp/types.h"
+
+#if defined(__GNUC__)
+#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
+#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
+#else
+#define LOCAL_GCC_VERSION 0
+#define LOCAL_GCC_PREREQ(maj, min) 0
+#endif
+
+#if defined(__clang__)
+#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
+#define LOCAL_CLANG_PREREQ(maj, min) \
+  (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
+#else
+#define LOCAL_CLANG_VERSION 0
+#define LOCAL_CLANG_PREREQ(maj, min) 0
+#endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#if !defined(HAVE_CONFIG_H)
+#if defined(_MSC_VER) && _MSC_VER > 1310 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
+#endif
+#endif
+
+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
+// files without intrinsics, allowing the corresponding Init() to be called.
+// Files containing intrinsics will need to be built targeting the instruction
+// set so should succeed on one of the earlier tests.
+#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
+#define WEBP_USE_SSE2
+#endif
+
+#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
+#define WEBP_HAVE_SSE2
+#endif
+
+#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
+#define WEBP_USE_SSE41
+#endif
+
+#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
+#define WEBP_HAVE_SSE41
+#endif
+
+#undef WEBP_MSC_SSE41
+#undef WEBP_MSC_SSE2
+
+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
+// inline assembly would need to be modified for use with Native Client.
+#if ((defined(__ARM_NEON__) || defined(__aarch64__)) &&       \
+     (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
+    !defined(__native_client__)
+#define WEBP_USE_NEON
+#endif
+
+#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
+    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
+#define WEBP_ANDROID_NEON  // Android targets that may have NEON
+#define WEBP_USE_NEON
+#endif
+
+// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
+// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
+// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
+// vtbl4_u8(); a fix was made in 16.6.
+#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
+                          (_MSC_VER >= 1926 && defined(_M_ARM64)))
+#define WEBP_USE_NEON
+#define WEBP_USE_INTRINSICS
+#endif
+
+#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
+#define WEBP_HAVE_NEON
+#endif
+
+#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
+    (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
+#define WEBP_USE_MIPS32
+#if (__mips_isa_rev >= 2)
+#define WEBP_USE_MIPS32_R2
+#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
+#define WEBP_USE_MIPS_DSP_R2
+#endif
+#endif
+#endif
+
+#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
+#define WEBP_USE_MSA
+#endif
+
+#ifndef WEBP_DSP_OMIT_C_CODE
+#define WEBP_DSP_OMIT_C_CODE 1
+#endif
+
+#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
+#define WEBP_NEON_OMIT_C_CODE 1
+#else
+#define WEBP_NEON_OMIT_C_CODE 0
+#endif
+
+#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \
+      defined(__aarch64__))
+#define WEBP_NEON_WORK_AROUND_GCC 1
+#else
+#define WEBP_NEON_WORK_AROUND_GCC 0
+#endif
+
+// This macro prevents thread_sanitizer from reporting known concurrent writes.
+#define WEBP_TSAN_IGNORE_FUNCTION
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#undef WEBP_TSAN_IGNORE_FUNCTION
+#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
+#endif
+#endif
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define WEBP_MSAN
+#endif
+#endif
+
+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
+#include <pthread.h>  // NOLINT
+
+#define WEBP_DSP_INIT(func)                                         \
+  do {                                                              \
+    static volatile VP8CPUInfo func##_last_cpuinfo_used =           \
+        (VP8CPUInfo)&func##_last_cpuinfo_used;                      \
+    static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
+    if (pthread_mutex_lock(&func##_lock)) break;                    \
+    if (func##_last_cpuinfo_used != VP8GetCPUInfo) func();          \
+    func##_last_cpuinfo_used = VP8GetCPUInfo;                       \
+    (void)pthread_mutex_unlock(&func##_lock);                       \
+  } while (0)
+#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
+#define WEBP_DSP_INIT(func)                               \
+  do {                                                    \
+    static volatile VP8CPUInfo func##_last_cpuinfo_used = \
+        (VP8CPUInfo)&func##_last_cpuinfo_used;            \
+    if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
+    func();                                               \
+    func##_last_cpuinfo_used = VP8GetCPUInfo;             \
+  } while (0)
+#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
+
+// Defines an Init + helper function that control multiple initialization of
+// function pointers / tables.
+/* Usage:
+   WEBP_DSP_INIT_FUNC(InitFunc) {
+     ...function body
+   }
+*/
+#define WEBP_DSP_INIT_FUNC(name)                                            \
+  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void);                  \
+  WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
+  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
+
+#define WEBP_UBSAN_IGNORE_UNDEF
+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
+#if defined(__clang__) && defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+// This macro prevents the undefined behavior sanitizer from reporting
+// failures. This is only meant to silence unaligned loads on platforms that
+// are known to support them.
+#undef WEBP_UBSAN_IGNORE_UNDEF
+#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))
+
+// This macro prevents the undefined behavior sanitizer from reporting
+// failures related to unsigned integer overflows. This is only meant to
+// silence cases where this well defined behavior is expected.
+#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
+  __attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
+
+// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
+// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
+#if !defined(WEBP_OFFSET_PTR)
+#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
+#endif
+
+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
+#if !defined(WEBP_SWAP_16BIT_CSP)
+#define WEBP_SWAP_16BIT_CSP 0
+#endif
+
+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN) &&                   \
+    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
+     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
+#define WORDS_BIGENDIAN
+#endif
+
+typedef enum {
+  kSSE2,
+  kSSE3,
+  kSlowSSSE3,  // special feature for slow SSSE3 architectures
+  kSSE4_1,
+  kAVX,
+  kAVX2,
+  kNEON,
+  kMIPS32,
+  kMIPSdspR2,
+  kMSA
+} CPUFeature;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// returns true if the CPU supports the feature.
+typedef int (*VP8CPUInfo)(CPUFeature feature);
+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  // WEBP_DSP_CPU_H_
--- a/thirdparty/libwebp/src/dsp/dsp.h
+++ b/thirdparty/libwebp/src/dsp/dsp.h
@ -18,6 +18,7 @@
 #include "src/webp/config.h"
 #endif

+#include "src/dsp/cpu.h"
 #include "src/webp/types.h"

 #ifdef __cplusplus
@ -43,225 +44,6 @@ extern "C" {
 #define WEBP_RESTRICT
 #endif

-//------------------------------------------------------------------------------
-// CPU detection
-
-#if defined(__GNUC__)
-# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
-# define LOCAL_GCC_PREREQ(maj, min) \
-    (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
-#else
-# define LOCAL_GCC_VERSION 0
-# define LOCAL_GCC_PREREQ(maj, min) 0
-#endif
-
-#if defined(__clang__)
-# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
-# define LOCAL_CLANG_PREREQ(maj, min) \
-    (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
-#else
-# define LOCAL_CLANG_VERSION 0
-# define LOCAL_CLANG_PREREQ(maj, min) 0
-#endif
-
-#ifndef __has_builtin
-# define __has_builtin(x) 0
-#endif
-
-#if !defined(HAVE_CONFIG_H)
-#if defined(_MSC_VER) && _MSC_VER > 1310 && \
-    (defined(_M_X64) || defined(_M_IX86))
-#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
-    (defined(_M_X64) || defined(_M_IX86))
-#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
-#endif
-#endif
-
-// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
-// files without intrinsics, allowing the corresponding Init() to be called.
-// Files containing intrinsics will need to be built targeting the instruction
-// set so should succeed on one of the earlier tests.
-#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
-    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
-#define WEBP_USE_SSE2
-#endif
-
-#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
-#define WEBP_HAVE_SSE2
-#endif
-
-#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
-    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
-#define WEBP_USE_SSE41
-#endif
-
-#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
-#define WEBP_HAVE_SSE41
-#endif
-
-#undef WEBP_MSC_SSE41
-#undef WEBP_MSC_SSE2
-
-// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
-// inline assembly would need to be modified for use with Native Client.
-#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
-     (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
-    !defined(__native_client__)
-#define WEBP_USE_NEON
-#endif
-
-#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
-    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
-#define WEBP_ANDROID_NEON  // Android targets that may have NEON
-#define WEBP_USE_NEON
-#endif
-
-// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
-// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
-// arm_neon.h.
-#if defined(_MSC_VER) && \
-  ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
-   (_MSC_VER >= 1920 && defined(_M_ARM64)))
-#define WEBP_USE_NEON
-#define WEBP_USE_INTRINSICS
-#endif
-
-#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
-#define WEBP_HAVE_NEON
-#endif
-
-#if defined(__mips__) && !defined(__mips64) && \
-    defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
-#define WEBP_USE_MIPS32
-#if (__mips_isa_rev >= 2)
-#define WEBP_USE_MIPS32_R2
-#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
-#define WEBP_USE_MIPS_DSP_R2
-#endif
-#endif
-#endif
-
-#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
-#define WEBP_USE_MSA
-#endif
-
-#ifndef WEBP_DSP_OMIT_C_CODE
-#define WEBP_DSP_OMIT_C_CODE 1
-#endif
-
-#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
-#define WEBP_NEON_OMIT_C_CODE 1
-#else
-#define WEBP_NEON_OMIT_C_CODE 0
-#endif
-
-#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
-#define WEBP_NEON_WORK_AROUND_GCC 1
-#else
-#define WEBP_NEON_WORK_AROUND_GCC 0
-#endif
-
-// This macro prevents thread_sanitizer from reporting known concurrent writes.
-#define WEBP_TSAN_IGNORE_FUNCTION
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#undef WEBP_TSAN_IGNORE_FUNCTION
-#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
-#endif
-#endif
-
-#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
-#include <pthread.h>  // NOLINT
-
-#define WEBP_DSP_INIT(func) do {                                    \
-  static volatile VP8CPUInfo func ## _last_cpuinfo_used =           \
-      (VP8CPUInfo)&func ## _last_cpuinfo_used;                      \
-  static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
-  if (pthread_mutex_lock(&func ## _lock)) break;                    \
-  if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func();          \
-  func ## _last_cpuinfo_used = VP8GetCPUInfo;                       \
-  (void)pthread_mutex_unlock(&func ## _lock);                       \
-} while (0)
-#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
-#define WEBP_DSP_INIT(func) do {                                    \
-  static volatile VP8CPUInfo func ## _last_cpuinfo_used =           \
-      (VP8CPUInfo)&func ## _last_cpuinfo_used;                      \
-  if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break;           \
-  func();                                                           \
-  func ## _last_cpuinfo_used = VP8GetCPUInfo;                       \
-} while (0)
-#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
-
-// Defines an Init + helper function that control multiple initialization of
-// function pointers / tables.
-/* Usage:
-   WEBP_DSP_INIT_FUNC(InitFunc) {
-     ...function body
-   }
-*/
-#define WEBP_DSP_INIT_FUNC(name)                             \
-  static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
-  WEBP_TSAN_IGNORE_FUNCTION void name(void) {                \
-    WEBP_DSP_INIT(name ## _body);                            \
-  }                                                          \
-  static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
-
-#define WEBP_UBSAN_IGNORE_UNDEF
-#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-#if defined(__clang__) && defined(__has_attribute)
-#if __has_attribute(no_sanitize)
-// This macro prevents the undefined behavior sanitizer from reporting
-// failures. This is only meant to silence unaligned loads on platforms that
-// are known to support them.
-#undef WEBP_UBSAN_IGNORE_UNDEF
-#define WEBP_UBSAN_IGNORE_UNDEF \
-  __attribute__((no_sanitize("undefined")))
-
-// This macro prevents the undefined behavior sanitizer from reporting
-// failures related to unsigned integer overflows. This is only meant to
-// silence cases where this well defined behavior is expected.
-#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
-  __attribute__((no_sanitize("unsigned-integer-overflow")))
-#endif
-#endif
-
-// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
-// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
-#if !defined(WEBP_OFFSET_PTR)
-#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
-#endif
-
-// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
-#if !defined(WEBP_SWAP_16BIT_CSP)
-#define WEBP_SWAP_16BIT_CSP 0
-#endif
-
-// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
-#if !defined(WORDS_BIGENDIAN) && \
-    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
-     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
-#define WORDS_BIGENDIAN
-#endif
-
-typedef enum {
-  kSSE2,
-  kSSE3,
-  kSlowSSSE3,  // special feature for slow SSSE3 architectures
-  kSSE4_1,
-  kAVX,
-  kAVX2,
-  kNEON,
-  kMIPS32,
-  kMIPSdspR2,
-  kMSA
-} CPUFeature;
-// returns true if the CPU supports the feature.
-typedef int (*VP8CPUInfo)(CPUFeature feature);
-WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;

 //------------------------------------------------------------------------------
 // Init stub generator
@ -550,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
 extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
                                    uint8_t* u, uint8_t* v, int width);

-// utilities for accurate RGB->YUV conversion
-extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
-                                       uint16_t* dst, int len);
-extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
-                                     int16_t* dst, int len);
-extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
-                                     int len,
-                                     const uint16_t* best_y, uint16_t* out);
-
 // Must be called before using the above.
 void WebPInitConvertARGBToYUV(void);

--- a/thirdparty/libwebp/src/dsp/lossless.h
+++ b/thirdparty/libwebp/src/dsp/lossless.h
@ -182,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
 // -----------------------------------------------------------------------------
 // Huffman-cost related functions.

-typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
-typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
-                                       int length);
+typedef float (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
+                                      int length);
 typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
                                                const int Y[256]);

@ -198,7 +198,7 @@ typedef struct {        // small struct to hold counters
 } VP8LStreaks;

 typedef struct {            // small struct to hold bit entropy results
-  double entropy;           // entropy
+  float entropy;            // entropy
  uint32_t sum;             // sum of the population
  int nonzeros;             // number of non-zero elements in the population
  uint32_t max_val;         // maximum value in the population
--- a/thirdparty/libwebp/src/dsp/lossless_enc.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc.c
@ -402,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) {
 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
 static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
  int i;
-  double retval = 0.;
+  float retval = 0.f;
  int sumX = 0, sumXY = 0;
  for (i = 0; i < 256; ++i) {
    const int x = X[i];
@ -418,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
    }
  }
  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
-  return (float)retval;
+  return retval;
 }

 void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
@ -636,17 +636,17 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,

 //------------------------------------------------------------------------------

-static double ExtraCost_C(const uint32_t* population, int length) {
+static float ExtraCost_C(const uint32_t* population, int length) {
  int i;
-  double cost = 0.;
+  float cost = 0.f;
  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
  return cost;
 }

-static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
+static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
                                  int length) {
  int i;
-  double cost = 0.;
+  float cost = 0.f;
  for (i = 2; i < length - 2; ++i) {
    const int xy = X[i + 2] + Y[i + 2];
    cost += (i >> 1) * xy;
--- a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
 //     cost += i * *(pop + 1);
 //     pop += 2;
 //   }
-//   return (double)cost;
-static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
+//   return (float)cost;
+static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
  int i, temp0, temp1;
  const uint32_t* pop = &population[4];
  const uint32_t* const LoopEnd = &population[length];
@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
    : "memory", "hi", "lo"
  );

-  return (double)((int64_t)temp0 << 32 | temp1);
+  return (float)((int64_t)temp0 << 32 | temp1);
 }

 // C version of this function:
@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
 //     pX += 2;
 //     pY += 2;
 //   }
-//   return (double)cost;
-static double ExtraCostCombined_MIPS32(const uint32_t* const X,
-                                       const uint32_t* const Y, int length) {
+//   return (float)cost;
+static float ExtraCostCombined_MIPS32(const uint32_t* const X,
+                                      const uint32_t* const Y, int length) {
  int i, temp0, temp1, temp2, temp3;
  const uint32_t* pX = &X[4];
  const uint32_t* pY = &Y[4];
@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X,
    : "memory", "hi", "lo"
  );

-  return (double)((int64_t)temp0 << 32 | temp1);
+  return (float)((int64_t)temp0 << 32 | temp1);
 }

 #define HUFFMAN_COST_PASS                                 \
@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
 static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
                             uint32_t* pout, int size) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  const uint32_t end = ((size) / 4) * 4;
+  const int end = ((size) / 4) * 4;
  const uint32_t* const LoopEnd = pa + end;
  int i;
  ASM_START
  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
  ASM_END_0
-  for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
+  for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
 }

 static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-  const uint32_t end = ((size) / 4) * 4;
+  const int end = ((size) / 4) * 4;
  const uint32_t* const LoopEnd = pa + end;
  int i;
  ASM_START
  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
  ASM_END_1
-  for (i = end; i < size; ++i) pout[i] += pa[i];
+  for (i = 0; i < size - end; ++i) pout[i] += pa[i];
 }

 #undef ASM_END_1
--- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
@ -239,7 +239,7 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {

 static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
  int i;
-  double retval = 0.;
+  float retval = 0.f;
  int sumX = 0, sumXY = 0;
  const __m128i zero = _mm_setzero_si128();

@ -273,7 +273,7 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
    }
  }
  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
-  return (float)retval;
+  return retval;
 }

 #else
--- a/thirdparty/libwebp/src/dsp/yuv.c
+++ b/thirdparty/libwebp/src/dsp/yuv.c
@ -194,50 +194,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,

 //-----------------------------------------------------------------------------

-#if !WEBP_NEON_OMIT_C_CODE
-#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
-static uint16_t clip_y(int v) {
-  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
-                                  uint16_t* dst, int len) {
-  uint64_t diff = 0;
-  int i;
-  for (i = 0; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)dst[i] + diff_y;
-    dst[i] = clip_y(new_y);
-    diff += (uint64_t)abs(diff_y);
-  }
-  return diff;
-}
-
-static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
-                                int16_t* dst, int len) {
-  int i;
-  for (i = 0; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
-                                const uint16_t* best_y, uint16_t* out) {
-  int i;
-  for (i = 0; i < len; ++i, ++A, ++B) {
-    const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
-    const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
-    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
-    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
-  }
-}
-#endif  // !WEBP_NEON_OMIT_C_CODE
-
-#undef MAX_Y
-
-//-----------------------------------------------------------------------------
-
 void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
 void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
 void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
@ -247,18 +203,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
 void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
                            int src_width, int do_store);

-uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
-                                uint16_t* dst, int len);
-void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
-                              int16_t* dst, int len);
-void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
-                              const uint16_t* best_y, uint16_t* out);
-
 extern void WebPInitConvertARGBToYUVSSE2(void);
 extern void WebPInitConvertARGBToYUVSSE41(void);
 extern void WebPInitConvertARGBToYUVNEON(void);
-extern void WebPInitSharpYUVSSE2(void);
-extern void WebPInitSharpYUVNEON(void);

 WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
  WebPConvertARGBToY = ConvertARGBToY_C;
@ -269,17 +216,10 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {

  WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;

-#if !WEBP_NEON_OMIT_C_CODE
-  WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
-  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
-  WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
-#endif
-
  if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_HAVE_SSE2)
    if (VP8GetCPUInfo(kSSE2)) {
      WebPInitConvertARGBToYUVSSE2();
-      WebPInitSharpYUVSSE2();
    }
 #endif  // WEBP_HAVE_SSE2
 #if defined(WEBP_HAVE_SSE41)
@ -293,7 +233,6 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
  if (WEBP_NEON_OMIT_C_CODE ||
      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
    WebPInitConvertARGBToYUVNEON();
-    WebPInitSharpYUVNEON();
  }
 #endif  // WEBP_HAVE_NEON

@ -302,7 +241,4 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
  assert(WebPConvertRGB24ToY != NULL);
  assert(WebPConvertBGR24ToY != NULL);
  assert(WebPConvertRGBA32ToUV != NULL);
-  assert(WebPSharpYUVUpdateY != NULL);
-  assert(WebPSharpYUVUpdateRGB != NULL);
-  assert(WebPSharpYUVFilterRow != NULL);
 }
--- a/thirdparty/libwebp/src/dsp/yuv_neon.c
+++ b/thirdparty/libwebp/src/dsp/yuv_neon.c
@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
  WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
 }

-//------------------------------------------------------------------------------
-
-#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
-static uint16_t clip_y_NEON(int v) {
-  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
-                                     uint16_t* dst, int len) {
-  int i;
-  const int16x8_t zero = vdupq_n_s16(0);
-  const int16x8_t max = vdupq_n_s16(MAX_Y);
-  uint64x2_t sum = vdupq_n_u64(0);
-  uint64_t diff;
-
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
-    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
-    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
-    const int16x8_t D = vsubq_s16(A, B);       // diff_y
-    const int16x8_t F = vaddq_s16(C, D);       // new_y
-    const uint16x8_t H =
-        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
-    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
-    vst1q_u16(dst + i, H);
-    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
-  }
-  diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
-  for (; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)(dst[i]) + diff_y;
-    dst[i] = clip_y_NEON(new_y);
-    diff += (uint64_t)(abs(diff_y));
-  }
-  return diff;
-}
-
-static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
-                                   int16_t* dst, int len) {
-  int i;
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t A = vld1q_s16(ref + i);
-    const int16x8_t B = vld1q_s16(src + i);
-    const int16x8_t C = vld1q_s16(dst + i);
-    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
-    const int16x8_t E = vaddq_s16(C, D);   // new_uv
-    vst1q_s16(dst + i, E);
-  }
-  for (; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
-                                   const uint16_t* best_y, uint16_t* out) {
-  int i;
-  const int16x8_t max = vdupq_n_s16(MAX_Y);
-  const int16x8_t zero = vdupq_n_s16(0);
-  for (i = 0; i + 8 <= len; i += 8) {
-    const int16x8_t a0 = vld1q_s16(A + i + 0);
-    const int16x8_t a1 = vld1q_s16(A + i + 1);
-    const int16x8_t b0 = vld1q_s16(B + i + 0);
-    const int16x8_t b1 = vld1q_s16(B + i + 1);
-    const int16x8_t a0b1 = vaddq_s16(a0, b1);
-    const int16x8_t a1b0 = vaddq_s16(a1, b0);
-    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
-    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
-    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
-    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
-    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
-    const int16x8_t d0 = vaddq_s16(c1, a0);
-    const int16x8_t d1 = vaddq_s16(c0, a1);
-    const int16x8_t e0 = vrshrq_n_s16(d0, 1);
-    const int16x8_t e1 = vrshrq_n_s16(d1, 1);
-    const int16x8x2_t f = vzipq_s16(e0, e1);
-    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
-    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
-    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
-    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
-    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
-    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
-    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
-    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
-  }
-  for (; i < len; ++i) {
-    const int a0b1 = A[i + 0] + B[i + 1];
-    const int a1b0 = A[i + 1] + B[i + 0];
-    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-    out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
-    out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
-  }
-}
-#undef MAX_Y
-
-//------------------------------------------------------------------------------
-
-extern void WebPInitSharpYUVNEON(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
-  WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
-  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
-  WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
-}
-
 #else  // !WEBP_USE_NEON

 WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
-WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)

 #endif  // WEBP_USE_NEON
--- a/thirdparty/libwebp/src/dsp/yuv_sse2.c
+++ b/thirdparty/libwebp/src/dsp/yuv_sse2.c
@ -747,128 +747,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
  WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
 }

-//------------------------------------------------------------------------------
-
-#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
-static uint16_t clip_y(int v) {
-  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
-}
-
-static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
-                                     uint16_t* dst, int len) {
-  uint64_t diff = 0;
-  uint32_t tmp[4];
-  int i;
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i max = _mm_set1_epi16(MAX_Y);
-  const __m128i one = _mm_set1_epi16(1);
-  __m128i sum = zero;
-
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
-    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
-    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
-    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
-    const __m128i F = _mm_add_epi16(C, D);       // new_y
-    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
-    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
-    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
-    _mm_storeu_si128((__m128i*)(dst + i), H);
-    sum = _mm_add_epi32(sum, I);
-  }
-  _mm_storeu_si128((__m128i*)tmp, sum);
-  diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
-  for (; i < len; ++i) {
-    const int diff_y = ref[i] - src[i];
-    const int new_y = (int)dst[i] + diff_y;
-    dst[i] = clip_y(new_y);
-    diff += (uint64_t)abs(diff_y);
-  }
-  return diff;
-}
-
-static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
-                                   int16_t* dst, int len) {
-  int i = 0;
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
-    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
-    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
-    const __m128i E = _mm_add_epi16(C, D);   // new_uv
-    _mm_storeu_si128((__m128i*)(dst + i), E);
-  }
-  for (; i < len; ++i) {
-    const int diff_uv = ref[i] - src[i];
-    dst[i] += diff_uv;
-  }
-}
-
-static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
-                                   const uint16_t* best_y, uint16_t* out) {
-  int i;
-  const __m128i kCst8 = _mm_set1_epi16(8);
-  const __m128i max = _mm_set1_epi16(MAX_Y);
-  const __m128i zero = _mm_setzero_si128();
-  for (i = 0; i + 8 <= len; i += 8) {
-    const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
-    const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
-    const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
-    const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
-    const __m128i a0b1 = _mm_add_epi16(a0, b1);
-    const __m128i a1b0 = _mm_add_epi16(a1, b0);
-    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
-    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
-    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
-    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
-    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
-    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
-    const __m128i d0 = _mm_add_epi16(c1, a0);
-    const __m128i d1 = _mm_add_epi16(c0, a1);
-    const __m128i e0 = _mm_srai_epi16(d0, 1);
-    const __m128i e1 = _mm_srai_epi16(d1, 1);
-    const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
-    const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
-    const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
-    const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
-    const __m128i h0 = _mm_add_epi16(g0, f0);
-    const __m128i h1 = _mm_add_epi16(g1, f1);
-    const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
-    const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
-    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
-    _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
-  }
-  for (; i < len; ++i) {
-    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
-    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
-    // We reuse the common sub-expressions.
-    const int a0b1 = A[i + 0] + B[i + 1];
-    const int a1b0 = A[i + 1] + B[i + 0];
-    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
-    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
-  }
-}
-
-#undef MAX_Y
-
-//------------------------------------------------------------------------------
-
-extern void WebPInitSharpYUVSSE2(void);
-
-WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) {
-  WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2;
-  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2;
-  WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2;
-}
-
 #else  // !WEBP_USE_SSE2

 WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
 WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
-WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2)

 #endif  // WEBP_USE_SSE2
--- a/thirdparty/libwebp/src/enc/alpha_enc.c
+++ b/thirdparty/libwebp/src/enc/alpha_enc.c
@ -86,7 +86,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
  // a decoder bug related to alpha with color cache.
  // See: https://code.google.com/p/webp/issues/detail?id=239
  // Need to re-enable this later.
-  ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK);
+  ok = VP8LEncodeStream(&config, &picture, bw, /*use_cache=*/0);
  WebPPictureFree(&picture);
  ok = ok && !bw->error_;
  if (!ok) {
--- a/thirdparty/libwebp/src/enc/backward_references_cost_enc.c
+++ b/thirdparty/libwebp/src/enc/backward_references_cost_enc.c
@ -15,10 +15,11 @@
 //

 #include <assert.h>
+#include <float.h>

+#include "src/dsp/lossless_common.h"
 #include "src/enc/backward_references_enc.h"
 #include "src/enc/histogram_enc.h"
-#include "src/dsp/lossless_common.h"
 #include "src/utils/color_cache_utils.h"
 #include "src/utils/utils.h"

@ -30,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
                                      const PixOrCopy v);

 typedef struct {
-  double alpha_[VALUES_IN_BYTE];
-  double red_[VALUES_IN_BYTE];
-  double blue_[VALUES_IN_BYTE];
-  double distance_[NUM_DISTANCE_CODES];
-  double* literal_;
+  float alpha_[VALUES_IN_BYTE];
+  float red_[VALUES_IN_BYTE];
+  float blue_[VALUES_IN_BYTE];
+  float distance_[NUM_DISTANCE_CODES];
+  float* literal_;
 } CostModel;

 static void ConvertPopulationCountTableToBitEstimates(
-    int num_symbols, const uint32_t population_counts[], double output[]) {
+    int num_symbols, const uint32_t population_counts[], float output[]) {
  uint32_t sum = 0;
  int nonzeros = 0;
  int i;
@ -51,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates(
  if (nonzeros <= 1) {
    memset(output, 0, num_symbols * sizeof(*output));
  } else {
-    const double logsum = VP8LFastLog2(sum);
+    const float logsum = VP8LFastLog2(sum);
    for (i = 0; i < num_symbols; ++i) {
      output[i] = logsum - VP8LFastLog2(population_counts[i]);
    }
@ -75,8 +76,8 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
  }

  ConvertPopulationCountTableToBitEstimates(
-      VP8LHistogramNumCodes(histo->palette_code_bits_),
-      histo->literal_, m->literal_);
+      VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_,
+      m->literal_);
  ConvertPopulationCountTableToBitEstimates(
      VALUES_IN_BYTE, histo->red_, m->red_);
  ConvertPopulationCountTableToBitEstimates(
@ -92,27 +93,27 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
  return ok;
 }

-static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
+static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) {
  return m->alpha_[v >> 24] +
         m->red_[(v >> 16) & 0xff] +
         m->literal_[(v >> 8) & 0xff] +
         m->blue_[v & 0xff];
 }

-static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
+static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) {
  const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
  return m->literal_[literal_idx];
 }

-static WEBP_INLINE double GetLengthCost(const CostModel* const m,
-                                        uint32_t length) {
+static WEBP_INLINE float GetLengthCost(const CostModel* const m,
+                                       uint32_t length) {
  int code, extra_bits;
  VP8LPrefixEncodeBits(length, &code, &extra_bits);
  return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
 }

-static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
-                                          uint32_t distance) {
+static WEBP_INLINE float GetDistanceCost(const CostModel* const m,
+                                         uint32_t distance) {
  int code, extra_bits;
  VP8LPrefixEncodeBits(distance, &code, &extra_bits);
  return m->distance_[code] + extra_bits;
@ -122,20 +123,20 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel(
    const uint32_t* const argb, VP8LColorCache* const hashers,
    const CostModel* const cost_model, int idx, int use_color_cache,
    float prev_cost, float* const cost, uint16_t* const dist_array) {
-  double cost_val = prev_cost;
+  float cost_val = prev_cost;
  const uint32_t color = argb[idx];
  const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1;
  if (ix >= 0) {
    // use_color_cache is true and hashers contains color
-    const double mul0 = 0.68;
+    const float mul0 = 0.68f;
    cost_val += GetCacheCost(cost_model, ix) * mul0;
  } else {
-    const double mul1 = 0.82;
+    const float mul1 = 0.82f;
    if (use_color_cache) VP8LColorCacheInsert(hashers, color);
    cost_val += GetLiteralCost(cost_model, color) * mul1;
  }
  if (cost[idx] > cost_val) {
-    cost[idx] = (float)cost_val;
+    cost[idx] = cost_val;
    dist_array[idx] = 1;  // only one is inserted.
  }
 }
@ -172,7 +173,7 @@ struct CostInterval {

 // The GetLengthCost(cost_model, k) are cached in a CostCacheInterval.
 typedef struct {
-  double cost_;
+  float cost_;
  int start_;
  int end_;       // Exclusive.
 } CostCacheInterval;
@ -187,7 +188,7 @@ typedef struct {
  int count_;  // The number of stored intervals.
  CostCacheInterval* cache_intervals_;
  size_t cache_intervals_size_;
-  double cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
+  float cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
  float* costs_;
  uint16_t* dist_array_;
  // Most of the time, we only need few intervals -> use a free-list, to avoid
@ -262,10 +263,13 @@ static int CostManagerInit(CostManager* const manager,
  CostManagerInitFreeList(manager);

  // Fill in the cost_cache_.
-  manager->cache_intervals_size_ = 1;
-  manager->cost_cache_[0] = GetLengthCost(cost_model, 0);
-  for (i = 1; i < cost_cache_size; ++i) {
+  // Has to be done in two passes due to a GCC bug on i686
+  // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
+  for (i = 0; i < cost_cache_size; ++i) {
    manager->cost_cache_[i] = GetLengthCost(cost_model, i);
+  }
+  manager->cache_intervals_size_ = 1;
+  for (i = 1; i < cost_cache_size; ++i) {
    // Get the number of bound intervals.
    if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) {
      ++manager->cache_intervals_size_;
@ -294,7 +298,7 @@ static int CostManagerInit(CostManager* const manager,
    cur->end_ = 1;
    cur->cost_ = manager->cost_cache_[0];
    for (i = 1; i < cost_cache_size; ++i) {
-      const double cost_val = manager->cost_cache_[i];
+      const float cost_val = manager->cost_cache_[i];
      if (cost_val != cur->cost_) {
        ++cur;
        // Initialize an interval.
@ -303,6 +307,8 @@ static int CostManagerInit(CostManager* const manager,
      }
      cur->end_ = i + 1;
    }
+    assert((size_t)(cur - manager->cache_intervals_) + 1 ==
+           manager->cache_intervals_size_);
  }

  manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
@ -311,7 +317,7 @@ static int CostManagerInit(CostManager* const manager,
    return 0;
  }
  // Set the initial costs_ high for every pixel as we will keep the minimum.
-  for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f;
+  for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX;

  return 1;
 }
@ -457,7 +463,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
 // If handling the interval or one of its subintervals becomes to heavy, its
 // contribution is added to the costs right away.
 static WEBP_INLINE void PushInterval(CostManager* const manager,
-                                     double distance_cost, int position,
+                                     float distance_cost, int position,
                                     int len) {
  size_t i;
  CostInterval* interval = manager->head_;
@ -474,7 +480,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
      const int k = j - position;
      float cost_tmp;
      assert(k >= 0 && k < MAX_LENGTH);
-      cost_tmp = (float)(distance_cost + manager->cost_cache_[k]);
+      cost_tmp = distance_cost + manager->cost_cache_[k];

      if (manager->costs_[j] > cost_tmp) {
        manager->costs_[j] = cost_tmp;
@ -492,7 +498,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
    const int end = position + (cost_cache_intervals[i].end_ > len
                                 ? len
                                 : cost_cache_intervals[i].end_);
-    const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_);
+    const float cost = distance_cost + cost_cache_intervals[i].cost_;

    for (; interval != NULL && interval->start_ < end;
         interval = interval_next) {
@ -570,22 +576,21 @@ static int BackwardReferencesHashChainDistanceOnly(
  const int pix_count = xsize * ysize;
  const int use_color_cache = (cache_bits > 0);
  const size_t literal_array_size =
-      sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
-                        ((cache_bits > 0) ? (1 << cache_bits) : 0));
+      sizeof(float) * (VP8LHistogramNumCodes(cache_bits));
  const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
  CostModel* const cost_model =
      (CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
  VP8LColorCache hashers;
  CostManager* cost_manager =
-      (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager));
+      (CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager));
  int offset_prev = -1, len_prev = -1;
-  double offset_cost = -1;
+  float offset_cost = -1.f;
  int first_offset_is_constant = -1;  // initialized with 'impossible' value
  int reach = 0;

  if (cost_model == NULL || cost_manager == NULL) goto Error;

-  cost_model->literal_ = (double*)(cost_model + 1);
+  cost_model->literal_ = (float*)(cost_model + 1);
  if (use_color_cache) {
    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    if (!cc_init) goto Error;
@ -675,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly(
  }

  ok = !refs->error_;
-Error:
+ Error:
  if (cc_init) VP8LColorCacheClear(&hashers);
  CostManagerClear(cost_manager);
  WebPSafeFree(cost_model);
--- a/thirdparty/libwebp/src/enc/backward_references_enc.c
+++ b/thirdparty/libwebp/src/enc/backward_references_enc.c
@ -10,6 +10,8 @@
 // Author: Jyrki Alakuijala (jyrki@google.com)
 //

+#include "src/enc/backward_references_enc.h"
+
 #include <assert.h>
 #include <float.h>
 #include <math.h>
@ -17,10 +19,11 @@
 #include "src/dsp/dsp.h"
 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
-#include "src/enc/backward_references_enc.h"
 #include "src/enc/histogram_enc.h"
+#include "src/enc/vp8i_enc.h"
 #include "src/utils/color_cache_utils.h"
 #include "src/utils/utils.h"
+#include "src/webp/encode.h"

 #define MIN_BLOCK_SIZE 256  // minimum block size for backward references

@ -255,10 +258,13 @@ static WEBP_INLINE int MaxFindCopyLength(int len) {

 int VP8LHashChainFill(VP8LHashChain* const p, int quality,
                      const uint32_t* const argb, int xsize, int ysize,
-                      int low_effort) {
+                      int low_effort, const WebPPicture* const pic,
+                      int percent_range, int* const percent) {
  const int size = xsize * ysize;
  const int iter_max = GetMaxItersForQuality(quality);
  const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
+  int remaining_percent = percent_range;
+  int percent_start = *percent;
  int pos;
  int argb_comp;
  uint32_t base_position;
@ -276,7 +282,13 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,

  hash_to_first_index =
      (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
-  if (hash_to_first_index == NULL) return 0;
+  if (hash_to_first_index == NULL) {
+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return 0;
+  }
+
+  percent_range = remaining_percent / 2;
+  remaining_percent -= percent_range;

  // Set the int32_t array to -1.
  memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));
@ -323,12 +335,22 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
      hash_to_first_index[hash_code] = pos++;
      argb_comp = argb_comp_next;
    }
+
+    if (!WebPReportProgress(
+            pic, percent_start + percent_range * pos / (size - 2), percent)) {
+      WebPSafeFree(hash_to_first_index);
+      return 0;
+    }
  }
  // Process the penultimate pixel.
  chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];

  WebPSafeFree(hash_to_first_index);

+  percent_start += percent_range;
+  if (!WebPReportProgress(pic, percent_start, percent)) return 0;
+  percent_range = remaining_percent;
+
  // Find the best match interval at each pixel, defined by an offset to the
  // pixel and a length. The right-most pixel cannot match anything to the right
  // (hence a best length of 0) and the left-most pixel nothing to the left
@ -417,8 +439,17 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
        max_base_position = base_position;
      }
    }
+
+    if (!WebPReportProgress(pic,
+                            percent_start + percent_range *
+                                                (size - 2 - base_position) /
+                                                (size - 2),
+                            percent)) {
+      return 0;
+    }
  }
-  return 1;
+
+  return WebPReportProgress(pic, percent_start + percent_range, percent);
 }

 static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
@ -728,7 +759,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
                                  int* const best_cache_bits) {
  int i;
  const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
-  double entropy_min = MAX_ENTROPY;
+  float entropy_min = MAX_ENTROPY;
  int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
  VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
@ -813,14 +844,14 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
  }

  for (i = 0; i <= cache_bits_max; ++i) {
-    const double entropy = VP8LHistogramEstimateBits(histos[i]);
+    const float entropy = VP8LHistogramEstimateBits(histos[i]);
    if (i == 0 || entropy < entropy_min) {
      entropy_min = entropy;
      *best_cache_bits = i;
    }
  }
  ok = 1;
-Error:
+ Error:
  for (i = 0; i <= cache_bits_max; ++i) {
    if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
    VP8LFreeHistogram(histos[i]);
@ -890,7 +921,7 @@ static int GetBackwardReferences(int width, int height,
  int i, lz77_type;
  // Index 0 is for a color cache, index 1 for no cache (if needed).
  int lz77_types_best[2] = {0, 0};
-  double bit_costs_best[2] = {DBL_MAX, DBL_MAX};
+  float bit_costs_best[2] = {FLT_MAX, FLT_MAX};
  VP8LHashChain hash_chain_box;
  VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
  int status = 0;
@ -902,7 +933,7 @@ static int GetBackwardReferences(int width, int height,
  for (lz77_type = 1; lz77_types_to_try;
       lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
    int res = 0;
-    double bit_cost = 0.;
+    float bit_cost = 0.f;
    if ((lz77_types_to_try & lz77_type) == 0) continue;
    switch (lz77_type) {
      case kLZ77RLE:
@ -976,15 +1007,16 @@ static int GetBackwardReferences(int width, int height,
      const VP8LHashChain* const hash_chain_tmp =
          (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
      const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
-      if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
-                                               hash_chain_tmp, &refs[i],
-                                               refs_tmp)) {
-        double bit_cost_trace;
-        VP8LHistogramCreate(histo, refs_tmp, cache_bits);
-        bit_cost_trace = VP8LHistogramEstimateBits(histo);
-        if (bit_cost_trace < bit_costs_best[i]) {
-          BackwardRefsSwap(refs_tmp, &refs[i]);
-        }
+      float bit_cost_trace;
+      if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
+                                                hash_chain_tmp, &refs[i],
+                                                refs_tmp)) {
+        goto Error;
+      }
+      VP8LHistogramCreate(histo, refs_tmp, cache_bits);
+      bit_cost_trace = VP8LHistogramEstimateBits(histo);
+      if (bit_cost_trace < bit_costs_best[i]) {
+        BackwardRefsSwap(refs_tmp, &refs[i]);
      }
    }

@ -1000,31 +1032,37 @@ static int GetBackwardReferences(int width, int height,
  }
  status = 1;

-Error:
+ Error:
  VP8LHashChainClear(&hash_chain_box);
  VP8LFreeHistogram(histo);
  return status;
 }

-WebPEncodingError VP8LGetBackwardReferences(
+int VP8LGetBackwardReferences(
    int width, int height, const uint32_t* const argb, int quality,
    int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
    const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
-    int* const cache_bits_best) {
+    int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
+    int* const percent) {
  if (low_effort) {
    VP8LBackwardRefs* refs_best;
    *cache_bits_best = cache_bits_max;
    refs_best = GetBackwardReferencesLowEffort(
        width, height, argb, cache_bits_best, hash_chain, refs);
-    if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
+    if (refs_best == NULL) {
+      WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
+      return 0;
+    }
    // Set it in first position.
    BackwardRefsSwap(refs_best, &refs[0]);
  } else {
    if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
                               cache_bits_max, do_no_cache, hash_chain, refs,
                               cache_bits_best)) {
-      return VP8_ENC_ERROR_OUT_OF_MEMORY;
+      WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
+      return 0;
    }
  }
-  return VP8_ENC_OK;
+
+  return WebPReportProgress(pic, *percent + percent_range, percent);
 }
--- a/thirdparty/libwebp/src/enc/backward_references_enc.h
+++ b/thirdparty/libwebp/src/enc/backward_references_enc.h
@ -134,10 +134,11 @@ struct VP8LHashChain {

 // Must be called first, to set size.
 int VP8LHashChainInit(VP8LHashChain* const p, int size);
-// Pre-compute the best matches for argb.
+// Pre-compute the best matches for argb. pic and percent are for progress.
 int VP8LHashChainFill(VP8LHashChain* const p, int quality,
                      const uint32_t* const argb, int xsize, int ysize,
-                      int low_effort);
+                      int low_effort, const WebPPicture* const pic,
+                      int percent_range, int* const percent);
 void VP8LHashChainClear(VP8LHashChain* const p);  // release memory

 static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p,
@ -227,11 +228,14 @@ enum VP8LLZ77Type {
 // VP8LBackwardRefs is put in the first element, the best value with no-cache in
 // the second element.
 // In both cases, the last element is used as temporary internally.
-WebPEncodingError VP8LGetBackwardReferences(
+// pic and percent are for progress.
+// Returns false in case of error (stored in pic->error_code).
+int VP8LGetBackwardReferences(
    int width, int height, const uint32_t* const argb, int quality,
    int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
    const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
-    int* const cache_bits_best);
+    int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
+    int* const percent);

 #ifdef __cplusplus
 }
--- a/thirdparty/libwebp/src/enc/histogram_enc.c
+++ b/thirdparty/libwebp/src/enc/histogram_enc.c
@ -13,15 +13,17 @@
 #include "src/webp/config.h"
 #endif

+#include <float.h>
 #include <math.h>

-#include "src/enc/backward_references_enc.h"
-#include "src/enc/histogram_enc.h"
 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
+#include "src/enc/backward_references_enc.h"
+#include "src/enc/histogram_enc.h"
+#include "src/enc/vp8i_enc.h"
 #include "src/utils/utils.h"

-#define MAX_COST 1.e38
+#define MAX_BIT_COST FLT_MAX

 // Number of partitions for the three dominant (literal, red and blue) symbol
 // costs.
@ -228,8 +230,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
 // -----------------------------------------------------------------------------
 // Entropy-related functions.

-static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
-  double mix;
+static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) {
+  float mix;
  if (entropy->nonzeros < 5) {
    if (entropy->nonzeros <= 1) {
      return 0;
@ -238,67 +240,67 @@ static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
    // Let's mix in a bit of entropy to favor good clustering when
    // distributions of these are combined.
    if (entropy->nonzeros == 2) {
-      return 0.99 * entropy->sum + 0.01 * entropy->entropy;
+      return 0.99f * entropy->sum + 0.01f * entropy->entropy;
    }
    // No matter what the entropy says, we cannot be better than min_limit
    // with Huffman coding. I am mixing a bit of entropy into the
    // min_limit since it produces much better (~0.5 %) compression results
    // perhaps because of better entropy clustering.
    if (entropy->nonzeros == 3) {
-      mix = 0.95;
+      mix = 0.95f;
    } else {
-      mix = 0.7;  // nonzeros == 4.
+      mix = 0.7f;  // nonzeros == 4.
    }
  } else {
-    mix = 0.627;
+    mix = 0.627f;
  }

  {
-    double min_limit = 2 * entropy->sum - entropy->max_val;
-    min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy;
+    float min_limit = 2.f * entropy->sum - entropy->max_val;
+    min_limit = mix * min_limit + (1.f - mix) * entropy->entropy;
    return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
  }
 }

-double VP8LBitsEntropy(const uint32_t* const array, int n) {
+float VP8LBitsEntropy(const uint32_t* const array, int n) {
  VP8LBitEntropy entropy;
  VP8LBitsEntropyUnrefined(array, n, &entropy);

  return BitsEntropyRefine(&entropy);
 }

-static double InitialHuffmanCost(void) {
+static float InitialHuffmanCost(void) {
  // Small bias because Huffman code length is typically not stored in
  // full length.
  static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
-  static const double kSmallBias = 9.1;
+  static const float kSmallBias = 9.1f;
  return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
 }

 // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
-static double FinalHuffmanCost(const VP8LStreaks* const stats) {
+static float FinalHuffmanCost(const VP8LStreaks* const stats) {
  // The constants in this function are experimental and got rounded from
  // their original values in 1/8 when switched to 1/1024.
-  double retval = InitialHuffmanCost();
+  float retval = InitialHuffmanCost();
  // Second coefficient: Many zeros in the histogram are covered efficiently
  // by a run-length encode. Originally 2/8.
-  retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
+  retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1];
  // Second coefficient: Constant values are encoded less efficiently, but still
  // RLE'ed. Originally 6/8.
-  retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
+  retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1];
  // 0s are usually encoded more efficiently than non-0s.
  // Originally 15/8.
-  retval += 1.796875 * stats->streaks[0][0];
+  retval += 1.796875f * stats->streaks[0][0];
  // Originally 26/8.
-  retval += 3.28125 * stats->streaks[1][0];
+  retval += 3.28125f * stats->streaks[1][0];
  return retval;
 }

 // Get the symbol entropy for the distribution 'population'.
 // Set 'trivial_sym', if there's only one symbol present in the distribution.
-static double PopulationCost(const uint32_t* const population, int length,
-                             uint32_t* const trivial_sym,
-                             uint8_t* const is_used) {
+static float PopulationCost(const uint32_t* const population, int length,
+                            uint32_t* const trivial_sym,
+                            uint8_t* const is_used) {
  VP8LBitEntropy bit_entropy;
  VP8LStreaks stats;
  VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
@ -314,11 +316,10 @@ static double PopulationCost(const uint32_t* const population, int length,

 // trivial_at_end is 1 if the two histograms only have one element that is
 // non-zero: both the zero-th one, or both the last one.
-static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
-                                             const uint32_t* const Y,
-                                             int length, int is_X_used,
-                                             int is_Y_used,
-                                             int trivial_at_end) {
+static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X,
+                                            const uint32_t* const Y, int length,
+                                            int is_X_used, int is_Y_used,
+                                            int trivial_at_end) {
  VP8LStreaks stats;
  if (trivial_at_end) {
    // This configuration is due to palettization that transforms an indexed
@ -356,7 +357,7 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
 }

 // Estimates the Entropy + Huffman + other block overhead size cost.
-double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
+float VP8LHistogramEstimateBits(VP8LHistogram* const p) {
  return
      PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
                     NULL, &p->is_used_[0])
@ -373,8 +374,7 @@ double VP8LHistogramEstimateBits(VP8LHistogram* const p) {

 static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
                                       const VP8LHistogram* const b,
-                                       double cost_threshold,
-                                       double* cost) {
+                                       float cost_threshold, float* cost) {
  const int palette_code_bits = a->palette_code_bits_;
  int trivial_at_end = 0;
  assert(a->palette_code_bits_ == b->palette_code_bits_);
@ -439,12 +439,11 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a,
 // Since the previous score passed is 'cost_threshold', we only need to compare
 // the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
 // early.
-static double HistogramAddEval(const VP8LHistogram* const a,
-                               const VP8LHistogram* const b,
-                               VP8LHistogram* const out,
-                               double cost_threshold) {
-  double cost = 0;
-  const double sum_cost = a->bit_cost_ + b->bit_cost_;
+static float HistogramAddEval(const VP8LHistogram* const a,
+                              const VP8LHistogram* const b,
+                              VP8LHistogram* const out, float cost_threshold) {
+  float cost = 0;
+  const float sum_cost = a->bit_cost_ + b->bit_cost_;
  cost_threshold += sum_cost;

  if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
@ -459,10 +458,10 @@ static double HistogramAddEval(const VP8LHistogram* const a,
 // Same as HistogramAddEval(), except that the resulting histogram
 // is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
 // the term C(b) which is constant over all the evaluations.
-static double HistogramAddThresh(const VP8LHistogram* const a,
-                                 const VP8LHistogram* const b,
-                                 double cost_threshold) {
-  double cost;
+static float HistogramAddThresh(const VP8LHistogram* const a,
+                                const VP8LHistogram* const b,
+                                float cost_threshold) {
+  float cost;
  assert(a != NULL && b != NULL);
  cost = -a->bit_cost_;
  GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
@ -473,24 +472,22 @@ static double HistogramAddThresh(const VP8LHistogram* const a,

 // The structure to keep track of cost range for the three dominant entropy
 // symbols.
-// TODO(skal): Evaluate if float can be used here instead of double for
-// representing the entropy costs.
 typedef struct {
-  double literal_max_;
-  double literal_min_;
-  double red_max_;
-  double red_min_;
-  double blue_max_;
-  double blue_min_;
+  float literal_max_;
+  float literal_min_;
+  float red_max_;
+  float red_min_;
+  float blue_max_;
+  float blue_min_;
 } DominantCostRange;

 static void DominantCostRangeInit(DominantCostRange* const c) {
  c->literal_max_ = 0.;
-  c->literal_min_ = MAX_COST;
+  c->literal_min_ = MAX_BIT_COST;
  c->red_max_ = 0.;
-  c->red_min_ = MAX_COST;
+  c->red_min_ = MAX_BIT_COST;
  c->blue_max_ = 0.;
-  c->blue_min_ = MAX_COST;
+  c->blue_min_ = MAX_BIT_COST;
 }

 static void UpdateDominantCostRange(
@ -505,10 +502,9 @@ static void UpdateDominantCostRange(

 static void UpdateHistogramCost(VP8LHistogram* const h) {
  uint32_t alpha_sym, red_sym, blue_sym;
-  const double alpha_cost =
-      PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
-                     &h->is_used_[3]);
-  const double distance_cost =
+  const float alpha_cost =
+      PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]);
+  const float distance_cost =
      PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
      VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
  const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
@ -529,10 +525,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
  }
 }

-static int GetBinIdForEntropy(double min, double max, double val) {
-  const double range = max - min;
+static int GetBinIdForEntropy(float min, float max, float val) {
+  const float range = max - min;
  if (range > 0.) {
-    const double delta = val - min;
+    const float delta = val - min;
    return (int)((NUM_PARTITIONS - 1e-6) * delta / range);
  } else {
    return 0;
@ -641,15 +637,11 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,

 // Merges some histograms with same bin_id together if it's advantageous.
 // Sets the remaining histograms to NULL.
-static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
-                                       int* num_used,
-                                       const uint16_t* const clusters,
-                                       uint16_t* const cluster_mappings,
-                                       VP8LHistogram* cur_combo,
-                                       const uint16_t* const bin_map,
-                                       int num_bins,
-                                       double combine_cost_factor,
-                                       int low_effort) {
+static void HistogramCombineEntropyBin(
+    VP8LHistogramSet* const image_histo, int* num_used,
+    const uint16_t* const clusters, uint16_t* const cluster_mappings,
+    VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins,
+    float combine_cost_factor, int low_effort) {
  VP8LHistogram** const histograms = image_histo->histograms;
  int idx;
  struct {
@ -679,11 +671,10 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
      cluster_mappings[clusters[idx]] = clusters[first];
    } else {
      // try to merge #idx into #first (both share the same bin_id)
-      const double bit_cost = histograms[idx]->bit_cost_;
-      const double bit_cost_thresh = -bit_cost * combine_cost_factor;
-      const double curr_cost_diff =
-          HistogramAddEval(histograms[first], histograms[idx],
-                           cur_combo, bit_cost_thresh);
+      const float bit_cost = histograms[idx]->bit_cost_;
+      const float bit_cost_thresh = -bit_cost * combine_cost_factor;
+      const float curr_cost_diff = HistogramAddEval(
+          histograms[first], histograms[idx], cur_combo, bit_cost_thresh);
      if (curr_cost_diff < bit_cost_thresh) {
        // Try to merge two histograms only if the combo is a trivial one or
        // the two candidate histograms are already non-trivial.
@ -731,8 +722,8 @@ static uint32_t MyRand(uint32_t* const seed) {
 typedef struct {
  int idx1;
  int idx2;
-  double cost_diff;
-  double cost_combo;
+  float cost_diff;
+  float cost_combo;
 } HistogramPair;

 typedef struct {
@ -787,10 +778,9 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
 // Update the cost diff and combo of a pair of histograms. This needs to be
 // called when the the histograms have been merged with a third one.
 static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
-                                 const VP8LHistogram* const h2,
-                                 double threshold,
+                                 const VP8LHistogram* const h2, float threshold,
                                 HistogramPair* const pair) {
-  const double sum_cost = h1->bit_cost_ + h2->bit_cost_;
+  const float sum_cost = h1->bit_cost_ + h2->bit_cost_;
  pair->cost_combo = 0.;
  GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo);
  pair->cost_diff = pair->cost_combo - sum_cost;
@ -799,9 +789,9 @@ static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
 // Create a pair from indices "idx1" and "idx2" provided its cost
 // is inferior to "threshold", a negative entropy.
 // It returns the cost of the pair, or 0. if it superior to threshold.
-static double HistoQueuePush(HistoQueue* const histo_queue,
-                             VP8LHistogram** const histograms, int idx1,
-                             int idx2, double threshold) {
+static float HistoQueuePush(HistoQueue* const histo_queue,
+                            VP8LHistogram** const histograms, int idx1,
+                            int idx2, float threshold) {
  const VP8LHistogram* h1;
  const VP8LHistogram* h2;
  HistogramPair pair;
@ -945,8 +935,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
           ++tries_with_no_success < num_tries_no_success;
       ++iter) {
    int* mapping_index;
-    double best_cost =
-        (histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff;
+    float best_cost =
+        (histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff;
    int best_idx1 = -1, best_idx2 = 1;
    const uint32_t rand_range = (*num_used - 1) * (*num_used);
    // (*num_used) / 2 was chosen empirically. Less means faster but worse
@ -955,7 +945,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,

    // Pick random samples.
    for (j = 0; *num_used >= 2 && j < num_tries; ++j) {
-      double curr_cost;
+      float curr_cost;
      // Choose two different histograms at random and try to combine them.
      const uint32_t tmp = MyRand(&seed) % rand_range;
      uint32_t idx1 = tmp / (*num_used - 1);
@ -1034,7 +1024,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
  *do_greedy = (*num_used <= min_cluster_size);
  ok = 1;

-End:
+ End:
  HistoQueueClear(&histo_queue);
  WebPSafeFree(mappings);
  return ok;
@ -1057,7 +1047,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
  if (out_size > 1) {
    for (i = 0; i < in_size; ++i) {
      int best_out = 0;
-      double best_bits = MAX_COST;
+      float best_bits = MAX_BIT_COST;
      int k;
      if (in_histo[i] == NULL) {
        // Arbitrarily set to the previous value if unused to help future LZ77.
@ -1065,7 +1055,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
        continue;
      }
      for (k = 0; k < out_size; ++k) {
-        double cur_bits;
+        float cur_bits;
        cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
        if (k == 0 || cur_bits < best_bits) {
          best_bits = cur_bits;
@ -1093,13 +1083,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
  }
 }

-static double GetCombineCostFactor(int histo_size, int quality) {
-  double combine_cost_factor = 0.16;
+static float GetCombineCostFactor(int histo_size, int quality) {
+  float combine_cost_factor = 0.16f;
  if (quality < 90) {
-    if (histo_size > 256) combine_cost_factor /= 2.;
-    if (histo_size > 512) combine_cost_factor /= 2.;
-    if (histo_size > 1024) combine_cost_factor /= 2.;
-    if (quality <= 50) combine_cost_factor /= 2.;
+    if (histo_size > 256) combine_cost_factor /= 2.f;
+    if (histo_size > 512) combine_cost_factor /= 2.f;
+    if (histo_size > 1024) combine_cost_factor /= 2.f;
+    if (quality <= 50) combine_cost_factor /= 2.f;
  }
  return combine_cost_factor;
 }
@ -1169,13 +1159,13 @@ static void RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) {
 }

 int VP8LGetHistoImageSymbols(int xsize, int ysize,
-                             const VP8LBackwardRefs* const refs,
-                             int quality, int low_effort,
-                             int histogram_bits, int cache_bits,
+                             const VP8LBackwardRefs* const refs, int quality,
+                             int low_effort, int histogram_bits, int cache_bits,
                             VP8LHistogramSet* const image_histo,
                             VP8LHistogram* const tmp_histo,
-                             uint16_t* const histogram_symbols) {
-  int ok = 0;
+                             uint16_t* const histogram_symbols,
+                             const WebPPicture* const pic, int percent_range,
+                             int* const percent) {
  const int histo_xsize =
      histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1;
  const int histo_ysize =
@ -1192,7 +1182,10 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
      WebPSafeMalloc(2 * image_histo_raw_size, sizeof(map_tmp));
  uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size;
  int num_used = image_histo_raw_size;
-  if (orig_histo == NULL || map_tmp == NULL) goto Error;
+  if (orig_histo == NULL || map_tmp == NULL) {
+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    goto Error;
+  }

  // Construct the histograms from backward references.
  HistogramBuild(xsize, histogram_bits, refs, orig_histo);
@ -1206,16 +1199,15 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,

  if (entropy_combine) {
    uint16_t* const bin_map = map_tmp;
-    const double combine_cost_factor =
+    const float combine_cost_factor =
        GetCombineCostFactor(image_histo_raw_size, quality);
    const uint32_t num_clusters = num_used;

    HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
    // Collapse histograms with similar entropy.
-    HistogramCombineEntropyBin(image_histo, &num_used, histogram_symbols,
-                               cluster_mappings, tmp_histo, bin_map,
-                               entropy_combine_num_bins, combine_cost_factor,
-                               low_effort);
+    HistogramCombineEntropyBin(
+        image_histo, &num_used, histogram_symbols, cluster_mappings, tmp_histo,
+        bin_map, entropy_combine_num_bins, combine_cost_factor, low_effort);
    OptimizeHistogramSymbols(image_histo, cluster_mappings, num_clusters,
                             map_tmp, histogram_symbols);
  }
@ -1229,11 +1221,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
    int do_greedy;
    if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size,
                                    &do_greedy)) {
+      WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
      goto Error;
    }
    if (do_greedy) {
      RemoveEmptyHistograms(image_histo);
      if (!HistogramCombineGreedy(image_histo, &num_used)) {
+        WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
        goto Error;
      }
    }
@ -1243,10 +1237,12 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
  RemoveEmptyHistograms(image_histo);
  HistogramRemap(orig_histo, image_histo, histogram_symbols);

-  ok = 1;
+  if (!WebPReportProgress(pic, *percent + percent_range, percent)) {
+    goto Error;
+  }

 Error:
  VP8LFreeHistogramSet(orig_histo);
  WebPSafeFree(map_tmp);
-  return ok;
+  return (pic->error_code == VP8_ENC_OK);
 }
--- a/thirdparty/libwebp/src/enc/histogram_enc.h
+++ b/thirdparty/libwebp/src/enc/histogram_enc.h
@ -40,10 +40,10 @@ typedef struct {
  int palette_code_bits_;
  uint32_t trivial_symbol_;  // True, if histograms for Red, Blue & Alpha
                             // literal symbols are single valued.
-  double bit_cost_;          // cached value of bit cost.
-  double literal_cost_;      // Cached values of dominant entropy costs:
-  double red_cost_;          // literal, red & blue.
-  double blue_cost_;
+  float bit_cost_;           // cached value of bit cost.
+  float literal_cost_;       // Cached values of dominant entropy costs:
+  float red_cost_;           // literal, red & blue.
+  float blue_cost_;
  uint8_t is_used_[5];       // 5 for literal, red, blue, alpha, distance
 } VP8LHistogram;

@ -105,21 +105,23 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
      ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
 }

-// Builds the histogram image.
+// Builds the histogram image. pic and percent are for progress.
+// Returns false in case of error (stored in pic->error_code).
 int VP8LGetHistoImageSymbols(int xsize, int ysize,
-                             const VP8LBackwardRefs* const refs,
-                             int quality, int low_effort,
-                             int histogram_bits, int cache_bits,
+                             const VP8LBackwardRefs* const refs, int quality,
+                             int low_effort, int histogram_bits, int cache_bits,
                             VP8LHistogramSet* const image_histo,
                             VP8LHistogram* const tmp_histo,
-                             uint16_t* const histogram_symbols);
+                             uint16_t* const histogram_symbols,
+                             const WebPPicture* const pic, int percent_range,
+                             int* const percent);

 // Returns the entropy for the symbols in the input array.
-double VP8LBitsEntropy(const uint32_t* const array, int n);
+float VP8LBitsEntropy(const uint32_t* const array, int n);

 // Estimate how many bits the combined entropy of literals and distance
 // approximately maps to.
-double VP8LHistogramEstimateBits(VP8LHistogram* const p);
+float VP8LHistogramEstimateBits(VP8LHistogram* const p);

 #ifdef __cplusplus
 }
--- a/thirdparty/libwebp/src/enc/picture_csp_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_csp_enc.c
@ -15,12 +15,19 @@
 #include <stdlib.h>
 #include <math.h>

+#include "sharpyuv/sharpyuv.h"
+#include "sharpyuv/sharpyuv_csp.h"
 #include "src/enc/vp8i_enc.h"
 #include "src/utils/random_utils.h"
 #include "src/utils/utils.h"
 #include "src/dsp/dsp.h"
 #include "src/dsp/lossless.h"
 #include "src/dsp/yuv.h"
+#include "src/dsp/cpu.h"
+
+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
+#include <pthread.h>
+#endif

 // Uncomment to disable gamma-compression during RGB->U/V averaging
 #define USE_GAMMA_COMPRESSION
@ -76,16 +83,16 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {

 #if defined(USE_GAMMA_COMPRESSION)

-// gamma-compensates loss of resolution during chroma subsampling
-#define kGamma 0.80      // for now we use a different gamma value than kGammaF
-#define kGammaFix 12     // fixed-point precision for linear values
-#define kGammaScale ((1 << kGammaFix) - 1)
-#define kGammaTabFix 7   // fixed-point fractional bits precision
-#define kGammaTabScale (1 << kGammaTabFix)
-#define kGammaTabRounder (kGammaTabScale >> 1)
-#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
+// Gamma correction compensates loss of resolution during chroma subsampling.
+#define GAMMA_FIX 12      // fixed-point precision for linear values
+#define GAMMA_TAB_FIX 7   // fixed-point fractional bits precision
+#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX))
+static const double kGamma = 0.80;
+static const int kGammaScale = ((1 << GAMMA_FIX) - 1);
+static const int kGammaTabScale = (1 << GAMMA_TAB_FIX);
+static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1);

-static int kLinearToGammaTab[kGammaTabSize + 1];
+static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1];
 static uint16_t kGammaToLinearTab[256];
 static volatile int kGammaTablesOk = 0;
 static void InitGammaTables(void);
@ -93,13 +100,13 @@ static void InitGammaTables(void);
 WEBP_DSP_INIT_FUNC(InitGammaTables) {
  if (!kGammaTablesOk) {
    int v;
-    const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
+    const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale;
    const double norm = 1. / 255.;
    for (v = 0; v <= 255; ++v) {
      kGammaToLinearTab[v] =
          (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
    }
-    for (v = 0; v <= kGammaTabSize; ++v) {
+    for (v = 0; v <= GAMMA_TAB_SIZE; ++v) {
      kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
    }
    kGammaTablesOk = 1;
@ -111,12 +118,12 @@ static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
 }

 static WEBP_INLINE int Interpolate(int v) {
-  const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
+  const int tab_pos = v >> (GAMMA_TAB_FIX + 2);    // integer part
  const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
  const int v0 = kLinearToGammaTab[tab_pos];
  const int v1 = kLinearToGammaTab[tab_pos + 1];
  const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
-  assert(tab_pos + 1 < kGammaTabSize + 1);
+  assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1);
  return y;
 }

@ -124,7 +131,7 @@ static WEBP_INLINE int Interpolate(int v) {
 // U/V value, suitable for RGBToU/V calls.
 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
  const int y = Interpolate(base_value << shift);   // final uplifted value
-  return (y + kGammaTabRounder) >> kGammaTabFix;    // descale
+  return (y + kGammaTabRounder) >> GAMMA_TAB_FIX;    // descale
 }

 #else
@ -158,415 +165,41 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) {
 //------------------------------------------------------------------------------
 // Sharp RGB->YUV conversion

-static const int kNumIterations = 4;
 static const int kMinDimensionIterativeConversion = 4;

-// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
-// banding sometimes. Better use extra precision.
-#define SFIX 2                // fixed-point precision of RGB and Y/W
-typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV
-typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W
-
-#define SHALF (1 << SFIX >> 1)
-#define MAX_Y_T ((256 << SFIX) - 1)
-#define SROUNDER (1 << (YUV_FIX + SFIX - 1))
-
-#if defined(USE_GAMMA_COMPRESSION)
-
-// We use tables of different size and precision for the Rec709 / BT2020
-// transfer function.
-#define kGammaF (1./0.45)
-static uint32_t kLinearToGammaTabS[kGammaTabSize + 2];
-#define GAMMA_TO_LINEAR_BITS 14
-static uint32_t kGammaToLinearTabS[MAX_Y_T + 1];   // size scales with Y_FIX
-static volatile int kGammaTablesSOk = 0;
-static void InitGammaTablesS(void);
-
-WEBP_DSP_INIT_FUNC(InitGammaTablesS) {
-  assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values
-  if (!kGammaTablesSOk) {
-    int v;
-    const double norm = 1. / MAX_Y_T;
-    const double scale = 1. / kGammaTabSize;
-    const double a = 0.09929682680944;
-    const double thresh = 0.018053968510807;
-    const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
-    for (v = 0; v <= MAX_Y_T; ++v) {
-      const double g = norm * v;
-      double value;
-      if (g <= thresh * 4.5) {
-        value = g / 4.5;
-      } else {
-        const double a_rec = 1. / (1. + a);
-        value = pow(a_rec * (g + a), kGammaF);
-      }
-      kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
-    }
-    for (v = 0; v <= kGammaTabSize; ++v) {
-      const double g = scale * v;
-      double value;
-      if (g <= thresh) {
-        value = 4.5 * g;
-      } else {
-        value = (1. + a) * pow(g, 1. / kGammaF) - a;
-      }
-      // we already incorporate the 1/2 rounding constant here
-      kLinearToGammaTabS[v] =
-          (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1);
-    }
-    // to prevent small rounding errors to cause read-overflow:
-    kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize];
-    kGammaTablesSOk = 1;
-  }
-}
-
-// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS
-static WEBP_INLINE uint32_t GammaToLinearS(int v) {
-  return kGammaToLinearTabS[v];
-}
-
-static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
-  // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision
-  const uint32_t v = value * kGammaTabSize;
-  const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS;
-  // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision
-  const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS);  // fractional part
-  // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1])
-  const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0];
-  const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1];
-  // Final interpolation. Note that rounding is already included.
-  const uint32_t v2 = (v1 - v0) * x;    // note: v1 >= v0.
-  const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS);
-  return result;
-}
-
-#else
-
-static void InitGammaTablesS(void) {}
-static WEBP_INLINE uint32_t GammaToLinearS(int v) {
-  return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T;
-}
-static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
-  return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS;
-}
-
-#endif    // USE_GAMMA_COMPRESSION
-
-//------------------------------------------------------------------------------
-
-static uint8_t clip_8b(fixed_t v) {
-  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
-}
-
-static fixed_y_t clip_y(int y) {
-  return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
-}
-
-//------------------------------------------------------------------------------
-
-static int RGBToGray(int r, int g, int b) {
-  const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF;
-  return (luma >> YUV_FIX);
-}
-
-static uint32_t ScaleDown(int a, int b, int c, int d) {
-  const uint32_t A = GammaToLinearS(a);
-  const uint32_t B = GammaToLinearS(b);
-  const uint32_t C = GammaToLinearS(c);
-  const uint32_t D = GammaToLinearS(d);
-  return LinearToGammaS((A + B + C + D + 2) >> 2);
-}
-
-static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
-  int i;
-  for (i = 0; i < w; ++i) {
-    const uint32_t R = GammaToLinearS(src[0 * w + i]);
-    const uint32_t G = GammaToLinearS(src[1 * w + i]);
-    const uint32_t B = GammaToLinearS(src[2 * w + i]);
-    const uint32_t Y = RGBToGray(R, G, B);
-    dst[i] = (fixed_y_t)LinearToGammaS(Y);
-  }
-}
-
-static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
-                         fixed_t* dst, int uv_w) {
-  int i;
-  for (i = 0; i < uv_w; ++i) {
-    const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1],
-                            src2[0 * uv_w + 0], src2[0 * uv_w + 1]);
-    const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1],
-                            src2[2 * uv_w + 0], src2[2 * uv_w + 1]);
-    const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1],
-                            src2[4 * uv_w + 0], src2[4 * uv_w + 1]);
-    const int W = RGBToGray(r, g, b);
-    dst[0 * uv_w] = (fixed_t)(r - W);
-    dst[1 * uv_w] = (fixed_t)(g - W);
-    dst[2 * uv_w] = (fixed_t)(b - W);
-    dst  += 1;
-    src1 += 2;
-    src2 += 2;
-  }
-}
-
-static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
-  int i;
-  for (i = 0; i < w; ++i) {
-    y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
-  }
-}
-
-//------------------------------------------------------------------------------
-
-static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
-  const int v0 = (A * 3 + B + 2) >> 2;
-  return clip_y(v0 + W0);
-}
-
-//------------------------------------------------------------------------------
-
-static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX
-  return ((fixed_y_t)a << SFIX) | SHALF;
-}
-
-static void ImportOneRow(const uint8_t* const r_ptr,
-                         const uint8_t* const g_ptr,
-                         const uint8_t* const b_ptr,
-                         int step,
-                         int pic_width,
-                         fixed_y_t* const dst) {
-  int i;
-  const int w = (pic_width + 1) & ~1;
-  for (i = 0; i < pic_width; ++i) {
-    const int off = i * step;
-    dst[i + 0 * w] = UpLift(r_ptr[off]);
-    dst[i + 1 * w] = UpLift(g_ptr[off]);
-    dst[i + 2 * w] = UpLift(b_ptr[off]);
-  }
-  if (pic_width & 1) {  // replicate rightmost pixel
-    dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
-    dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
-    dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
-  }
-}
-
-static void InterpolateTwoRows(const fixed_y_t* const best_y,
-                               const fixed_t* prev_uv,
-                               const fixed_t* cur_uv,
-                               const fixed_t* next_uv,
-                               int w,
-                               fixed_y_t* out1,
-                               fixed_y_t* out2) {
-  const int uv_w = w >> 1;
-  const int len = (w - 1) >> 1;   // length to filter
-  int k = 3;
-  while (k-- > 0) {   // process each R/G/B segments in turn
-    // special boundary case for i==0
-    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]);
-    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]);
-
-    WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1);
-    WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1);
-
-    // special boundary case for i == w - 1 when w is even
-    if (!(w & 1)) {
-      out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
-                            best_y[w - 1 + 0]);
-      out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
-                            best_y[w - 1 + w]);
-    }
-    out1 += w;
-    out2 += w;
-    prev_uv += uv_w;
-    cur_uv  += uv_w;
-    next_uv += uv_w;
-  }
-}
-
-static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
-  const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
-  return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
-}
-
-static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
-  const int u =  -9719 * r - 19081 * g + 28800 * b + SROUNDER;
-  return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
-}
-
-static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
-  const int v = +28800 * r - 24116 * g -  4684 * b + SROUNDER;
-  return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
-}
-
-static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
-                            WebPPicture* const picture) {
-  int i, j;
-  uint8_t* dst_y = picture->y;
-  uint8_t* dst_u = picture->u;
-  uint8_t* dst_v = picture->v;
-  const fixed_t* const best_uv_base = best_uv;
-  const int w = (picture->width + 1) & ~1;
-  const int h = (picture->height + 1) & ~1;
-  const int uv_w = w >> 1;
-  const int uv_h = h >> 1;
-  for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) {
-    for (i = 0; i < picture->width; ++i) {
-      const int off = (i >> 1);
-      const int W = best_y[i];
-      const int r = best_uv[off + 0 * uv_w] + W;
-      const int g = best_uv[off + 1 * uv_w] + W;
-      const int b = best_uv[off + 2 * uv_w] + W;
-      dst_y[i] = ConvertRGBToY(r, g, b);
-    }
-    best_y += w;
-    best_uv += (j & 1) * 3 * uv_w;
-    dst_y += picture->y_stride;
-  }
-  for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
-    for (i = 0; i < uv_w; ++i) {
-      const int off = i;
-      const int r = best_uv[off + 0 * uv_w];
-      const int g = best_uv[off + 1 * uv_w];
-      const int b = best_uv[off + 2 * uv_w];
-      dst_u[i] = ConvertRGBToU(r, g, b);
-      dst_v[i] = ConvertRGBToV(r, g, b);
-    }
-    best_uv += 3 * uv_w;
-    dst_u += picture->uv_stride;
-    dst_v += picture->uv_stride;
-  }
-  return 1;
-}
-
 //------------------------------------------------------------------------------
 // Main function

-#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
+extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
+
+static void SafeInitSharpYuv(void) {
+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
+  static pthread_mutex_t initsharpyuv_lock = PTHREAD_MUTEX_INITIALIZER;
+  if (pthread_mutex_lock(&initsharpyuv_lock)) return;
+#endif
+
+  SharpYuvInit(VP8GetCPUInfo);
+
+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
+  (void)pthread_mutex_unlock(&initsharpyuv_lock);
+#endif
+}

 static int PreprocessARGB(const uint8_t* r_ptr,
                          const uint8_t* g_ptr,
                          const uint8_t* b_ptr,
                          int step, int rgb_stride,
                          WebPPicture* const picture) {
-  // we expand the right/bottom border if needed
-  const int w = (picture->width + 1) & ~1;
-  const int h = (picture->height + 1) & ~1;
-  const int uv_w = w >> 1;
-  const int uv_h = h >> 1;
-  uint64_t prev_diff_y_sum = ~0;
-  int j, iter;
-
-  // TODO(skal): allocate one big memory chunk. But for now, it's easier
-  // for valgrind debugging to have several chunks.
-  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
-  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
-  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
-  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
-  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
-  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
-  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
-  fixed_y_t* best_y = best_y_base;
-  fixed_y_t* target_y = target_y_base;
-  fixed_t* best_uv = best_uv_base;
-  fixed_t* target_uv = target_uv_base;
-  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
-  int ok;
-
-  if (best_y_base == NULL || best_uv_base == NULL ||
-      target_y_base == NULL || target_uv_base == NULL ||
-      best_rgb_y == NULL || best_rgb_uv == NULL ||
-      tmp_buffer == NULL) {
-    ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-    goto End;
+  const int ok = SharpYuvConvert(
+      r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8,
+      picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v,
+      picture->uv_stride, /*yuv_bit_depth=*/8, picture->width,
+      picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp));
+  if (!ok) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
  }
-  assert(picture->width >= kMinDimensionIterativeConversion);
-  assert(picture->height >= kMinDimensionIterativeConversion);
-
-  WebPInitConvertARGBToYUV();
-
-  // Import RGB samples to W/RGB representation.
-  for (j = 0; j < picture->height; j += 2) {
-    const int is_last_row = (j == picture->height - 1);
-    fixed_y_t* const src1 = tmp_buffer + 0 * w;
-    fixed_y_t* const src2 = tmp_buffer + 3 * w;
-
-    // prepare two rows of input
-    ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1);
-    if (!is_last_row) {
-      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
-                   step, picture->width, src2);
-    } else {
-      memcpy(src2, src1, 3 * w * sizeof(*src2));
-    }
-    StoreGray(src1, best_y + 0, w);
-    StoreGray(src2, best_y + w, w);
-
-    UpdateW(src1, target_y, w);
-    UpdateW(src2, target_y + w, w);
-    UpdateChroma(src1, src2, target_uv, uv_w);
-    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
-    best_y += 2 * w;
-    best_uv += 3 * uv_w;
-    target_y += 2 * w;
-    target_uv += 3 * uv_w;
-    r_ptr += 2 * rgb_stride;
-    g_ptr += 2 * rgb_stride;
-    b_ptr += 2 * rgb_stride;
-  }
-
-  // Iterate and resolve clipping conflicts.
-  for (iter = 0; iter < kNumIterations; ++iter) {
-    const fixed_t* cur_uv = best_uv_base;
-    const fixed_t* prev_uv = best_uv_base;
-    uint64_t diff_y_sum = 0;
-
-    best_y = best_y_base;
-    best_uv = best_uv_base;
-    target_y = target_y_base;
-    target_uv = target_uv_base;
-    for (j = 0; j < h; j += 2) {
-      fixed_y_t* const src1 = tmp_buffer + 0 * w;
-      fixed_y_t* const src2 = tmp_buffer + 3 * w;
-      {
-        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
-        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
-        prev_uv = cur_uv;
-        cur_uv = next_uv;
-      }
-
-      UpdateW(src1, best_rgb_y + 0 * w, w);
-      UpdateW(src2, best_rgb_y + 1 * w, w);
-      UpdateChroma(src1, src2, best_rgb_uv, uv_w);
-
-      // update two rows of Y and one row of RGB
-      diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w);
-      WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
-
-      best_y += 2 * w;
-      best_uv += 3 * uv_w;
-      target_y += 2 * w;
-      target_uv += 3 * uv_w;
-    }
-    // test exit condition
-    if (iter > 0) {
-      if (diff_y_sum < diff_y_threshold) break;
-      if (diff_y_sum > prev_diff_y_sum) break;
-    }
-    prev_diff_y_sum = diff_y_sum;
-  }
-  // final reconstruction
-  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture);
-
- End:
-  WebPSafeFree(best_y_base);
-  WebPSafeFree(best_uv_base);
-  WebPSafeFree(target_y_base);
-  WebPSafeFree(target_uv_base);
-  WebPSafeFree(best_rgb_y);
-  WebPSafeFree(best_rgb_uv);
-  WebPSafeFree(tmp_buffer);
  return ok;
 }
-#undef SAFE_ALLOC

 //------------------------------------------------------------------------------
 // "Fast" regular RGB->YUV
@ -591,8 +224,8 @@ static const int kAlphaFix = 19;
 // and constant are adjusted very tightly to fit 32b arithmetic.
 // In particular, they use the fact that the operands for 'v / a' are actually
 // derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
-// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
-// overflow is: kGammaFix + kAlphaFix <= 31.
+// with ai in [0..255] and pi in [0..1<<GAMMA_FIX). The constraint to avoid
+// overflow is: GAMMA_FIX + kAlphaFix <= 31.
 static const uint32_t kInvAlpha[4 * 0xff + 1] = {
  0,  /* alpha = 0 */
  524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
@ -818,11 +451,20 @@ static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
    dst[0] = SUM4(r_ptr + j, step);
    dst[1] = SUM4(g_ptr + j, step);
    dst[2] = SUM4(b_ptr + j, step);
+    // MemorySanitizer may raise false positives with data that passes through
+    // RGBA32PackedToPlanar_16b_SSE41() due to incorrect modeling of shuffles.
+    // See https://crbug.com/webp/573.
+#ifdef WEBP_MSAN
+    dst[3] = 0;
+#endif
  }
  if (width & 1) {
    dst[0] = SUM2(r_ptr + j);
    dst[1] = SUM2(g_ptr + j);
    dst[2] = SUM2(b_ptr + j);
+#ifdef WEBP_MSAN
+    dst[3] = 0;
+#endif
  }
 }

@ -863,18 +505,18 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
    use_iterative_conversion = 0;
  }

-  if (!WebPPictureAllocYUVA(picture, width, height)) {
+  if (!WebPPictureAllocYUVA(picture)) {
    return 0;
  }
  if (has_alpha) {
    assert(step == 4);
 #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
-    assert(kAlphaFix + kGammaFix <= 31);
+    assert(kAlphaFix + GAMMA_FIX <= 31);
 #endif
  }

  if (use_iterative_conversion) {
-    InitGammaTablesS();
+    SafeInitSharpYuv();
    if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
      return 0;
    }
@ -1044,7 +686,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }
  // Allocate a new argb buffer (discarding the previous one).
-  if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
+  if (!WebPPictureAllocARGB(picture)) return 0;
  picture->use_argb = 1;

  // Convert
@ -1106,6 +748,8 @@ static int Import(WebPPicture* const picture,
  const int width = picture->width;
  const int height = picture->height;

+  if (abs(rgb_stride) < (import_alpha ? 4 : 3) * width) return 0;
+
  if (!picture->use_argb) {
    const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
@ -1163,24 +807,24 @@ static int Import(WebPPicture* const picture,
 #if !defined(WEBP_REDUCE_CSP)

 int WebPPictureImportBGR(WebPPicture* picture,
-                         const uint8_t* rgb, int rgb_stride) {
-  return (picture != NULL && rgb != NULL)
-             ? Import(picture, rgb, rgb_stride, 3, 1, 0)
+                         const uint8_t* bgr, int bgr_stride) {
+  return (picture != NULL && bgr != NULL)
+             ? Import(picture, bgr, bgr_stride, 3, 1, 0)
             : 0;
 }

 int WebPPictureImportBGRA(WebPPicture* picture,
-                          const uint8_t* rgba, int rgba_stride) {
-  return (picture != NULL && rgba != NULL)
-             ? Import(picture, rgba, rgba_stride, 4, 1, 1)
+                          const uint8_t* bgra, int bgra_stride) {
+  return (picture != NULL && bgra != NULL)
+             ? Import(picture, bgra, bgra_stride, 4, 1, 1)
             : 0;
 }


 int WebPPictureImportBGRX(WebPPicture* picture,
-                          const uint8_t* rgba, int rgba_stride) {
-  return (picture != NULL && rgba != NULL)
-             ? Import(picture, rgba, rgba_stride, 4, 1, 0)
+                          const uint8_t* bgrx, int bgrx_stride) {
+  return (picture != NULL && bgrx != NULL)
+             ? Import(picture, bgrx, bgrx_stride, 4, 1, 0)
             : 0;
 }

@ -1201,9 +845,9 @@ int WebPPictureImportRGBA(WebPPicture* picture,
 }

 int WebPPictureImportRGBX(WebPPicture* picture,
-                          const uint8_t* rgba, int rgba_stride) {
-  return (picture != NULL && rgba != NULL)
-             ? Import(picture, rgba, rgba_stride, 4, 0, 0)
+                          const uint8_t* rgbx, int rgbx_stride) {
+  return (picture != NULL && rgbx != NULL)
+             ? Import(picture, rgbx, rgbx_stride, 4, 0, 0)
             : 0;
 }

--- a/thirdparty/libwebp/src/enc/picture_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_enc.c
@ -45,6 +45,22 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) {

 //------------------------------------------------------------------------------

+int WebPValidatePicture(const WebPPicture* const picture) {
+  if (picture == NULL) return 0;
+  if (picture->width <= 0 || picture->height <= 0) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
+  }
+  if (picture->width <= 0 || picture->width / 4 > INT_MAX / 4 ||
+      picture->height <= 0 || picture->height / 4 > INT_MAX / 4) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
+  }
+  if (picture->colorspace != WEBP_YUV420 &&
+      picture->colorspace != WEBP_YUV420A) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  }
+  return 1;
+}
+
 static void WebPPictureResetBufferARGB(WebPPicture* const picture) {
  picture->memory_argb_ = NULL;
  picture->argb = NULL;
@ -63,18 +79,17 @@ void WebPPictureResetBuffers(WebPPicture* const picture) {
  WebPPictureResetBufferYUVA(picture);
 }

-int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
+int WebPPictureAllocARGB(WebPPicture* const picture) {
  void* memory;
+  const int width = picture->width;
+  const int height = picture->height;
  const uint64_t argb_size = (uint64_t)width * height;

-  assert(picture != NULL);
+  if (!WebPValidatePicture(picture)) return 0;

  WebPSafeFree(picture->memory_argb_);
  WebPPictureResetBufferARGB(picture);

-  if (width <= 0 || height <= 0) {
-    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
-  }
  // allocate a new buffer.
  memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb));
  if (memory == NULL) {
@ -86,10 +101,10 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
  return 1;
 }

-int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
-  const WebPEncCSP uv_csp =
-      (WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK);
+int WebPPictureAllocYUVA(WebPPicture* const picture) {
  const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT;
+  const int width = picture->width;
+  const int height = picture->height;
  const int y_stride = width;
  const int uv_width = (int)(((int64_t)width + 1) >> 1);
  const int uv_height = (int)(((int64_t)height + 1) >> 1);
@ -98,15 +113,11 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
  uint64_t y_size, uv_size, a_size, total_size;
  uint8_t* mem;

-  assert(picture != NULL);
+  if (!WebPValidatePicture(picture)) return 0;

  WebPSafeFree(picture->memory_);
  WebPPictureResetBufferYUVA(picture);

-  if (uv_csp != WEBP_YUV420) {
-    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
-  }
-
  // alpha
  a_width = has_alpha ? width : 0;
  a_stride = a_width;
@ -152,15 +163,12 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {

 int WebPPictureAlloc(WebPPicture* picture) {
  if (picture != NULL) {
-    const int width = picture->width;
-    const int height = picture->height;
-
    WebPPictureFree(picture);   // erase previous buffer

    if (!picture->use_argb) {
-      return WebPPictureAllocYUVA(picture, width, height);
+      return WebPPictureAllocYUVA(picture);
    } else {
-      return WebPPictureAllocARGB(picture, width, height);
+      return WebPPictureAllocARGB(picture);
    }
  }
  return 1;
--- a/thirdparty/libwebp/src/enc/picture_rescale_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_rescale_enc.c
@ -13,14 +13,15 @@

 #include "src/webp/encode.h"

-#if !defined(WEBP_REDUCE_SIZE)
-
 #include <assert.h>
 #include <stdlib.h>

 #include "src/enc/vp8i_enc.h"
+
+#if !defined(WEBP_REDUCE_SIZE)
 #include "src/utils/rescaler_utils.h"
 #include "src/utils/utils.h"
+#endif  // !defined(WEBP_REDUCE_SIZE)

 #define HALVE(x) (((x) + 1) >> 1)

@ -56,6 +57,7 @@ static int AdjustAndCheckRectangle(const WebPPicture* const pic,
  return 1;
 }

+#if !defined(WEBP_REDUCE_SIZE)
 int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
  if (src == NULL || dst == NULL) return 0;
  if (src == dst) return 1;
@ -81,6 +83,7 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
  }
  return 1;
 }
+#endif  // !defined(WEBP_REDUCE_SIZE)

 int WebPPictureIsView(const WebPPicture* picture) {
  if (picture == NULL) return 0;
@ -120,6 +123,7 @@ int WebPPictureView(const WebPPicture* src,
  return 1;
 }

+#if !defined(WEBP_REDUCE_SIZE)
 //------------------------------------------------------------------------------
 // Picture cropping

@ -198,34 +202,34 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
  }
 }

-int WebPPictureRescale(WebPPicture* pic, int width, int height) {
+int WebPPictureRescale(WebPPicture* picture, int width, int height) {
  WebPPicture tmp;
  int prev_width, prev_height;
  rescaler_t* work;

-  if (pic == NULL) return 0;
-  prev_width = pic->width;
-  prev_height = pic->height;
+  if (picture == NULL) return 0;
+  prev_width = picture->width;
+  prev_height = picture->height;
  if (!WebPRescalerGetScaledDimensions(
          prev_width, prev_height, &width, &height)) {
    return 0;
  }

-  PictureGrabSpecs(pic, &tmp);
+  PictureGrabSpecs(picture, &tmp);
  tmp.width = width;
  tmp.height = height;
  if (!WebPPictureAlloc(&tmp)) return 0;

-  if (!pic->use_argb) {
+  if (!picture->use_argb) {
    work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
    if (work == NULL) {
      WebPPictureFree(&tmp);
      return 0;
    }
    // If present, we need to rescale alpha first (for AlphaMultiplyY).
-    if (pic->a != NULL) {
+    if (picture->a != NULL) {
      WebPInitAlphaProcessing();
-      if (!RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
+      if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride,
                        tmp.a, width, height, tmp.a_stride, work, 1)) {
        return 0;
      }
@ -233,17 +237,15 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {

    // We take transparency into account on the luma plane only. That's not
    // totally exact blending, but still is a good approximation.
-    AlphaMultiplyY(pic, 0);
-    if (!RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
+    AlphaMultiplyY(picture, 0);
+    if (!RescalePlane(picture->y, prev_width, prev_height, picture->y_stride,
                      tmp.y, width, height, tmp.y_stride, work, 1) ||
-        !RescalePlane(pic->u,
-                      HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
-                      tmp.u,
-                      HALVE(width), HALVE(height), tmp.uv_stride, work, 1) ||
-        !RescalePlane(pic->v,
-                      HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
-                      tmp.v,
-                      HALVE(width), HALVE(height), tmp.uv_stride, work, 1)) {
+        !RescalePlane(picture->u, HALVE(prev_width), HALVE(prev_height),
+                      picture->uv_stride, tmp.u, HALVE(width), HALVE(height),
+                      tmp.uv_stride, work, 1) ||
+        !RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height),
+                      picture->uv_stride, tmp.v, HALVE(width), HALVE(height),
+                      tmp.uv_stride, work, 1)) {
      return 0;
    }
    AlphaMultiplyY(&tmp, 1);
@ -257,18 +259,17 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
    // weighting first (black-matting), scale the RGB values, and remove
    // the premultiplication afterward (while preserving the alpha channel).
    WebPInitAlphaProcessing();
-    AlphaMultiplyARGB(pic, 0);
-    if (!RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
-                      pic->argb_stride * 4,
-                      (uint8_t*)tmp.argb, width, height,
-                      tmp.argb_stride * 4, work, 4)) {
+    AlphaMultiplyARGB(picture, 0);
+    if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height,
+                      picture->argb_stride * 4, (uint8_t*)tmp.argb, width,
+                      height, tmp.argb_stride * 4, work, 4)) {
      return 0;
    }
    AlphaMultiplyARGB(&tmp, 1);
  }
-  WebPPictureFree(pic);
+  WebPPictureFree(picture);
  WebPSafeFree(work);
-  *pic = tmp;
+  *picture = tmp;
  return 1;
 }

@ -280,23 +281,6 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
  return 0;
 }

-int WebPPictureIsView(const WebPPicture* picture) {
-  (void)picture;
-  return 0;
-}
-
-int WebPPictureView(const WebPPicture* src,
-                    int left, int top, int width, int height,
-                    WebPPicture* dst) {
-  (void)src;
-  (void)left;
-  (void)top;
-  (void)width;
-  (void)height;
-  (void)dst;
-  return 0;
-}
-
 int WebPPictureCrop(WebPPicture* pic,
                    int left, int top, int width, int height) {
  (void)pic;
--- a/thirdparty/libwebp/src/enc/picture_tools_enc.c
+++ b/thirdparty/libwebp/src/enc/picture_tools_enc.c
@ -190,27 +190,28 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
  return (0xff000000u | (r << 16) | (g << 8) | b);
 }

-void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
+void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb) {
  const int red = (background_rgb >> 16) & 0xff;
  const int green = (background_rgb >> 8) & 0xff;
  const int blue = (background_rgb >> 0) & 0xff;
  int x, y;
-  if (pic == NULL) return;
-  if (!pic->use_argb) {
-    const int uv_width = (pic->width >> 1);  // omit last pixel during u/v loop
+  if (picture == NULL) return;
+  if (!picture->use_argb) {
+    // omit last pixel during u/v loop
+    const int uv_width = (picture->width >> 1);
    const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF);
    // VP8RGBToU/V expects the u/v values summed over four pixels
    const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
    const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
-    const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
-    uint8_t* y_ptr = pic->y;
-    uint8_t* u_ptr = pic->u;
-    uint8_t* v_ptr = pic->v;
-    uint8_t* a_ptr = pic->a;
+    const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
+    uint8_t* y_ptr = picture->y;
+    uint8_t* u_ptr = picture->u;
+    uint8_t* v_ptr = picture->v;
+    uint8_t* a_ptr = picture->a;
    if (!has_alpha || a_ptr == NULL) return;    // nothing to do
-    for (y = 0; y < pic->height; ++y) {
+    for (y = 0; y < picture->height; ++y) {
      // Luma blending
-      for (x = 0; x < pic->width; ++x) {
+      for (x = 0; x < picture->width; ++x) {
        const uint8_t alpha = a_ptr[x];
        if (alpha < 0xff) {
          y_ptr[x] = BLEND(Y0, y_ptr[x], alpha);
@ -219,7 +220,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
      // Chroma blending every even line
      if ((y & 1) == 0) {
        uint8_t* const a_ptr2 =
-            (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
+            (y + 1 == picture->height) ? a_ptr : a_ptr + picture->a_stride;
        for (x = 0; x < uv_width; ++x) {
          // Average four alpha values into a single blending weight.
          // TODO(skal): might lead to visible contouring. Can we do better?
@ -229,24 +230,24 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
          u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
          v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
        }
-        if (pic->width & 1) {   // rightmost pixel
+        if (picture->width & 1) {  // rightmost pixel
          const uint32_t alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
          u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
          v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
        }
      } else {
-        u_ptr += pic->uv_stride;
-        v_ptr += pic->uv_stride;
+        u_ptr += picture->uv_stride;
+        v_ptr += picture->uv_stride;
      }
-      memset(a_ptr, 0xff, pic->width);  // reset alpha value to opaque
-      a_ptr += pic->a_stride;
-      y_ptr += pic->y_stride;
+      memset(a_ptr, 0xff, picture->width);  // reset alpha value to opaque
+      a_ptr += picture->a_stride;
+      y_ptr += picture->y_stride;
    }
  } else {
-    uint32_t* argb = pic->argb;
+    uint32_t* argb = picture->argb;
    const uint32_t background = MakeARGB32(red, green, blue);
-    for (y = 0; y < pic->height; ++y) {
-      for (x = 0; x < pic->width; ++x) {
+    for (y = 0; y < picture->height; ++y) {
+      for (x = 0; x < picture->width; ++x) {
        const int alpha = (argb[x] >> 24) & 0xff;
        if (alpha != 0xff) {
          if (alpha > 0) {
@ -262,7 +263,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
          }
        }
      }
-      argb += pic->argb_stride;
+      argb += picture->argb_stride;
    }
  }
 }
--- a/thirdparty/libwebp/src/enc/predictor_enc.c
+++ b/thirdparty/libwebp/src/enc/predictor_enc.c
@ -16,6 +16,7 @@

 #include "src/dsp/lossless.h"
 #include "src/dsp/lossless_common.h"
+#include "src/enc/vp8i_enc.h"
 #include "src/enc/vp8li_enc.h"

 #define MAX_DIFF_COST (1e30f)
@ -31,10 +32,10 @@ static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
 // Methods to calculate Entropy (Shannon).

 static float PredictionCostSpatial(const int counts[256], int weight_0,
-                                   double exp_val) {
+                                   float exp_val) {
  const int significant_symbols = 256 >> 4;
-  const double exp_decay_factor = 0.6;
-  double bits = weight_0 * counts[0];
+  const float exp_decay_factor = 0.6f;
+  float bits = (float)weight_0 * counts[0];
  int i;
  for (i = 1; i < significant_symbols; ++i) {
    bits += exp_val * (counts[i] + counts[256 - i]);
@ -46,9 +47,9 @@ static float PredictionCostSpatial(const int counts[256], int weight_0,
 static float PredictionCostSpatialHistogram(const int accumulated[4][256],
                                            const int tile[4][256]) {
  int i;
-  double retval = 0;
+  float retval = 0.f;
  for (i = 0; i < 4; ++i) {
-    const double kExpValue = 0.94;
+    const float kExpValue = 0.94f;
    retval += PredictionCostSpatial(tile[i], 1, kExpValue);
    retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
  }
@ -472,12 +473,15 @@ static void CopyImageWithPrediction(int width, int height,
 // with respect to predictions. If near_lossless_quality < 100, applies
 // near lossless processing, shaving off more bits of residuals for lower
 // qualities.
-void VP8LResidualImage(int width, int height, int bits, int low_effort,
-                       uint32_t* const argb, uint32_t* const argb_scratch,
-                       uint32_t* const image, int near_lossless_quality,
-                       int exact, int used_subtract_green) {
+int VP8LResidualImage(int width, int height, int bits, int low_effort,
+                      uint32_t* const argb, uint32_t* const argb_scratch,
+                      uint32_t* const image, int near_lossless_quality,
+                      int exact, int used_subtract_green,
+                      const WebPPicture* const pic, int percent_range,
+                      int* const percent) {
  const int tiles_per_row = VP8LSubSampleSize(width, bits);
  const int tiles_per_col = VP8LSubSampleSize(height, bits);
+  int percent_start = *percent;
  int tile_y;
  int histo[4][256];
  const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);
@ -491,17 +495,24 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort,
    for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
      int tile_x;
      for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
-        const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
-            bits, histo, argb_scratch, argb, max_quantization, exact,
-            used_subtract_green, image);
+        const int pred = GetBestPredictorForTile(
+            width, height, tile_x, tile_y, bits, histo, argb_scratch, argb,
+            max_quantization, exact, used_subtract_green, image);
        image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
      }
+
+      if (!WebPReportProgress(
+              pic, percent_start + percent_range * tile_y / tiles_per_col,
+              percent)) {
+        return 0;
+      }
    }
  }

  CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb,
                          low_effort, max_quantization, exact,
                          used_subtract_green);
+  return WebPReportProgress(pic, percent_start + percent_range, percent);
 }

 //------------------------------------------------------------------------------
@ -532,7 +543,7 @@ static float PredictionCostCrossColor(const int accumulated[256],
                                      const int counts[256]) {
  // Favor low entropy, locally and globally.
  // Favor small absolute values for PredictionCostSpatial
-  static const double kExpValue = 2.4;
+  static const float kExpValue = 2.4f;
  return VP8LCombinedShannonEntropy(counts, accumulated) +
         PredictionCostSpatial(counts, 3, kExpValue);
 }
@ -714,11 +725,14 @@ static void CopyTileWithColorTransform(int xsize, int ysize,
  }
 }

-void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
-                             uint32_t* const argb, uint32_t* image) {
+int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
+                            uint32_t* const argb, uint32_t* image,
+                            const WebPPicture* const pic, int percent_range,
+                            int* const percent) {
  const int max_tile_size = 1 << bits;
  const int tile_xsize = VP8LSubSampleSize(width, bits);
  const int tile_ysize = VP8LSubSampleSize(height, bits);
+  int percent_start = *percent;
  int accumulated_red_histo[256] = { 0 };
  int accumulated_blue_histo[256] = { 0 };
  int tile_x, tile_y;
@ -768,5 +782,11 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
        }
      }
    }
+    if (!WebPReportProgress(
+            pic, percent_start + percent_range * tile_y / tile_ysize,
+            percent)) {
+      return 0;
+    }
  }
+  return 1;
 }
--- a/thirdparty/libwebp/src/enc/quant_enc.c
+++ b/thirdparty/libwebp/src/enc/quant_enc.c
@ -533,7 +533,8 @@ static void InitScore(VP8ModeScore* const rd) {
  rd->score = MAX_COST;
 }

-static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
+static void CopyScore(VP8ModeScore* WEBP_RESTRICT const dst,
+                      const VP8ModeScore* WEBP_RESTRICT const src) {
  dst->D  = src->D;
  dst->SD = src->SD;
  dst->R  = src->R;
@ -542,7 +543,8 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
  dst->score = src->score;
 }

-static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
+static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
+                     const VP8ModeScore* WEBP_RESTRICT const src) {
  dst->D  += src->D;
  dst->SD += src->SD;
  dst->R  += src->R;
@ -588,10 +590,10 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
 // Coefficient type.
 enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 };

-static int TrellisQuantizeBlock(const VP8Encoder* const enc,
+static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
                                int16_t in[16], int16_t out[16],
                                int ctx0, int coeff_type,
-                                const VP8Matrix* const mtx,
+                                const VP8Matrix* WEBP_RESTRICT const mtx,
                                int lambda) {
  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
  CostArrayPtr const costs =
@ -767,9 +769,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
 // all at once. Output is the reconstructed block in *yuv_out, and the
 // quantized levels in *levels.

-static int ReconstructIntra16(VP8EncIterator* const it,
-                              VP8ModeScore* const rd,
-                              uint8_t* const yuv_out,
+static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
+                              VP8ModeScore* WEBP_RESTRICT const rd,
+                              uint8_t* WEBP_RESTRICT const yuv_out,
                              int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
@ -819,10 +821,10 @@ static int ReconstructIntra16(VP8EncIterator* const it,
  return nz;
 }

-static int ReconstructIntra4(VP8EncIterator* const it,
+static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it,
                             int16_t levels[16],
-                             const uint8_t* const src,
-                             uint8_t* const yuv_out,
+                             const uint8_t* WEBP_RESTRICT const src,
+                             uint8_t* WEBP_RESTRICT const yuv_out,
                             int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
@ -855,7 +857,8 @@ static int ReconstructIntra4(VP8EncIterator* const it,

 // Quantize as usual, but also compute and return the quantization error.
 // Error is already divided by DSHIFT.
-static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
+static int QuantizeSingle(int16_t* WEBP_RESTRICT const v,
+                          const VP8Matrix* WEBP_RESTRICT const mtx) {
  int V = *v;
  const int sign = (V < 0);
  if (sign) V = -V;
@ -869,9 +872,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
  return (sign ? -V : V) >> DSCALE;
 }

-static void CorrectDCValues(const VP8EncIterator* const it,
-                            const VP8Matrix* const mtx,
-                            int16_t tmp[][16], VP8ModeScore* const rd) {
+static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
+                            const VP8Matrix* WEBP_RESTRICT const mtx,
+                            int16_t tmp[][16],
+                            VP8ModeScore* WEBP_RESTRICT const rd) {
  //         | top[0] | top[1]
  // --------+--------+---------
  // left[0] | tmp[0]   tmp[1]  <->   err0 err1
@ -902,8 +906,8 @@ static void CorrectDCValues(const VP8EncIterator* const it,
  }
 }

-static void StoreDiffusionErrors(VP8EncIterator* const it,
-                                 const VP8ModeScore* const rd) {
+static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
+                                 const VP8ModeScore* WEBP_RESTRICT const rd) {
  int ch;
  for (ch = 0; ch <= 1; ++ch) {
    int8_t* const top = it->top_derr_[it->x_][ch];
@ -922,8 +926,9 @@ static void StoreDiffusionErrors(VP8EncIterator* const it,

 //------------------------------------------------------------------------------

-static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
-                         uint8_t* const yuv_out, int mode) {
+static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
+                         VP8ModeScore* WEBP_RESTRICT const rd,
+                         uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
  const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
@ -994,7 +999,8 @@ static void SwapOut(VP8EncIterator* const it) {
  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
 }

-static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
+static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
+                            VP8ModeScore* WEBP_RESTRICT rd) {
  const int kNumBlocks = 16;
  VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_i16_;
@ -1054,7 +1060,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
 //------------------------------------------------------------------------------

 // return the cost array corresponding to the surrounding prediction modes.
-static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
+static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it,
                                     const uint8_t modes[16]) {
  const int preds_w = it->enc_->preds_w_;
  const int x = (it->i4_ & 3), y = it->i4_ >> 2;
@ -1063,7 +1069,8 @@ static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
  return VP8FixedCostsI4[top][left];
 }

-static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
+static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
+                          VP8ModeScore* WEBP_RESTRICT const rd) {
  const VP8Encoder* const enc = it->enc_;
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_i4_;
@ -1159,7 +1166,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

 //------------------------------------------------------------------------------

-static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
+static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
+                       VP8ModeScore* WEBP_RESTRICT const rd) {
  const int kNumBlocks = 8;
  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_uv_;
@ -1211,7 +1219,8 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
 //------------------------------------------------------------------------------
 // Final reconstruction and quantization.

-static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
+static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
+                           VP8ModeScore* WEBP_RESTRICT const rd) {
  const VP8Encoder* const enc = it->enc_;
  const int is_i16 = (it->mb_->type_ == 1);
  int nz = 0;
@ -1236,9 +1245,9 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
 }

 // Refine intra16/intra4 sub-modes based on distortion only (not rate).
-static void RefineUsingDistortion(VP8EncIterator* const it,
+static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
                                  int try_both_modes, int refine_uv_mode,
-                                  VP8ModeScore* const rd) {
+                                  VP8ModeScore* WEBP_RESTRICT const rd) {
  score_t best_score = MAX_COST;
  int nz = 0;
  int mode;
@ -1352,7 +1361,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
 //------------------------------------------------------------------------------
 // Entry point

-int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
+int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
+                VP8ModeScore* WEBP_RESTRICT const rd,
                VP8RDLevel rd_opt) {
  int is_skipped;
  const int method = it->enc_->method_;
--- a/thirdparty/libwebp/src/enc/vp8i_enc.h
+++ b/thirdparty/libwebp/src/enc/vp8i_enc.h
@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define ENC_MAJ_VERSION 1
 #define ENC_MIN_VERSION 2
-#define ENC_REV_VERSION 2
+#define ENC_REV_VERSION 4

 enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
       MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
@ -470,7 +470,8 @@ int VP8EncAnalyze(VP8Encoder* const enc);
 // Sets up segment's quantization values, base_quant_ and filter strengths.
 void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
 // Pick best modes and fills the levels. Returns true if skipped.
-int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
+int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
+                VP8ModeScore* WEBP_RESTRICT const rd,
                VP8RDLevel rd_opt);

  // in alpha.c
@ -490,19 +491,24 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta);

  // misc utils for picture_*.c:

+// Returns true if 'picture' is non-NULL and dimensions/colorspace are within
+// their valid ranges. If returning false, the 'error_code' in 'picture' is
+// updated.
+int WebPValidatePicture(const WebPPicture* const picture);
+
 // Remove reference to the ARGB/YUVA buffer (doesn't free anything).
 void WebPPictureResetBuffers(WebPPicture* const picture);

-// Allocates ARGB buffer of given dimension (previous one is always free'd).
-// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
-// out-of-memory).
-int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
+// Allocates ARGB buffer according to set width/height (previous one is
+// always free'd). Preserves the YUV(A) buffer. Returns false in case of error
+// (invalid param, out-of-memory).
+int WebPPictureAllocARGB(WebPPicture* const picture);

-// Allocates YUVA buffer of given dimension (previous one is always free'd).
-// Uses picture->csp to determine whether an alpha buffer is needed.
+// Allocates YUVA buffer according to set width/height (previous one is always
+// free'd). Uses picture->csp to determine whether an alpha buffer is needed.
 // Preserves the ARGB buffer.
 // Returns false in case of error (invalid param, out-of-memory).
-int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
+int WebPPictureAllocYUVA(WebPPicture* const picture);

 // Replace samples that are fully transparent by 'color' to help compressibility
 // (no guarantee, though). Assumes pic->use_argb is true.
--- a/thirdparty/libwebp/src/enc/vp8l_enc.c
+++ b/thirdparty/libwebp/src/enc/vp8l_enc.c
--- a/thirdparty/libwebp/src/enc/vp8li_enc.h
+++ b/thirdparty/libwebp/src/enc/vp8li_enc.h
@ -89,9 +89,10 @@ int VP8LEncodeImage(const WebPConfig* const config,

 // Encodes the main image stream using the supplied bit writer.
 // If 'use_cache' is false, disables the use of color cache.
-WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-                                   const WebPPicture* const picture,
-                                   VP8LBitWriter* const bw, int use_cache);
+// Returns false in case of error (stored in picture->error_code).
+int VP8LEncodeStream(const WebPConfig* const config,
+                     const WebPPicture* const picture, VP8LBitWriter* const bw,
+                     int use_cache);

 #if (WEBP_NEAR_LOSSLESS == 1)
 // in near_lossless.c
@ -103,13 +104,18 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
 //------------------------------------------------------------------------------
 // Image transforms in predictor.c.

-void VP8LResidualImage(int width, int height, int bits, int low_effort,
-                       uint32_t* const argb, uint32_t* const argb_scratch,
-                       uint32_t* const image, int near_lossless, int exact,
-                       int used_subtract_green);
+// pic and percent are for progress.
+// Returns false in case of error (stored in pic->error_code).
+int VP8LResidualImage(int width, int height, int bits, int low_effort,
+                      uint32_t* const argb, uint32_t* const argb_scratch,
+                      uint32_t* const image, int near_lossless, int exact,
+                      int used_subtract_green, const WebPPicture* const pic,
+                      int percent_range, int* const percent);

-void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
-                             uint32_t* const argb, uint32_t* image);
+int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
+                            uint32_t* const argb, uint32_t* image,
+                            const WebPPicture* const pic, int percent_range,
+                            int* const percent);

 //------------------------------------------------------------------------------

--- a/thirdparty/libwebp/src/enc/webp_enc.c
+++ b/thirdparty/libwebp/src/enc/webp_enc.c
@ -336,9 +336,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
  if (!WebPValidateConfig(config)) {
    return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }
-  if (pic->width <= 0 || pic->height <= 0) {
-    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
-  }
+  if (!WebPValidatePicture(pic)) return 0;
  if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) {
    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
  }
--- a/thirdparty/libwebp/src/mux/muxedit.c
+++ b/thirdparty/libwebp/src/mux/muxedit.c
@ -70,6 +70,7 @@ void WebPMuxDelete(WebPMux* mux) {
    err = ChunkAssignData(&chunk, data, copy_data, tag);                       \
    if (err == WEBP_MUX_OK) {                                                  \
      err = ChunkSetHead(&chunk, (LIST));                                      \
+      if (err != WEBP_MUX_OK) ChunkRelease(&chunk);                            \
    }                                                                          \
    return err;                                                                \
  }
--- a/thirdparty/libwebp/src/mux/muxi.h
+++ b/thirdparty/libwebp/src/mux/muxi.h
@ -29,7 +29,7 @@ extern "C" {

 #define MUX_MAJ_VERSION 1
 #define MUX_MIN_VERSION 2
-#define MUX_REV_VERSION 2
+#define MUX_REV_VERSION 4

 // Chunk object.
 typedef struct WebPChunk WebPChunk;
--- a/thirdparty/libwebp/src/mux/muxinternal.c
+++ b/thirdparty/libwebp/src/mux/muxinternal.c
@ -155,17 +155,18 @@ WebPMuxError ChunkSetHead(WebPChunk* const chunk,

 WebPMuxError ChunkAppend(WebPChunk* const chunk,
                         WebPChunk*** const chunk_list) {
+  WebPMuxError err;
  assert(chunk_list != NULL && *chunk_list != NULL);

  if (**chunk_list == NULL) {
-    ChunkSetHead(chunk, *chunk_list);
+    err = ChunkSetHead(chunk, *chunk_list);
  } else {
    WebPChunk* last_chunk = **chunk_list;
    while (last_chunk->next_ != NULL) last_chunk = last_chunk->next_;
-    ChunkSetHead(chunk, &last_chunk->next_);
-    *chunk_list = &last_chunk->next_;
+    err = ChunkSetHead(chunk, &last_chunk->next_);
+    if (err == WEBP_MUX_OK) *chunk_list = &last_chunk->next_;
  }
-  return WEBP_MUX_OK;
+  return err;
 }

 //------------------------------------------------------------------------------
--- a/thirdparty/libwebp/src/webp/encode.h
+++ b/thirdparty/libwebp/src/webp/encode.h
@ -441,7 +441,7 @@ WEBP_EXTERN int WebPPictureCrop(WebPPicture* picture,
 // the original dimension will be lost). Picture 'dst' need not be initialized
 // with WebPPictureInit() if it is different from 'src', since its content will
 // be overwritten.
-// Returns false in case of memory allocation error or invalid parameters.
+// Returns false in case of invalid parameters.
 WEBP_EXTERN int WebPPictureView(const WebPPicture* src,
                                int left, int top, int width, int height,
                                WebPPicture* dst);
@ -455,7 +455,7 @@ WEBP_EXTERN int WebPPictureIsView(const WebPPicture* picture);
 // dimension will be calculated preserving the aspect ratio.
 // No gamma correction is applied.
 // Returns false in case of error (invalid parameter or insufficient memory).
-WEBP_EXTERN int WebPPictureRescale(WebPPicture* pic, int width, int height);
+WEBP_EXTERN int WebPPictureRescale(WebPPicture* picture, int width, int height);

 // Colorspace conversion function to import RGB samples.
 // Previous buffer will be free'd, if any.
@ -526,7 +526,7 @@ WEBP_EXTERN int WebPPictureHasTransparency(const WebPPicture* picture);
 // Remove the transparency information (if present) by blending the color with
 // the background color 'background_rgb' (specified as 24bit RGB triplet).
 // After this call, all alpha values are reset to 0xff.
-WEBP_EXTERN void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
+WEBP_EXTERN void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb);

 //------------------------------------------------------------------------------
 // Main call