Merge pull request #57102 from akien-mga/libwebp-1.2.2

2022-02-03 15:16:57 +01:00 · 2022-02-03 15:16:57 +01:00 · 6acbd5f774
parent c4f38813b4 339dcd80ae
commit 6acbd5f774
28 changed files with 225 additions and 139 deletions
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -309,7 +309,7 @@ Files extracted from upstream source:
 ## libwebp

 - Upstream: https://chromium.googlesource.com/webm/libwebp/
- Version: 1.2.1 (9ce5843dbabcfd3f7c39ec7ceba9cbeb213cbfdf, 2021)
+- Version: 1.2.2 (b0a860891dcd4c0c2d7c6149e5cccb6eb881cc21, 2022)
 - License: BSD-3-Clause

 Files extracted from upstream source:
@@ -317,10 +317,6 @@ Files extracted from upstream source:
 - `src/*` except from: `.am`, `.rc` and `.in` files
 - `AUTHORS`, `COPYING`, `PATENTS`

-Important: The files `utils/bit_reader_utils.{c,h}` have Godot-made
-changes to ensure they build for Javascript/HTML5. Those
-changes are marked with `// -- GODOT --` comments.
-

 ## mbedtls

--- a/thirdparty/libwebp/AUTHORS
+++ b/thirdparty/libwebp/AUTHORS
@@ -32,6 +32,7 @@ Contributors:
 - Pascal Massimino (pascal dot massimino at gmail dot com)
 - Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
 - Pierre Joye (pierre dot php at gmail dot com)
+- Roberto Alanis (alanisbaez at google dot com)
 - Sam Clegg (sbc at chromium dot org)
 - Scott Hancher (seh at google dot com)
 - Scott LaVarnway (slavarnway at google dot com)
--- a/thirdparty/libwebp/src/dec/vp8_dec.c
+++ b/thirdparty/libwebp/src/dec/vp8_dec.c
@@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = {
  0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
 };

-// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
+// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
 static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
  int v;
  if (!VP8GetBit(br, p[3], "coeffs")) {
--- a/thirdparty/libwebp/src/dec/vp8i_dec.h
+++ b/thirdparty/libwebp/src/dec/vp8i_dec.h
@@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define DEC_MAJ_VERSION 1
 #define DEC_MIN_VERSION 2
-#define DEC_REV_VERSION 1
+#define DEC_REV_VERSION 2

 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
 // Constraints are: We need to store one 16x16 block of luma samples (y),
--- a/thirdparty/libwebp/src/dec/vp8l_dec.c
+++ b/thirdparty/libwebp/src/dec/vp8l_dec.c
@@ -84,7 +84,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
 // to 256 (green component values) + 24 (length prefix values)
 // + color_cache_size (between 0 and 2048).
 // All values computed for 8-bit first level lookup with Mark Adler's tool:
-// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
+// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
 #define FIXED_TABLE_SIZE (630 * 3 + 410)
 static const uint16_t kTableSize[12] = {
  FIXED_TABLE_SIZE + 654,
--- a/thirdparty/libwebp/src/demux/anim_decode.c
+++ b/thirdparty/libwebp/src/demux/anim_decode.c
@@ -23,6 +23,14 @@

 #define NUM_CHANNELS 4

+// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA
+// buffer.
+#ifdef WORDS_BIGENDIAN
+#define CHANNEL_SHIFT(i) (24 - (i) * 8)
+#else
+#define CHANNEL_SHIFT(i) ((i) * 8)
+#endif
+
 typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
 static void BlendPixelRowNonPremult(uint32_t* const src,
                                    const uint32_t* const dst, int num_pixels);
@@ -209,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
  const uint8_t dst_channel = (dst >> shift) & 0xff;
  const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
  assert(blend_unscaled < (1ULL << 32) / scale);
-  return (blend_unscaled * scale) >> 24;
+  return (blend_unscaled * scale) >> CHANNEL_SHIFT(3);
 }

 // Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha.
 static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
-  const uint8_t src_a = (src >> 24) & 0xff;
+  const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;

  if (src_a == 0) {
    return dst;
  } else {
-    const uint8_t dst_a = (dst >> 24) & 0xff;
+    const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff;
    // This is the approximate integer arithmetic for the actual formula:
    // dst_factor_a = (dst_a * (255 - src_a)) / 255.
    const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
    const uint8_t blend_a = src_a + dst_factor_a;
    const uint32_t scale = (1UL << 24) / blend_a;

-    const uint8_t blend_r =
-        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
-    const uint8_t blend_g =
-        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
-    const uint8_t blend_b =
-        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
+    const uint8_t blend_r = BlendChannelNonPremult(
+        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0));
+    const uint8_t blend_g = BlendChannelNonPremult(
+        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1));
+    const uint8_t blend_b = BlendChannelNonPremult(
+        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2));
    assert(src_a + dst_factor_a < 256);

-    return (blend_r << 0) |
-           (blend_g << 8) |
-           (blend_b << 16) |
-           ((uint32_t)blend_a << 24);
+    return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) |
+           ((uint32_t)blend_g << CHANNEL_SHIFT(1)) |
+           ((uint32_t)blend_b << CHANNEL_SHIFT(2)) |
+           ((uint32_t)blend_a << CHANNEL_SHIFT(3));
  }
 }

@@ -247,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src,
                                    const uint32_t* const dst, int num_pixels) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
-    const uint8_t src_alpha = (src[i] >> 24) & 0xff;
+    const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
    if (src_alpha != 0xff) {
      src[i] = BlendPixelNonPremult(src[i], dst[i]);
    }
@@ -264,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {

 // Blend 'src' over 'dst' assuming they are pre-multiplied by alpha.
 static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
-  const uint8_t src_a = (src >> 24) & 0xff;
+  const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
  return src + ChannelwiseMultiply(dst, 256 - src_a);
 }

@@ -274,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
                                 int num_pixels) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
-    const uint8_t src_alpha = (src[i] >> 24) & 0xff;
+    const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
    if (src_alpha != 0xff) {
      src[i] = BlendPixelPremult(src[i], dst[i]);
    }
--- a/thirdparty/libwebp/src/demux/demux.c
+++ b/thirdparty/libwebp/src/demux/demux.c
@@ -25,7 +25,7 @@

 #define DMUX_MAJ_VERSION 1
 #define DMUX_MIN_VERSION 2
-#define DMUX_REV_VERSION 1
+#define DMUX_REV_VERSION 2

 typedef struct {
  size_t start_;        // start location of the data
--- a/thirdparty/libwebp/src/dsp/dsp.h
+++ b/thirdparty/libwebp/src/dsp/dsp.h
@@ -119,7 +119,12 @@ extern "C" {
 #define WEBP_USE_NEON
 #endif

-#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
+// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
+// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
+// arm_neon.h.
+#if defined(_MSC_VER) && \
+  ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
+   (_MSC_VER >= 1920 && defined(_M_ARM64)))
 #define WEBP_USE_NEON
 #define WEBP_USE_INTRINSICS
 #endif
--- a/thirdparty/libwebp/src/dsp/enc_neon.c
+++ b/thirdparty/libwebp/src/dsp/enc_neon.c
@@ -9,7 +9,7 @@
 //
 // ARM NEON version of speed-critical encoding functions.
 //
-// adapted from libvpx (http://www.webmproject.org/code/)
+// adapted from libvpx (https://www.webmproject.org/code/)

 #include "src/dsp/dsp.h"

--- a/thirdparty/libwebp/src/dsp/lossless.c
+++ b/thirdparty/libwebp/src/dsp/lossless.c
@@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
 //------------------------------------------------------------------------------
 // Predictors

-uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) {
+uint32_t VP8LPredictor0_C(const uint32_t* const left,
+                          const uint32_t* const top) {
  (void)top;
  (void)left;
  return ARGB_BLACK;
 }
-uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) {
+uint32_t VP8LPredictor1_C(const uint32_t* const left,
+                          const uint32_t* const top) {
  (void)top;
-  return left;
+  return *left;
 }
-uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) {
+uint32_t VP8LPredictor2_C(const uint32_t* const left,
+                          const uint32_t* const top) {
  (void)left;
  return top[0];
 }
-uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) {
+uint32_t VP8LPredictor3_C(const uint32_t* const left,
+                          const uint32_t* const top) {
  (void)left;
  return top[1];
 }
-uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) {
+uint32_t VP8LPredictor4_C(const uint32_t* const left,
+                          const uint32_t* const top) {
  (void)left;
  return top[-1];
 }
-uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average3(left, top[0], top[1]);
+uint32_t VP8LPredictor5_C(const uint32_t* const left,
+                          const uint32_t* const top) {
+  const uint32_t pred = Average3(*left, top[0], top[1]);
  return pred;
 }
-uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average2(left, top[-1]);
+uint32_t VP8LPredictor6_C(const uint32_t* const left,
+                          const uint32_t* const top) {
+  const uint32_t pred = Average2(*left, top[-1]);
  return pred;
 }
-uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average2(left, top[0]);
+uint32_t VP8LPredictor7_C(const uint32_t* const left,
+                          const uint32_t* const top) {
+  const uint32_t pred = Average2(*left, top[0]);
  return pred;
 }
-uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) {
+uint32_t VP8LPredictor8_C(const uint32_t* const left,
+                          const uint32_t* const top) {
  const uint32_t pred = Average2(top[-1], top[0]);
  (void)left;
  return pred;
 }
-uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) {
+uint32_t VP8LPredictor9_C(const uint32_t* const left,
+                          const uint32_t* const top) {
  const uint32_t pred = Average2(top[0], top[1]);
  (void)left;
  return pred;
 }
-uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
+uint32_t VP8LPredictor10_C(const uint32_t* const left,
+                           const uint32_t* const top) {
+  const uint32_t pred = Average4(*left, top[-1], top[0], top[1]);
  return pred;
 }
-uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Select(top[0], left, top[-1]);
+uint32_t VP8LPredictor11_C(const uint32_t* const left,
+                           const uint32_t* const top) {
+  const uint32_t pred = Select(top[0], *left, top[-1]);
  return pred;
 }
-uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
+uint32_t VP8LPredictor12_C(const uint32_t* const left,
+                           const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]);
  return pred;
 }
-uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
+uint32_t VP8LPredictor13_C(const uint32_t* const left,
+                           const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]);
  return pred;
 }

--- a/thirdparty/libwebp/src/dsp/lossless.h
+++ b/thirdparty/libwebp/src/dsp/lossless.h
@@ -28,23 +28,38 @@ extern "C" {
 //------------------------------------------------------------------------------
 // Decoding

-typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
+typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
+                                      const uint32_t* const top);
 extern VP8LPredictorFunc VP8LPredictors[16];

-uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top);
-uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top);
+uint32_t VP8LPredictor0_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor1_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor2_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor3_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor4_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor5_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor6_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor7_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor8_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor9_C(const uint32_t* const left,
+                          const uint32_t* const top);
+uint32_t VP8LPredictor10_C(const uint32_t* const left,
+                           const uint32_t* const top);
+uint32_t VP8LPredictor11_C(const uint32_t* const left,
+                           const uint32_t* const top);
+uint32_t VP8LPredictor12_C(const uint32_t* const left,
+                           const uint32_t* const top);
+uint32_t VP8LPredictor13_C(const uint32_t* const left,
+                           const uint32_t* const top);

 // These Add/Sub function expects upper[-1] and out[-1] to be readable.
 typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
--- a/thirdparty/libwebp/src/dsp/lossless_common.h
+++ b/thirdparty/libwebp/src/dsp/lossless_common.h
@@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
  int x;                                                             \
  assert(upper != NULL);                                             \
  for (x = 0; x < num_pixels; ++x) {                                 \
-    const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x);        \
+    const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x);       \
    out[x] = VP8LAddPixels(in[x], pred);                             \
  }                                                                  \
 }
--- a/thirdparty/libwebp/src/dsp/lossless_enc.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc.c
@@ -745,7 +745,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in,              \
  assert(upper != NULL);                                                   \
  for (x = 0; x < num_pixels; ++x) {                                       \
    const uint32_t pred =                                                  \
-        VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x);              \
+        VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x);             \
    out[x] = VP8LSubPixels(in[x], pred);                                   \
  }                                                                        \
 }
--- a/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
@@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
  return Average2(Average2(a0, a1), Average2(a2, a3));
 }

-static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) {
-  return Average3(left, top[0], top[1]);
+static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
+                                     const uint32_t* const top) {
+  return Average3(*left, top[0], top[1]);
 }

-static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) {
-  return Average2(left, top[-1]);
+static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
+                                     const uint32_t* const top) {
+  return Average2(*left, top[-1]);
 }

-static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) {
-  return Average2(left, top[0]);
+static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
+                                     const uint32_t* const top) {
+  return Average2(*left, top[0]);
 }

-static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
+                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
 }

-static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
+                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
 }

-static uint32_t Predictor10_MIPSdspR2(uint32_t left,
+static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
-  return Average4(left, top[-1], top[0], top[1]);
+  return Average4(*left, top[-1], top[0], top[1]);
 }

-static uint32_t Predictor11_MIPSdspR2(uint32_t left,
+static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
-  return Select(top[0], left, top[-1]);
+  return Select(top[0], *left, top[-1]);
 }

-static uint32_t Predictor12_MIPSdspR2(uint32_t left,
+static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
-  return ClampedAddSubtractFull(left, top[0], top[-1]);
+  return ClampedAddSubtractFull(*left, top[0], top[-1]);
 }

-static uint32_t Predictor13_MIPSdspR2(uint32_t left,
+static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
-  return ClampedAddSubtractHalf(left, top[0], top[-1]);
+  return ClampedAddSubtractHalf(*left, top[0], top[-1]);
 }

 // Add green to blue and red channels (i.e. perform the inverse transform of
--- a/thirdparty/libwebp/src/dsp/lossless_neon.c
+++ b/thirdparty/libwebp/src/dsp/lossless_neon.c
@@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1,
  return avg;
 }

-static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) {
-  return Average3_NEON(left, top[0], top[1]);
+static uint32_t Predictor5_NEON(const uint32_t* const left,
+                                const uint32_t* const top) {
+  return Average3_NEON(*left, top[0], top[1]);
 }
-static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) {
-  return Average2_NEON(left, top[-1]);
+static uint32_t Predictor6_NEON(const uint32_t* const left,
+                                const uint32_t* const top) {
+  return Average2_NEON(*left, top[-1]);
 }
-static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) {
-  return Average2_NEON(left, top[0]);
+static uint32_t Predictor7_NEON(const uint32_t* const left,
+                                const uint32_t* const top) {
+  return Average2_NEON(*left, top[0]);
 }
-static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) {
-  return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]);
+static uint32_t Predictor13_NEON(const uint32_t* const left,
+                                 const uint32_t* const top) {
+  return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]);
 }

 // Batch versions of those functions.
--- a/thirdparty/libwebp/src/dsp/lossless_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c
@@ -138,42 +138,51 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
  return output;
 }

-static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average3_SSE2(left, top[0], top[1]);
+static uint32_t Predictor5_SSE2(const uint32_t* const left,
+                                const uint32_t* const top) {
+  const uint32_t pred = Average3_SSE2(*left, top[0], top[1]);
  return pred;
 }
-static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average2_SSE2(left, top[-1]);
+static uint32_t Predictor6_SSE2(const uint32_t* const left,
+                                const uint32_t* const top) {
+  const uint32_t pred = Average2_SSE2(*left, top[-1]);
  return pred;
 }
-static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average2_SSE2(left, top[0]);
+static uint32_t Predictor7_SSE2(const uint32_t* const left,
+                                const uint32_t* const top) {
+  const uint32_t pred = Average2_SSE2(*left, top[0]);
  return pred;
 }
-static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor8_SSE2(const uint32_t* const left,
+                                const uint32_t* const top) {
  const uint32_t pred = Average2_SSE2(top[-1], top[0]);
  (void)left;
  return pred;
 }
-static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) {
+static uint32_t Predictor9_SSE2(const uint32_t* const left,
+                                const uint32_t* const top) {
  const uint32_t pred = Average2_SSE2(top[0], top[1]);
  (void)left;
  return pred;
 }
-static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]);
+static uint32_t Predictor10_SSE2(const uint32_t* const left,
+                                 const uint32_t* const top) {
+  const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]);
  return pred;
 }
-static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = Select_SSE2(top[0], left, top[-1]);
+static uint32_t Predictor11_SSE2(const uint32_t* const left,
+                                 const uint32_t* const top) {
+  const uint32_t pred = Select_SSE2(top[0], *left, top[-1]);
  return pred;
 }
-static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]);
+static uint32_t Predictor12_SSE2(const uint32_t* const left,
+                                 const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]);
  return pred;
 }
-static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
-  const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]);
+static uint32_t Predictor13_SSE2(const uint32_t* const left,
+                                 const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]);
  return pred;
 }

--- a/thirdparty/libwebp/src/dsp/msa_macro.h
+++ b/thirdparty/libwebp/src/dsp/msa_macro.h
@@ -14,6 +14,10 @@
 #ifndef WEBP_DSP_MSA_MACRO_H_
 #define WEBP_DSP_MSA_MACRO_H_

+#include "src/dsp/dsp.h"
+
+#if defined(WEBP_USE_MSA)
+
 #include <stdint.h>
 #include <msa.h>

@@ -1389,4 +1393,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
 } while (0)
 #define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)

+#endif  // WEBP_USE_MSA
 #endif  // WEBP_DSP_MSA_MACRO_H_
--- a/thirdparty/libwebp/src/dsp/neon.h
+++ b/thirdparty/libwebp/src/dsp/neon.h
@@ -12,10 +12,12 @@
 #ifndef WEBP_DSP_NEON_H_
 #define WEBP_DSP_NEON_H_

-#include <arm_neon.h>
-
 #include "src/dsp/dsp.h"

+#if defined(WEBP_USE_NEON)
+
+#include <arm_neon.h>
+
 // Right now, some intrinsics functions seem slower, so we disable them
 // everywhere except newer clang/gcc or aarch64 where the inline assembly is
 // incompatible.
@@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
 } while (0)
 #endif

+#endif  // WEBP_USE_NEON
 #endif  // WEBP_DSP_NEON_H_
--- a/thirdparty/libwebp/src/dsp/yuv.h
+++ b/thirdparty/libwebp/src/dsp/yuv.h
@@ -10,7 +10,7 @@
 // inline YUV<->RGB conversion function
 //
 // The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
-// More information at: http://en.wikipedia.org/wiki/YCbCr
+// More information at: https://en.wikipedia.org/wiki/YCbCr
 // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
 // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
 // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
--- a/thirdparty/libwebp/src/enc/frame_enc.c
+++ b/thirdparty/libwebp/src/enc/frame_enc.c
@@ -778,6 +778,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
  // Roughly refresh the proba eight times per pass
  int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
  int num_pass_left = enc->config_->pass;
+  int remaining_progress = 40;  // percents
  const int do_search = enc->do_search_;
  VP8EncIterator it;
  VP8EncProba* const proba = &enc->proba_;
@@ -805,6 +806,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
    uint64_t size_p0 = 0;
    uint64_t distortion = 0;
    int cnt = max_count;
+    // The final number of passes is not trivial to know in advance.
+    const int pass_progress = remaining_progress / (2 + num_pass_left);
+    remaining_progress -= pass_progress;
    VP8IteratorInit(enc, &it);
    SetLoopParams(enc, stats.q);
    if (is_last_pass) {
@@ -832,7 +836,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
        StoreSideInfo(&it);
        VP8StoreFilterStats(&it);
        VP8IteratorExport(&it);
-        ok = VP8IteratorProgress(&it, 20);
+        ok = VP8IteratorProgress(&it, pass_progress);
      }
      VP8IteratorSaveBoundary(&it);
    } while (ok && VP8IteratorNext(&it));
@@ -878,7 +882,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
    ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
                       (const uint8_t*)proba->coeffs_, 1);
  }
-  ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+  ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress,
+                                &enc->percent_);
  return PostLoopFinalize(&it, ok);
 }

--- a/thirdparty/libwebp/src/enc/predictor_enc.c
+++ b/thirdparty/libwebp/src/enc/predictor_enc.c
@@ -249,7 +249,7 @@ static WEBP_INLINE void GetResidual(
      } else if (x == 0) {
        predict = upper_row[x];  // Top.
      } else {
-        predict = pred_func(current_row[x - 1], upper_row + x);
+        predict = pred_func(&current_row[x - 1], upper_row + x);
      }
 #if (WEBP_NEAR_LOSSLESS == 1)
      if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||
--- a/thirdparty/libwebp/src/enc/quant_enc.c
+++ b/thirdparty/libwebp/src/enc/quant_enc.c
@@ -585,6 +585,9 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
  return rate * lambda + RD_DISTO_MULT * distortion;
 }

+// Coefficient type.
+enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 };
+
 static int TrellisQuantizeBlock(const VP8Encoder* const enc,
                                int16_t in[16], int16_t out[16],
                                int ctx0, int coeff_type,
@@ -593,7 +596,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
  CostArrayPtr const costs =
      (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
-  const int first = (coeff_type == 0) ? 1 : 0;
+  const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0;
  Node nodes[16][NUM_NODES];
  ScoreState score_states[2][NUM_NODES];
  ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
@@ -657,16 +660,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
    // test all alternate level values around level0.
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
      Node* const cur = &NODE(n, m);
-      int level = level0 + m;
+      const int level = level0 + m;
      const int ctx = (level > 2) ? 2 : level;
      const int band = VP8EncBands[n + 1];
      score_t base_score;
-      score_t best_cur_score = MAX_COST;
-      int best_prev = 0;   // default, in case
+      score_t best_cur_score;
+      int best_prev;
+      score_t cost, score;

-      ss_cur[m].score = MAX_COST;
      ss_cur[m].costs = costs[n + 1][ctx];
      if (level < 0 || level > thresh_level) {
+        ss_cur[m].score = MAX_COST;
        // Node is dead.
        continue;
      }
@@ -682,18 +686,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
      }

      // Inspect all possible non-dead predecessors. Retain only the best one.
-      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
+      // The base_score is added to all scores so it is only added for the final
+      // value after the loop.
+      cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level);
+      best_cur_score =
+          ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0);
+      best_prev = -MIN_DELTA;
+      for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) {
        // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
        // eliminated since their score can't be better than the current best.
-        const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
+        cost = VP8LevelCost(ss_prev[p].costs, level);
        // Examine node assuming it's a non-terminal one.
-        const score_t score =
-            base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
+        score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
        if (score < best_cur_score) {
          best_cur_score = score;
          best_prev = p;
        }
      }
+      best_cur_score += base_score;
      // Store best finding in current node.
      cur->sign = sign;
      cur->level = level;
@@ -701,11 +711,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
      ss_cur[m].score = best_cur_score;

      // Now, record best terminal node (and thus best entry in the graph).
-      if (level != 0) {
+      if (level != 0 && best_cur_score < best_score) {
        const score_t last_pos_cost =
            (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
        const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
-        const score_t score = best_cur_score + last_pos_score;
+        score = best_cur_score + last_pos_score;
        if (score < best_score) {
          best_score = score;
          best_path[0] = n;                     // best eob position
@ -717,10 +727,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
  }

  // Fresh start
-  memset(in + first, 0, (16 - first) * sizeof(*in));
-  memset(out + first, 0, (16 - first) * sizeof(*out));
+  // Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case.
+  if (coeff_type == TYPE_I16_AC) {
+    memset(in + 1, 0, 15 * sizeof(*in));
+    memset(out + 1, 0, 15 * sizeof(*out));
+  } else {
+    memset(in, 0, 16 * sizeof(*in));
+    memset(out, 0, 16 * sizeof(*out));
+  }
  if (best_path[0] == -1) {
-    return 0;   // skip!
+    return 0;  // skip!
  }

  {
@ -775,9 +791,9 @@ static int ReconstructIntra16(VP8EncIterator* const it,
    for (y = 0, n = 0; y < 4; ++y) {
      for (x = 0; x < 4; ++x, ++n) {
        const int ctx = it->top_nz_[x] + it->left_nz_[y];
-        const int non_zero =
-            TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
-                                 &dqm->y1_, dqm->lambda_trellis_i16_);
+        const int non_zero = TrellisQuantizeBlock(
+            enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_,
+            dqm->lambda_trellis_i16_);
        it->top_nz_[x] = it->left_nz_[y] = non_zero;
        rd->y_ac_levels[n][0] = 0;
        nz |= non_zero << n;
@ -818,7 +834,7 @@ static int ReconstructIntra4(VP8EncIterator* const it,
  if (DO_TRELLIS_I4 && it->do_trellis_) {
    const int x = it->i4_ & 3, y = it->i4_ >> 2;
    const int ctx = it->top_nz_[x] + it->left_nz_[y];
-    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
+    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_,
                              dqm->lambda_trellis_i4_);
  } else {
    nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
@ -927,9 +943,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
      for (y = 0; y < 2; ++y) {
        for (x = 0; x < 2; ++x, ++n) {
          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
-          const int non_zero =
-              TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
-                                   &dqm->uv_, dqm->lambda_trellis_uv_);
+          const int non_zero = TrellisQuantizeBlock(
+              enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_,
+              dqm->lambda_trellis_uv_);
          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
          nz |= non_zero << n;
        }
--- a/thirdparty/libwebp/src/enc/vp8i_enc.h
+++ b/thirdparty/libwebp/src/enc/vp8i_enc.h
@ -32,7 +32,7 @@ extern "C" {
 // version numbers
 #define ENC_MAJ_VERSION 1
 #define ENC_MIN_VERSION 2
-#define ENC_REV_VERSION 1
+#define ENC_REV_VERSION 2

 enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
       MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
--- a/thirdparty/libwebp/src/mux/muxi.h
+++ b/thirdparty/libwebp/src/mux/muxi.h
@ -29,7 +29,7 @@ extern "C" {

 #define MUX_MAJ_VERSION 1
 #define MUX_MIN_VERSION 2
-#define MUX_REV_VERSION 1
+#define MUX_REV_VERSION 2

 // Chunk object.
 typedef struct WebPChunk WebPChunk;
--- a/thirdparty/libwebp/src/utils/huffman_encode_utils.c
+++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.c
@ -161,7 +161,7 @@ static void SetBitDepths(const HuffmanTree* const tree,
 // especially when population counts are longer than 2**tree_limit, but
 // we are not planning to use this with extremely long blocks.
 //
-// See http://en.wikipedia.org/wiki/Huffman_coding
+// See https://en.wikipedia.org/wiki/Huffman_coding
 static void GenerateOptimalTree(const uint32_t* const histogram,
                                int histogram_size,
                                HuffmanTree* tree, int tree_depth_limit,
--- a/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c
+++ b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c
@ -30,7 +30,7 @@

 #define DFIX 4           // extra precision for ordered dithering
 #define DSIZE 4          // dithering size (must be a power of two)
-// cf. http://en.wikipedia.org/wiki/Ordered_dithering
+// cf. https://en.wikipedia.org/wiki/Ordered_dithering
 static const uint8_t kOrderedDither[DSIZE][DSIZE] = {
  {  0,  8,  2, 10 },     // coefficients are in DFIX fixed-point precision
  { 12,  4, 14,  6 },
--- a/thirdparty/libwebp/src/utils/utils.c
+++ b/thirdparty/libwebp/src/utils/utils.c
@ -23,7 +23,7 @@
 // alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
 // and not multi-thread safe!).
 // An interesting alternative is valgrind's 'massif' tool:
-//    http://valgrind.org/docs/manual/ms-manual.html
+//    https://valgrind.org/docs/manual/ms-manual.html
 // Here is an example command line:
 /*    valgrind --tool=massif --massif-out-file=massif.out \
               --stacks=yes --alloc-fn=WebPSafeMalloc --alloc-fn=WebPSafeCalloc
--- a/thirdparty/libwebp/src/webp/decode.h
+++ b/thirdparty/libwebp/src/webp/decode.h
@ -85,7 +85,7 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
 // Upon return, the Y buffer has a stride returned as '*stride', while U and V
 // have a common stride returned as '*uv_stride'.
 // Return NULL in case of error.
-// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
+// (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr
 WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
                                   int* width, int* height,
                                   uint8_t** u, uint8_t** v,