From 2529ad6a6427a27f47c1ef8ce2aac608f6c20ed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Mon, 7 Aug 2023 18:12:21 +0200 Subject: [PATCH] libwebp: Sync with upstream 1.3.1 https://chromium.googlesource.com/webm/libwebp/+/1.3.1/NEWS --- thirdparty/README.md | 2 +- thirdparty/libwebp/AUTHORS | 2 + thirdparty/libwebp/sharpyuv/sharpyuv.c | 1 + thirdparty/libwebp/sharpyuv/sharpyuv.h | 2 +- thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c | 1 + thirdparty/libwebp/src/dec/tree_dec.c | 3 +- thirdparty/libwebp/src/dec/vp8_dec.c | 2 + thirdparty/libwebp/src/dec/vp8i_dec.h | 2 +- thirdparty/libwebp/src/dec/webp_dec.c | 31 ++- thirdparty/libwebp/src/demux/demux.c | 2 +- thirdparty/libwebp/src/dsp/alpha_processing.c | 1 + thirdparty/libwebp/src/dsp/cost.c | 1 + thirdparty/libwebp/src/dsp/cost_neon.c | 4 +- thirdparty/libwebp/src/dsp/cpu.c | 6 + thirdparty/libwebp/src/dsp/cpu.h | 36 ++- thirdparty/libwebp/src/dsp/dec.c | 1 + thirdparty/libwebp/src/dsp/dec_neon.c | 4 +- thirdparty/libwebp/src/dsp/enc.c | 1 + thirdparty/libwebp/src/dsp/enc_neon.c | 4 +- thirdparty/libwebp/src/dsp/enc_sse2.c | 242 ++++++++++++++---- thirdparty/libwebp/src/dsp/filters.c | 1 + thirdparty/libwebp/src/dsp/lossless.c | 1 + thirdparty/libwebp/src/dsp/lossless_enc.c | 1 + .../libwebp/src/dsp/lossless_enc_neon.c | 2 +- thirdparty/libwebp/src/dsp/lossless_neon.c | 2 +- thirdparty/libwebp/src/dsp/neon.h | 4 +- thirdparty/libwebp/src/dsp/quant.h | 2 +- thirdparty/libwebp/src/dsp/rescaler.c | 1 + thirdparty/libwebp/src/dsp/ssim.c | 1 + thirdparty/libwebp/src/dsp/upsampling.c | 1 + thirdparty/libwebp/src/dsp/upsampling_neon.c | 2 +- thirdparty/libwebp/src/dsp/yuv.c | 1 + thirdparty/libwebp/src/enc/alpha_enc.c | 20 +- thirdparty/libwebp/src/enc/analysis_enc.c | 4 + .../libwebp/src/enc/backward_references_enc.c | 9 +- thirdparty/libwebp/src/enc/frame_enc.c | 8 +- thirdparty/libwebp/src/enc/picture_csp_enc.c | 5 +- .../libwebp/src/enc/picture_rescale_enc.c | 20 +- 
thirdparty/libwebp/src/enc/syntax_enc.c | 6 +- thirdparty/libwebp/src/enc/vp8i_enc.h | 2 +- thirdparty/libwebp/src/enc/vp8l_enc.c | 38 +-- thirdparty/libwebp/src/enc/webp_enc.c | 10 +- thirdparty/libwebp/src/mux/muxi.h | 2 +- thirdparty/libwebp/src/mux/muxread.c | 7 +- .../libwebp/src/utils/bit_reader_utils.c | 3 +- .../libwebp/src/utils/bit_reader_utils.h | 3 +- thirdparty/libwebp/src/webp/decode.h | 5 +- 47 files changed, 359 insertions(+), 150 deletions(-) diff --git a/thirdparty/README.md b/thirdparty/README.md index 4bb1e8e1d1a..9e45c8c5eb3 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -356,7 +356,7 @@ Files extracted from upstream source: ## libwebp - Upstream: https://chromium.googlesource.com/webm/libwebp/ -- Version: 1.3.0 (b557776962a3dcc985d83bd4ed94e1e2e50d0fa2, 2022) +- Version: 1.3.1 (fd7bb21c0cb56e8a82e9bfa376164b842f433f3b, 2023) - License: BSD-3-Clause Files extracted from upstream source: diff --git a/thirdparty/libwebp/AUTHORS b/thirdparty/libwebp/AUTHORS index 2f0c537d1c5..8359b20da9d 100644 --- a/thirdparty/libwebp/AUTHORS +++ b/thirdparty/libwebp/AUTHORS @@ -32,6 +32,7 @@ Contributors: - Mislav Bradac (mislavm at google dot com) - Nico Weber (thakis at chromium dot org) - Noel Chromium (noel at chromium dot org) +- Nozomi Isozaki (nontan at pixiv dot co dot jp) - Oliver Wolff (oliver dot wolff at qt dot io) - Owen Rodley (orodley at google dot com) - Parag Salasakar (img dot mips1 at gmail dot com) @@ -47,6 +48,7 @@ Contributors: - Somnath Banerjee (somnath dot banerjee at gmail dot com) - Sriraman Tallam (tmsriram at google dot com) - Tamar Levy (tamar dot levy at intel dot com) +- Thiago Perrotta (tperrotta at google dot com) - Timothy Gu (timothygu99 at gmail dot com) - Urvang Joshi (urvang at google dot com) - Vikas Arora (vikasa at google dot com) diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv.c b/thirdparty/libwebp/sharpyuv/sharpyuv.c index 7de34fb0b25..a0745648884 100644 --- a/thirdparty/libwebp/sharpyuv/sharpyuv.c 
+++ b/thirdparty/libwebp/sharpyuv/sharpyuv.c @@ -440,6 +440,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, // By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed, // users can declare it as extern and call it with an alternate VP8CPUInfo // function. +extern VP8CPUInfo SharpYuvGetCPUInfo; SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func); void SharpYuvInit(VP8CPUInfo cpu_info_func) { static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used = diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv.h b/thirdparty/libwebp/sharpyuv/sharpyuv.h index 181b20a0bc4..7b9904d6f95 100644 --- a/thirdparty/libwebp/sharpyuv/sharpyuv.h +++ b/thirdparty/libwebp/sharpyuv/sharpyuv.h @@ -37,7 +37,7 @@ extern "C" { // SharpYUV API version following the convention from semver.org #define SHARPYUV_VERSION_MAJOR 0 #define SHARPYUV_VERSION_MINOR 2 -#define SHARPYUV_VERSION_PATCH 0 +#define SHARPYUV_VERSION_PATCH 1 // Version as a uint32_t. The major number is the high 8 bits. // The minor number is the middle 8 bits. The patch number is the low 16 bits. 
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \ diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c index 31c272c4085..0da3efc0b81 100644 --- a/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c @@ -72,6 +72,7 @@ void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, const uint16_t* best_y, uint16_t* out, int bit_depth); +extern VP8CPUInfo SharpYuvGetCPUInfo; extern void InitSharpYuvSSE2(void); extern void InitSharpYuvNEON(void); diff --git a/thirdparty/libwebp/src/dec/tree_dec.c b/thirdparty/libwebp/src/dec/tree_dec.c index 1c6fdea27cc..24346059532 100644 --- a/thirdparty/libwebp/src/dec/tree_dec.c +++ b/thirdparty/libwebp/src/dec/tree_dec.c @@ -12,10 +12,11 @@ // Author: Skal (pascal.massimino@gmail.com) #include "src/dec/vp8i_dec.h" +#include "src/dsp/cpu.h" #include "src/utils/bit_reader_inl_utils.h" #if !defined(USE_GENERIC_TREE) -#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then. 
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE #else diff --git a/thirdparty/libwebp/src/dec/vp8_dec.c b/thirdparty/libwebp/src/dec/vp8_dec.c index 2003935ec46..20b92e84c4f 100644 --- a/thirdparty/libwebp/src/dec/vp8_dec.c +++ b/thirdparty/libwebp/src/dec/vp8_dec.c @@ -494,6 +494,8 @@ static int GetCoeffsAlt(VP8BitReader* const br, return 16; } +extern VP8CPUInfo VP8GetCPUInfo; + WEBP_DSP_INIT_FUNC(InitGetCoeffs) { if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { GetCoeffs = GetCoeffsAlt; diff --git a/thirdparty/libwebp/src/dec/vp8i_dec.h b/thirdparty/libwebp/src/dec/vp8i_dec.h index 83791ecd25d..1ae4ff62f2a 100644 --- a/thirdparty/libwebp/src/dec/vp8i_dec.h +++ b/thirdparty/libwebp/src/dec/vp8i_dec.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 #define DEC_MIN_VERSION 3 -#define DEC_REV_VERSION 0 +#define DEC_REV_VERSION 1 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/thirdparty/libwebp/src/dec/webp_dec.c b/thirdparty/libwebp/src/dec/webp_dec.c index 3f4f7bb6594..f557868b998 100644 --- a/thirdparty/libwebp/src/dec/webp_dec.c +++ b/thirdparty/libwebp/src/dec/webp_dec.c @@ -658,19 +658,26 @@ uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size, uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width, int* height, uint8_t** u, uint8_t** v, int* stride, int* uv_stride) { - WebPDecBuffer output; // only to preserve the side-infos - uint8_t* const out = Decode(MODE_YUV, data, data_size, - width, height, &output); - - if (out != NULL) { - const WebPYUVABuffer* const buf = &output.u.YUVA; - *u = buf->u; - *v = buf->v; - *stride = buf->y_stride; - *uv_stride = buf->u_stride; - assert(buf->u_stride == buf->v_stride); + // data, width and height are checked by Decode(). 
+ if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) { + return NULL; + } + + { + WebPDecBuffer output; // only to preserve the side-infos + uint8_t* const out = Decode(MODE_YUV, data, data_size, + width, height, &output); + + if (out != NULL) { + const WebPYUVABuffer* const buf = &output.u.YUVA; + *u = buf->u; + *v = buf->v; + *stride = buf->y_stride; + *uv_stride = buf->u_stride; + assert(buf->u_stride == buf->v_stride); + } + return out; } - return out; } static void DefaultFeatures(WebPBitstreamFeatures* const features) { diff --git a/thirdparty/libwebp/src/demux/demux.c b/thirdparty/libwebp/src/demux/demux.c index 324e5eb993a..fd45a2500e4 100644 --- a/thirdparty/libwebp/src/demux/demux.c +++ b/thirdparty/libwebp/src/demux/demux.c @@ -25,7 +25,7 @@ #define DMUX_MAJ_VERSION 1 #define DMUX_MIN_VERSION 3 -#define DMUX_REV_VERSION 0 +#define DMUX_REV_VERSION 1 typedef struct { size_t start_; // start location of the data diff --git a/thirdparty/libwebp/src/dsp/alpha_processing.c b/thirdparty/libwebp/src/dsp/alpha_processing.c index 1892929a431..1d152f24dad 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing.c +++ b/thirdparty/libwebp/src/dsp/alpha_processing.c @@ -425,6 +425,7 @@ void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); //------------------------------------------------------------------------------ // Init function +extern VP8CPUInfo VP8GetCPUInfo; extern void WebPInitAlphaProcessingMIPSdspR2(void); extern void WebPInitAlphaProcessingSSE2(void); extern void WebPInitAlphaProcessingSSE41(void); diff --git a/thirdparty/libwebp/src/dsp/cost.c b/thirdparty/libwebp/src/dsp/cost.c index 460ec4f2a76..73d2140177c 100644 --- a/thirdparty/libwebp/src/dsp/cost.c +++ b/thirdparty/libwebp/src/dsp/cost.c @@ -374,6 +374,7 @@ static void SetResidualCoeffs_C(const int16_t* const coeffs, VP8GetResidualCostFunc VP8GetResidualCost; VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; +extern VP8CPUInfo VP8GetCPUInfo; extern void 
VP8EncDspCostInitMIPS32(void); extern void VP8EncDspCostInitMIPSdspR2(void); extern void VP8EncDspCostInitSSE2(void); diff --git a/thirdparty/libwebp/src/dsp/cost_neon.c b/thirdparty/libwebp/src/dsp/cost_neon.c index 8cc8ce58aa1..6582669cb3f 100644 --- a/thirdparty/libwebp/src/dsp/cost_neon.c +++ b/thirdparty/libwebp/src/dsp/cost_neon.c @@ -29,7 +29,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs, const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1)); const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position)); -#ifdef __aarch64__ +#if WEBP_AARCH64 res->last = vmaxvq_u8(masked) - 1; #else const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked)); @@ -43,7 +43,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs, vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0); --res->last; -#endif // __aarch64__ +#endif // WEBP_AARCH64 res->coeffs = coeffs; } diff --git a/thirdparty/libwebp/src/dsp/cpu.c b/thirdparty/libwebp/src/dsp/cpu.c index 62de73f750d..2234c77b356 100644 --- a/thirdparty/libwebp/src/dsp/cpu.c +++ b/thirdparty/libwebp/src/dsp/cpu.c @@ -173,6 +173,7 @@ static int x86CPUInfo(CPUFeature feature) { } return 0; } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. static int AndroidCPUInfo(CPUFeature feature) { @@ -184,6 +185,7 @@ static int AndroidCPUInfo(CPUFeature feature) { } return 0; } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test // Use compile flags as an indicator of SIMD support instead of a runtime check. 
@@ -208,6 +210,7 @@ static int wasmCPUInfo(CPUFeature feature) { } return 0; } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; #elif defined(WEBP_HAVE_NEON) // In most cases this function doesn't check for NEON support (it's assumed by @@ -236,6 +239,7 @@ static int armCPUInfo(CPUFeature feature) { return 1; #endif } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = armCPUInfo; #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \ defined(WEBP_USE_MSA) @@ -247,7 +251,9 @@ static int mipsCPUInfo(CPUFeature feature) { } } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; #else +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = NULL; #endif diff --git a/thirdparty/libwebp/src/dsp/cpu.h b/thirdparty/libwebp/src/dsp/cpu.h index be80727c0db..c86540f2801 100644 --- a/thirdparty/libwebp/src/dsp/cpu.h +++ b/thirdparty/libwebp/src/dsp/cpu.h @@ -43,6 +43,9 @@ #define __has_builtin(x) 0 #endif +//------------------------------------------------------------------------------ +// x86 defines. + #if !defined(HAVE_CONFIG_H) #if defined(_MSC_VER) && _MSC_VER > 1310 && \ (defined(_M_X64) || defined(_M_IX86)) @@ -80,6 +83,9 @@ #undef WEBP_MSC_SSE41 #undef WEBP_MSC_SSE2 +//------------------------------------------------------------------------------ +// Arm defines. + // The intrinsics currently cause compiler errors with arm-nacl-gcc and the // inline assembly would need to be modified for use with Native Client. #if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \ @@ -98,16 +104,26 @@ // inclusion of arm64_neon.h; Visual Studio 2019 includes this file in // arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with // vtbl4_u8(); a fix was made in 16.6. 
-#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ - (_MSC_VER >= 1926 && defined(_M_ARM64))) +#if defined(_MSC_VER) && \ + ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ + (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC)))) #define WEBP_USE_NEON #define WEBP_USE_INTRINSICS #endif +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define WEBP_AARCH64 1 +#else +#define WEBP_AARCH64 0 +#endif + #if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON) #define WEBP_HAVE_NEON #endif +//------------------------------------------------------------------------------ +// MIPS defines. + #if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \ (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) #define WEBP_USE_MIPS32 @@ -123,6 +139,8 @@ #define WEBP_USE_MSA #endif +//------------------------------------------------------------------------------ + #ifndef WEBP_DSP_OMIT_C_CODE #define WEBP_DSP_OMIT_C_CODE 1 #endif @@ -133,13 +151,14 @@ #define WEBP_NEON_OMIT_C_CODE 0 #endif -#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \ - defined(__aarch64__)) +#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64) #define WEBP_NEON_WORK_AROUND_GCC 1 #else #define WEBP_NEON_WORK_AROUND_GCC 0 #endif +//------------------------------------------------------------------------------ + // This macro prevents thread_sanitizer from reporting known concurrent writes. #define WEBP_TSAN_IGNORE_FUNCTION #if defined(__has_feature) @@ -241,16 +260,7 @@ typedef enum { kMSA } CPUFeature; -#ifdef __cplusplus -extern "C" { -#endif - // returns true if the CPU supports the feature. 
typedef int (*VP8CPUInfo)(CPUFeature feature); -WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - -#ifdef __cplusplus -} // extern "C" -#endif #endif // WEBP_DSP_CPU_H_ diff --git a/thirdparty/libwebp/src/dsp/dec.c b/thirdparty/libwebp/src/dsp/dec.c index 537c7012824..33d8df8a621 100644 --- a/thirdparty/libwebp/src/dsp/dec.c +++ b/thirdparty/libwebp/src/dsp/dec.c @@ -734,6 +734,7 @@ VP8SimpleFilterFunc VP8SimpleHFilter16i; void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst, int dst_stride); +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8DspInitSSE2(void); extern void VP8DspInitSSE41(void); extern void VP8DspInitNEON(void); diff --git a/thirdparty/libwebp/src/dsp/dec_neon.c b/thirdparty/libwebp/src/dsp/dec_neon.c index fa851707e26..22784cf15ae 100644 --- a/thirdparty/libwebp/src/dsp/dec_neon.c +++ b/thirdparty/libwebp/src/dsp/dec_neon.c @@ -1428,7 +1428,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x8_t A = vld1_u8(dst - BPS); // top row -#if defined(__aarch64__) +#if WEBP_AARCH64 const uint16_t p2 = vaddlv_u8(A); sum_top = vdupq_n_u16(p2); #else @@ -1511,7 +1511,7 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x16_t A = vld1q_u8(dst - BPS); // top row -#if defined(__aarch64__) +#if WEBP_AARCH64 const uint16_t p3 = vaddlvq_u8(A); sum_top = vdupq_n_u16(p3); #else diff --git a/thirdparty/libwebp/src/dsp/enc.c b/thirdparty/libwebp/src/dsp/enc.c index ea47a3fd954..2ba97ba8d66 100644 --- a/thirdparty/libwebp/src/dsp/enc.c +++ b/thirdparty/libwebp/src/dsp/enc.c @@ -732,6 +732,7 @@ VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; VP8BlockCopy VP8Copy4x4; VP8BlockCopy VP8Copy16x8; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8EncDspInitSSE2(void); extern void VP8EncDspInitSSE41(void); extern void VP8EncDspInitNEON(void); diff --git a/thirdparty/libwebp/src/dsp/enc_neon.c b/thirdparty/libwebp/src/dsp/enc_neon.c index 3a04111c551..714800367ba 100644 --- 
a/thirdparty/libwebp/src/dsp/enc_neon.c +++ b/thirdparty/libwebp/src/dsp/enc_neon.c @@ -764,7 +764,7 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a, // Horizontal sum of all four uint32_t values in 'sum'. static int SumToInt_NEON(uint32x4_t sum) { -#if defined(__aarch64__) +#if WEBP_AARCH64 return (int)vaddvq_u32(sum); #else const uint64x2_t sum2 = vpaddlq_u32(sum); @@ -865,7 +865,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16], uint8x8x4_t shuffles; // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use // non-standard versions there. -#if defined(__APPLE__) && defined(__aarch64__) && \ +#if defined(__APPLE__) && WEBP_AARCH64 && \ defined(__apple_build_version__) && (__apple_build_version__< 6020037) uint8x16x2_t all_out; INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1)); diff --git a/thirdparty/libwebp/src/dsp/enc_sse2.c b/thirdparty/libwebp/src/dsp/enc_sse2.c index 1d1055668f7..010624a2f71 100644 --- a/thirdparty/libwebp/src/dsp/enc_sse2.c +++ b/thirdparty/libwebp/src/dsp/enc_sse2.c @@ -25,9 +25,160 @@ //------------------------------------------------------------------------------ // Transforms (Paragraph 14.4) -// Does one or two inverse transforms. -static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, - int do_two) { +// Does one inverse transform. 
+static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in, + uint8_t* dst) { + // This implementation makes use of 16-bit fixed point versions of two + // multiply constants: + // K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16 + // K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16 + // + // To be able to use signed 16-bit integers, we use the following trick to + // have constants within range: + // - Associated constants are obtained by subtracting the 16-bit fixed point + // version of one: + // k = K - (1 << 16) => K = k + (1 << 16) + // K1 = 85627 => k1 = 20091 + // K2 = 35468 => k2 = -30068 + // - The multiplication of a variable by a constant becomes the sum of the + // variable and the multiplication of that variable by the associated + // constant: + // (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x + const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068, + 20091, 20091, 20091, 20091); + const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091, + -30068, -30068, -30068, -30068); + const __m128i zero = _mm_setzero_si128(); + const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4); + __m128i T01, T23; + + // Load and concatenate the transform coefficients. + const __m128i in01 = _mm_loadu_si128((const __m128i*)&in[0]); + const __m128i in23 = _mm_loadu_si128((const __m128i*)&in[8]); + // a00 a10 a20 a30 a01 a11 a21 a31 + // a02 a12 a22 a32 a03 a13 a23 a33 + + // Vertical pass and subsequent transpose. + { + const __m128i in1 = _mm_unpackhi_epi64(in01, in01); + const __m128i in3 = _mm_unpackhi_epi64(in23, in23); + + // First pass, c and d calculations are longer because of the "trick" + // multiplications. 
+ // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3 + // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3 + const __m128i a_d3 = _mm_add_epi16(in01, in23); + const __m128i b_c3 = _mm_sub_epi16(in01, in23); + const __m128i c1d1 = _mm_mulhi_epi16(in1, k2k1); + const __m128i c2d2 = _mm_mulhi_epi16(in3, k1k2); + const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3); + const __m128i c4 = _mm_sub_epi16(c1d1, c2d2); + const __m128i c = _mm_add_epi16(c3, c4); + const __m128i d4u = _mm_add_epi16(c1d1, c2d2); + const __m128i du = _mm_add_epi16(a_d3, d4u); + const __m128i d = _mm_unpackhi_epi64(du, du); + + // Second pass. + const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3); + const __m128i comb_dc = _mm_unpacklo_epi64(d, c); + + const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc); + const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc); + const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2)); + + const __m128i transpose_0 = _mm_unpacklo_epi16(tmp01, tmp23); + const __m128i transpose_1 = _mm_unpackhi_epi16(tmp01, tmp23); + // a00 a20 a01 a21 a02 a22 a03 a23 + // a10 a30 a11 a31 a12 a32 a13 a33 + + T01 = _mm_unpacklo_epi16(transpose_0, transpose_1); + T23 = _mm_unpackhi_epi16(transpose_0, transpose_1); + // a00 a10 a20 a30 a01 a11 a21 a31 + // a02 a12 a22 a32 a03 a13 a23 a33 + } + + // Horizontal pass and subsequent transpose. + { + const __m128i T1 = _mm_unpackhi_epi64(T01, T01); + const __m128i T3 = _mm_unpackhi_epi64(T23, T23); + + // First pass, c and d calculations are longer because of the "trick" + // multiplications. 
+ const __m128i dc = _mm_add_epi16(T01, zero_four); + + // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3 + // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3 + const __m128i a_d3 = _mm_add_epi16(dc, T23); + const __m128i b_c3 = _mm_sub_epi16(dc, T23); + const __m128i c1d1 = _mm_mulhi_epi16(T1, k2k1); + const __m128i c2d2 = _mm_mulhi_epi16(T3, k1k2); + const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3); + const __m128i c4 = _mm_sub_epi16(c1d1, c2d2); + const __m128i c = _mm_add_epi16(c3, c4); + const __m128i d4u = _mm_add_epi16(c1d1, c2d2); + const __m128i du = _mm_add_epi16(a_d3, d4u); + const __m128i d = _mm_unpackhi_epi64(du, du); + + // Second pass. + const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3); + const __m128i comb_dc = _mm_unpacklo_epi64(d, c); + + const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc); + const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc); + const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2)); + + const __m128i shifted01 = _mm_srai_epi16(tmp01, 3); + const __m128i shifted23 = _mm_srai_epi16(tmp23, 3); + // a00 a01 a02 a03 a10 a11 a12 a13 + // a20 a21 a22 a23 a30 a31 a32 a33 + + const __m128i transpose_0 = _mm_unpacklo_epi16(shifted01, shifted23); + const __m128i transpose_1 = _mm_unpackhi_epi16(shifted01, shifted23); + // a00 a20 a01 a21 a02 a22 a03 a23 + // a10 a30 a11 a31 a12 a32 a13 a33 + + T01 = _mm_unpacklo_epi16(transpose_0, transpose_1); + T23 = _mm_unpackhi_epi16(transpose_0, transpose_1); + // a00 a10 a20 a30 a01 a11 a21 a31 + // a02 a12 a22 a32 a03 a13 a23 a33 + } + + // Add inverse transform to 'ref' and store. + { + // Load the reference(s). + __m128i ref01, ref23, ref0123; + int32_t buf[4]; + + // Load four bytes/pixels per line. 
+ const __m128i ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS])); + const __m128i ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS])); + const __m128i ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS])); + const __m128i ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS])); + ref01 = _mm_unpacklo_epi32(ref0, ref1); + ref23 = _mm_unpacklo_epi32(ref2, ref3); + + // Convert to 16b. + ref01 = _mm_unpacklo_epi8(ref01, zero); + ref23 = _mm_unpacklo_epi8(ref23, zero); + // Add the inverse transform(s). + ref01 = _mm_add_epi16(ref01, T01); + ref23 = _mm_add_epi16(ref23, T23); + // Unsigned saturate to 8b. + ref0123 = _mm_packus_epi16(ref01, ref23); + + _mm_storeu_si128((__m128i *)buf, ref0123); + + // Store four bytes/pixels per line. + WebPInt32ToMem(&dst[0 * BPS], buf[0]); + WebPInt32ToMem(&dst[1 * BPS], buf[1]); + WebPInt32ToMem(&dst[2 * BPS], buf[2]); + WebPInt32ToMem(&dst[3 * BPS], buf[3]); + } +} + +// Does two inverse transforms. +static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in, + uint8_t* dst) { // This implementation makes use of 16-bit fixed point versions of two // multiply constants: // K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16 @@ -49,33 +200,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, __m128i T0, T1, T2, T3; // Load and concatenate the transform coefficients (we'll do two inverse - // transforms in parallel). In the case of only one inverse transform, the - // second half of the vectors will just contain random value we'll never - // use nor store. + // transforms in parallel). 
__m128i in0, in1, in2, in3; { - in0 = _mm_loadl_epi64((const __m128i*)&in[0]); - in1 = _mm_loadl_epi64((const __m128i*)&in[4]); - in2 = _mm_loadl_epi64((const __m128i*)&in[8]); - in3 = _mm_loadl_epi64((const __m128i*)&in[12]); - // a00 a10 a20 a30 x x x x - // a01 a11 a21 a31 x x x x - // a02 a12 a22 a32 x x x x - // a03 a13 a23 a33 x x x x - if (do_two) { - const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]); - const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]); - const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]); - const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]); - in0 = _mm_unpacklo_epi64(in0, inB0); - in1 = _mm_unpacklo_epi64(in1, inB1); - in2 = _mm_unpacklo_epi64(in2, inB2); - in3 = _mm_unpacklo_epi64(in3, inB3); - // a00 a10 a20 a30 b00 b10 b20 b30 - // a01 a11 a21 a31 b01 b11 b21 b31 - // a02 a12 a22 a32 b02 b12 b22 b32 - // a03 a13 a23 a33 b03 b13 b23 b33 - } + const __m128i tmp0 = _mm_loadu_si128((const __m128i*)&in[0]); + const __m128i tmp1 = _mm_loadu_si128((const __m128i*)&in[8]); + const __m128i tmp2 = _mm_loadu_si128((const __m128i*)&in[16]); + const __m128i tmp3 = _mm_loadu_si128((const __m128i*)&in[24]); + in0 = _mm_unpacklo_epi64(tmp0, tmp2); + in1 = _mm_unpackhi_epi64(tmp0, tmp2); + in2 = _mm_unpacklo_epi64(tmp1, tmp3); + in3 = _mm_unpackhi_epi64(tmp1, tmp3); + // a00 a10 a20 a30 b00 b10 b20 b30 + // a01 a11 a21 a31 b01 b11 b21 b31 + // a02 a12 a22 a32 b02 b12 b22 b32 + // a03 a13 a23 a33 b03 b13 b23 b33 } // Vertical pass and subsequent transpose. @@ -148,19 +287,11 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, const __m128i zero = _mm_setzero_si128(); // Load the reference(s). __m128i ref0, ref1, ref2, ref3; - if (do_two) { - // Load eight bytes/pixels per line. 
- ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]); - ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]); - ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]); - ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]); - } else { - // Load four bytes/pixels per line. - ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS])); - ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS])); - ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS])); - ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS])); - } + // Load eight bytes/pixels per line. + ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]); + ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]); + ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]); + ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]); // Convert to 16b. ref0 = _mm_unpacklo_epi8(ref0, zero); ref1 = _mm_unpacklo_epi8(ref1, zero); @@ -176,20 +307,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, ref1 = _mm_packus_epi16(ref1, ref1); ref2 = _mm_packus_epi16(ref2, ref2); ref3 = _mm_packus_epi16(ref3, ref3); - // Store the results. - if (do_two) { - // Store eight bytes/pixels per line. - _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0); - _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1); - _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2); - _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3); - } else { - // Store four bytes/pixels per line. - WebPInt32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0)); - WebPInt32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1)); - WebPInt32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2)); - WebPInt32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3)); - } + // Store eight bytes/pixels per line. + _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0); + _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1); + _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2); + _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3); + } +} + +// Does one or two inverse transforms. 
+static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, + int do_two) { + if (do_two) { + ITransform_Two_SSE2(ref, in, dst); + } else { + ITransform_One_SSE2(ref, in, dst); } } diff --git a/thirdparty/libwebp/src/dsp/filters.c b/thirdparty/libwebp/src/dsp/filters.c index 4506567ba36..85eee5098f0 100644 --- a/thirdparty/libwebp/src/dsp/filters.c +++ b/thirdparty/libwebp/src/dsp/filters.c @@ -233,6 +233,7 @@ static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in, WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8FiltersInitMIPSdspR2(void); extern void VP8FiltersInitMSA(void); extern void VP8FiltersInitNEON(void); diff --git a/thirdparty/libwebp/src/dsp/lossless.c b/thirdparty/libwebp/src/dsp/lossless.c index fb86e58d4a4..9f812094539 100644 --- a/thirdparty/libwebp/src/dsp/lossless.c +++ b/thirdparty/libwebp/src/dsp/lossless.c @@ -588,6 +588,7 @@ VP8LConvertFunc VP8LConvertBGRAToBGR; VP8LMapARGBFunc VP8LMapColor32b; VP8LMapAlphaFunc VP8LMapColor8b; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8LDspInitSSE2(void); extern void VP8LDspInitSSE41(void); extern void VP8LDspInitNEON(void); diff --git a/thirdparty/libwebp/src/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c index b1f9f26d724..cde1280617b 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc.c @@ -791,6 +791,7 @@ VP8LBundleColorMapFunc VP8LBundleColorMap; VP8LPredictorAddSubFunc VP8LPredictorsSub[16]; VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16]; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8LEncDspInitSSE2(void); extern void VP8LEncDspInitSSE41(void); extern void VP8LEncDspInitNEON(void); diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_neon.c b/thirdparty/libwebp/src/dsp/lossless_enc_neon.c index 7c7b73f8b69..e32c7961a23 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_neon.c +++ 
b/thirdparty/libwebp/src/dsp/lossless_enc_neon.c @@ -25,7 +25,7 @@ // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use // non-standard versions there. -#if defined(__APPLE__) && defined(__aarch64__) && \ +#if defined(__APPLE__) && WEBP_AARCH64 && \ defined(__apple_build_version__) && (__apple_build_version__< 6020037) #define USE_VTBLQ #endif diff --git a/thirdparty/libwebp/src/dsp/lossless_neon.c b/thirdparty/libwebp/src/dsp/lossless_neon.c index 89e3e013a08..ddc9b61711e 100644 --- a/thirdparty/libwebp/src/dsp/lossless_neon.c +++ b/thirdparty/libwebp/src/dsp/lossless_neon.c @@ -498,7 +498,7 @@ static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper, // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use // non-standard versions there. -#if defined(__APPLE__) && defined(__aarch64__) && \ +#if defined(__APPLE__) && WEBP_AARCH64 && \ defined(__apple_build_version__) && (__apple_build_version__< 6020037) #define USE_VTBLQ #endif diff --git a/thirdparty/libwebp/src/dsp/neon.h b/thirdparty/libwebp/src/dsp/neon.h index c591f9b9a78..14acb4044ba 100644 --- a/thirdparty/libwebp/src/dsp/neon.h +++ b/thirdparty/libwebp/src/dsp/neon.h @@ -21,7 +21,7 @@ // Right now, some intrinsics functions seem slower, so we disable them // everywhere except newer clang/gcc or aarch64 where the inline assembly is // incompatible. -#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__) +#if LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 9) || WEBP_AARCH64 #define WEBP_USE_INTRINSICS // use intrinsics when possible #endif @@ -46,7 +46,7 @@ // if using intrinsics, this flag avoids some functions that make gcc-4.6.3 // crash ("internal compiler error: in immed_double_const, at emit-rtl."). 
// (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183) -#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__)) +#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64) #define WORK_AROUND_GCC #endif diff --git a/thirdparty/libwebp/src/dsp/quant.h b/thirdparty/libwebp/src/dsp/quant.h index fc099bf9d67..bf7734cb11d 100644 --- a/thirdparty/libwebp/src/dsp/quant.h +++ b/thirdparty/libwebp/src/dsp/quant.h @@ -22,7 +22,7 @@ #define IsFlat IsFlat_NEON static uint32_t horizontal_add_uint32x4(const uint32x4_t a) { -#if defined(__aarch64__) +#if WEBP_AARCH64 return vaddvq_u32(a); #else const uint64x2_t b = vpaddlq_u32(a); diff --git a/thirdparty/libwebp/src/dsp/rescaler.c b/thirdparty/libwebp/src/dsp/rescaler.c index 14620ce4f1e..325d8be1808 100644 --- a/thirdparty/libwebp/src/dsp/rescaler.c +++ b/thirdparty/libwebp/src/dsp/rescaler.c @@ -197,6 +197,7 @@ WebPRescalerImportRowFunc WebPRescalerImportRowShrink; WebPRescalerExportRowFunc WebPRescalerExportRowExpand; WebPRescalerExportRowFunc WebPRescalerExportRowShrink; +extern VP8CPUInfo VP8GetCPUInfo; extern void WebPRescalerDspInitSSE2(void); extern void WebPRescalerDspInitMIPS32(void); extern void WebPRescalerDspInitMIPSdspR2(void); diff --git a/thirdparty/libwebp/src/dsp/ssim.c b/thirdparty/libwebp/src/dsp/ssim.c index f85c2e6e5b5..9a1341ed958 100644 --- a/thirdparty/libwebp/src/dsp/ssim.c +++ b/thirdparty/libwebp/src/dsp/ssim.c @@ -137,6 +137,7 @@ VP8SSIMGetClippedFunc VP8SSIMGetClipped; VP8AccumulateSSEFunc VP8AccumulateSSE; #endif +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8SSIMDspInitSSE2(void); WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) { diff --git a/thirdparty/libwebp/src/dsp/upsampling.c b/thirdparty/libwebp/src/dsp/upsampling.c index 87f771f3eb5..983b9c42d36 100644 --- a/thirdparty/libwebp/src/dsp/upsampling.c +++ b/thirdparty/libwebp/src/dsp/upsampling.c @@ -215,6 +215,7 @@ static void EmptyYuv444Func(const uint8_t* y, WebPYUV444Converter 
WebPYUV444Converters[MODE_LAST]; +extern VP8CPUInfo VP8GetCPUInfo; extern void WebPInitYUV444ConvertersMIPSdspR2(void); extern void WebPInitYUV444ConvertersSSE2(void); extern void WebPInitYUV444ConvertersSSE41(void); diff --git a/thirdparty/libwebp/src/dsp/upsampling_neon.c b/thirdparty/libwebp/src/dsp/upsampling_neon.c index 6ba71a7de53..bbc000ca2d3 100644 --- a/thirdparty/libwebp/src/dsp/upsampling_neon.c +++ b/thirdparty/libwebp/src/dsp/upsampling_neon.c @@ -111,7 +111,7 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 }; vst4_u8(out, v255_r_g_b); \ } while (0) -#if !defined(WEBP_SWAP_16BIT_CSP) +#if (WEBP_SWAP_16BIT_CSP == 0) #define ZIP_U8(lo, hi) vzip_u8((lo), (hi)) #else #define ZIP_U8(lo, hi) vzip_u8((hi), (lo)) diff --git a/thirdparty/libwebp/src/dsp/yuv.c b/thirdparty/libwebp/src/dsp/yuv.c index d16c13d3ca3..8a04b85d82d 100644 --- a/thirdparty/libwebp/src/dsp/yuv.c +++ b/thirdparty/libwebp/src/dsp/yuv.c @@ -70,6 +70,7 @@ void WebPSamplerProcessPlane(const uint8_t* y, int y_stride, WebPSamplerRowFunc WebPSamplers[MODE_LAST]; +extern VP8CPUInfo VP8GetCPUInfo; extern void WebPInitSamplersSSE2(void); extern void WebPInitSamplersSSE41(void); extern void WebPInitSamplersMIPS32(void); diff --git a/thirdparty/libwebp/src/enc/alpha_enc.c b/thirdparty/libwebp/src/enc/alpha_enc.c index f7c02690e3c..26f003485aa 100644 --- a/thirdparty/libwebp/src/enc/alpha_enc.c +++ b/thirdparty/libwebp/src/enc/alpha_enc.c @@ -13,6 +13,7 @@ #include #include +#include #include "src/enc/vp8i_enc.h" #include "src/dsp/dsp.h" @@ -140,6 +141,11 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, !reduce_levels, &tmp_bw, &result->stats); if (ok) { output = VP8LBitWriterFinish(&tmp_bw); + if (tmp_bw.error_) { + VP8LBitWriterWipeOut(&tmp_bw); + memset(&result->bw, 0, sizeof(result->bw)); + return 0; + } output_size = VP8LBitWriterNumBytes(&tmp_bw); if (output_size > data_size) { // compressed size is larger than source! 
Revert to uncompressed mode. @@ -148,6 +154,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, } } else { VP8LBitWriterWipeOut(&tmp_bw); + memset(&result->bw, 0, sizeof(result->bw)); return 0; } } @@ -162,7 +169,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, header = method | (filter << 2); if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; - VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size); + if (!VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size)) ok = 0; ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); ok = ok && VP8BitWriterAppend(&result->bw, output, output_size); @@ -312,11 +319,11 @@ static int EncodeAlpha(VP8Encoder* const enc, assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST); if (quality < 0 || quality > 100) { - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); } if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) { - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); } if (method == ALPHA_NO_COMPRESSION) { @@ -326,7 +333,7 @@ static int EncodeAlpha(VP8Encoder* const enc, quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); if (quant_alpha == NULL) { - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } // Extract alpha data (width x height) from raw_data (stride x height). @@ -346,6 +353,9 @@ static int EncodeAlpha(VP8Encoder* const enc, ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method, filter, reduce_levels, effort_level, output, output_size, pic->stats); + if (!ok) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise + } #if !defined(WEBP_DISABLE_STATS) if (pic->stats != NULL) { // need stats? 
pic->stats->coded_size += (int)(*output_size); @@ -405,7 +415,7 @@ int VP8EncStartAlpha(VP8Encoder* const enc) { WebPWorker* const worker = &enc->alpha_worker_; // Makes sure worker is good to go. if (!WebPGetWorkerInterface()->Reset(worker)) { - return 0; + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } WebPGetWorkerInterface()->Launch(worker); return 1; diff --git a/thirdparty/libwebp/src/enc/analysis_enc.c b/thirdparty/libwebp/src/enc/analysis_enc.c index a0001ac0348..962eaa998f8 100644 --- a/thirdparty/libwebp/src/enc/analysis_enc.c +++ b/thirdparty/libwebp/src/enc/analysis_enc.c @@ -474,6 +474,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) { } else { // Use only one default segment. ResetAllMBInfo(enc); } + if (!ok) { + return WebPEncodingSetError(enc->pic_, + VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise + } return ok; } diff --git a/thirdparty/libwebp/src/enc/backward_references_enc.c b/thirdparty/libwebp/src/enc/backward_references_enc.c index 49a0fac0345..dc98bf17194 100644 --- a/thirdparty/libwebp/src/enc/backward_references_enc.c +++ b/thirdparty/libwebp/src/enc/backward_references_enc.c @@ -283,8 +283,7 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, hash_to_first_index = (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); if (hash_to_first_index == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } percent_range = remaining_percent / 2; @@ -1050,8 +1049,7 @@ int VP8LGetBackwardReferences( refs_best = GetBackwardReferencesLowEffort( width, height, argb, cache_bits_best, hash_chain, refs); if (refs_best == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } // Set it in first position. 
BackwardRefsSwap(refs_best, &refs[0]); @@ -1059,8 +1057,7 @@ int VP8LGetBackwardReferences( if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try, cache_bits_max, do_no_cache, hash_chain, refs, cache_bits_best)) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } } diff --git a/thirdparty/libwebp/src/enc/frame_enc.c b/thirdparty/libwebp/src/enc/frame_enc.c index b93d9e5b991..9a98dc1f3ee 100644 --- a/thirdparty/libwebp/src/enc/frame_enc.c +++ b/thirdparty/libwebp/src/enc/frame_enc.c @@ -689,7 +689,7 @@ static int PreLoopInitialize(VP8Encoder* const enc) { } if (!ok) { VP8EncFreeBitWriters(enc); // malloc error occurred - WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } return ok; } @@ -719,6 +719,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) { } else { // Something bad happened -> need to do some memory cleanup. VP8EncFreeBitWriters(enc); + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } return ok; } @@ -754,6 +755,11 @@ int VP8EncLoop(VP8Encoder* const enc) { // *then* decide how to code the skip decision if there's one. if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) { CodeResiduals(it.bw_, &it, &info); + if (it.bw_->error_) { + // enc->pic_->error_code is set in PostLoopFinalize(). 
+ ok = 0; + break; + } } else { // reset predictors after a skip ResetAfterSkip(&it); } diff --git a/thirdparty/libwebp/src/enc/picture_csp_enc.c b/thirdparty/libwebp/src/enc/picture_csp_enc.c index 78c8ca479b0..a9280e6c305 100644 --- a/thirdparty/libwebp/src/enc/picture_csp_enc.c +++ b/thirdparty/libwebp/src/enc/picture_csp_enc.c @@ -98,6 +98,7 @@ static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1]; static uint16_t kGammaToLinearTab[256]; static volatile int kGammaTablesOk = 0; static void InitGammaTables(void); +extern VP8CPUInfo VP8GetCPUInfo; WEBP_DSP_INIT_FUNC(InitGammaTables) { if (!kGammaTablesOk) { @@ -534,7 +535,9 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, WebPInitConvertARGBToYUV(); InitGammaTables(); - if (tmp_rgb == NULL) return 0; // malloc error + if (tmp_rgb == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + } // Downsample Y/U/V planes, two rows at a time for (y = 0; y < (height >> 1); ++y) { diff --git a/thirdparty/libwebp/src/enc/picture_rescale_enc.c b/thirdparty/libwebp/src/enc/picture_rescale_enc.c index 839f91cacc0..ea90d825484 100644 --- a/thirdparty/libwebp/src/enc/picture_rescale_enc.c +++ b/thirdparty/libwebp/src/enc/picture_rescale_enc.c @@ -137,7 +137,9 @@ int WebPPictureCrop(WebPPicture* pic, PictureGrabSpecs(pic, &tmp); tmp.width = width; tmp.height = height; - if (!WebPPictureAlloc(&tmp)) return 0; + if (!WebPPictureAlloc(&tmp)) { + return WebPEncodingSetError(pic, tmp.error_code); + } if (!pic->use_argb) { const int y_offset = top * pic->y_stride + left; @@ -212,26 +214,28 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) { prev_height = picture->height; if (!WebPRescalerGetScaledDimensions( prev_width, prev_height, &width, &height)) { - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } PictureGrabSpecs(picture, &tmp); tmp.width = width; tmp.height = height; - if (!WebPPictureAlloc(&tmp)) return 0; + if (!WebPPictureAlloc(&tmp)) { + return 
WebPEncodingSetError(picture, tmp.error_code); + } if (!picture->use_argb) { work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); if (work == NULL) { WebPPictureFree(&tmp); - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } // If present, we need to rescale alpha first (for AlphaMultiplyY). if (picture->a != NULL) { WebPInitAlphaProcessing(); if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride, tmp.a, width, height, tmp.a_stride, work, 1)) { - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } } @@ -246,14 +250,14 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) { !RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height), picture->uv_stride, tmp.v, HALVE(width), HALVE(height), tmp.uv_stride, work, 1)) { - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } AlphaMultiplyY(&tmp, 1); } else { work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); if (work == NULL) { WebPPictureFree(&tmp); - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } // In order to correctly interpolate colors, we need to apply the alpha // weighting first (black-matting), scale the RGB values, and remove @@ -263,7 +267,7 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) { if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height, picture->argb_stride * 4, (uint8_t*)tmp.argb, width, height, tmp.argb_stride * 4, work, 4)) { - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } AlphaMultiplyARGB(&tmp, 1); } diff --git a/thirdparty/libwebp/src/enc/syntax_enc.c b/thirdparty/libwebp/src/enc/syntax_enc.c index e18cf650cab..9b8f524d698 100644 --- a/thirdparty/libwebp/src/enc/syntax_enc.c +++ b/thirdparty/libwebp/src/enc/syntax_enc.c @@ -258,7 +258,10 @@ static int EmitPartitionsSize(const VP8Encoder* const enc, buf[3 * p + 1] = (part_size >> 8) 
& 0xff; buf[3 * p + 2] = (part_size >> 16) & 0xff; } - return p ? pic->writer(buf, 3 * p, pic) : 1; + if (p && !pic->writer(buf, 3 * p, pic)) { + return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); + } + return 1; } //------------------------------------------------------------------------------ @@ -381,6 +384,7 @@ int VP8EncWrite(VP8Encoder* const enc) { enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size); ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_); + if (!ok) WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); return ok; } diff --git a/thirdparty/libwebp/src/enc/vp8i_enc.h b/thirdparty/libwebp/src/enc/vp8i_enc.h index c9927c47d84..19d9a6edb77 100644 --- a/thirdparty/libwebp/src/enc/vp8i_enc.h +++ b/thirdparty/libwebp/src/enc/vp8i_enc.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 #define ENC_MIN_VERSION 3 -#define ENC_REV_VERSION 0 +#define ENC_REV_VERSION 1 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost diff --git a/thirdparty/libwebp/src/enc/vp8l_enc.c b/thirdparty/libwebp/src/enc/vp8l_enc.c index 0b07e529a9a..3a8ec3dd1ec 100644 --- a/thirdparty/libwebp/src/enc/vp8l_enc.c +++ b/thirdparty/libwebp/src/enc/vp8l_enc.c @@ -196,8 +196,7 @@ static int CoOccurrenceBuild(const WebPPicture* const pic, uint32_t palette_sorted[MAX_PALETTE_SIZE]; lines = (uint32_t*)WebPSafeMalloc(2 * pic->width, sizeof(*lines)); if (lines == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } line_top = &lines[0]; line_current = &lines[pic->width]; @@ -255,10 +254,10 @@ static int PaletteSortModifiedZeng( cooccurrence = (uint32_t*)WebPSafeCalloc(num_colors * num_colors, sizeof(*cooccurrence)); if (cooccurrence == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } 
if (!CoOccurrenceBuild(pic, palette_sorted, num_colors, cooccurrence)) { + WebPSafeFree(cooccurrence); return 0; } @@ -1012,8 +1011,7 @@ static int StoreImageToBitMask( VP8LRefsCursorNext(&c); } if (bw->error_) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } return 1; } @@ -1297,7 +1295,10 @@ static int EncodeImageInternal( } } tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); - if (tokens == NULL) goto Error; + if (tokens == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } for (i = 0; i < 5 * histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[i]; StoreHuffmanCode(bw, huff_tree, tokens, codes); @@ -1448,18 +1449,21 @@ static int WriteImage(const WebPPicture* const pic, VP8LBitWriter* const bw, const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size; const size_t pad = vp8l_size & 1; const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad; + *coded_size = 0; + + if (bw->error_) { + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + } if (!WriteRiffHeader(pic, riff_size, vp8l_size) || !pic->writer(webpll_data, webpll_size, pic)) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); } if (pad) { const uint8_t pad_byte[1] = { 0 }; if (!pic->writer(pad_byte, 1, pic)) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); } } *coded_size = CHUNK_HEADER_SIZE + riff_size; @@ -1504,8 +1508,7 @@ static int AllocateTransformBuffer(VP8LEncoder* const enc, int width, ClearTransformBuffer(enc); mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem)); if (mem == NULL) { - WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } 
enc->transform_mem_ = mem; enc->transform_mem_size_ = (size_t)mem_size; @@ -1613,8 +1616,7 @@ static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst, int x, y; if (tmp_row == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } if (palette_size < APPLY_PALETTE_GREEDY_MAX) { @@ -1968,9 +1970,8 @@ int VP8LEncodeStream(const WebPConfig* const config, int ok_main; if (enc_main == NULL || !VP8LBitWriterInit(&bw_side, 0)) { - WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); VP8LEncoderDelete(enc_main); - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } // Avoid "garbage value" error from Clang's static analysis tool. @@ -2117,8 +2118,7 @@ int VP8LEncodeImage(const WebPConfig* const config, if (picture == NULL) return 0; if (config == NULL || picture->argb == NULL) { - WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); } width = picture->width; diff --git a/thirdparty/libwebp/src/enc/webp_enc.c b/thirdparty/libwebp/src/enc/webp_enc.c index 9620e050706..583fe6a8bbd 100644 --- a/thirdparty/libwebp/src/enc/webp_enc.c +++ b/thirdparty/libwebp/src/enc/webp_enc.c @@ -307,7 +307,10 @@ int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error) { assert((int)error < VP8_ENC_ERROR_LAST); assert((int)error >= VP8_ENC_OK); - ((WebPPicture*)pic)->error_code = error; + // The oldest error reported takes precedence over the new one. 
+ if (pic->error_code == VP8_ENC_OK) { + ((WebPPicture*)pic)->error_code = error; + } return 0; } @@ -317,8 +320,7 @@ int WebPReportProgress(const WebPPicture* const pic, *percent_store = percent; if (pic->progress_hook && !pic->progress_hook(percent, pic)) { // user abort requested - WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT); } } return 1; // ok @@ -329,7 +331,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { int ok = 0; if (pic == NULL) return 0; - WebPEncodingSetError(pic, VP8_ENC_OK); // all ok so far + pic->error_code = VP8_ENC_OK; // all ok so far if (config == NULL) { // bad params return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER); } diff --git a/thirdparty/libwebp/src/mux/muxi.h b/thirdparty/libwebp/src/mux/muxi.h index 7929138c44d..fc44d6f2feb 100644 --- a/thirdparty/libwebp/src/mux/muxi.h +++ b/thirdparty/libwebp/src/mux/muxi.h @@ -29,7 +29,7 @@ extern "C" { #define MUX_MAJ_VERSION 1 #define MUX_MIN_VERSION 3 -#define MUX_REV_VERSION 0 +#define MUX_REV_VERSION 1 // Chunk object. typedef struct WebPChunk WebPChunk; diff --git a/thirdparty/libwebp/src/mux/muxread.c b/thirdparty/libwebp/src/mux/muxread.c index 80050396e1f..9862ec68eea 100644 --- a/thirdparty/libwebp/src/mux/muxread.c +++ b/thirdparty/libwebp/src/mux/muxread.c @@ -116,9 +116,12 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data, // Each of ANMF chunk contain a header at the beginning. So, its size should // be at least 'hdr_size'. if (size < hdr_size) goto Fail; - ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_); + if (ChunkAssignData(&subchunk, &temp, copy_data, + chunk->tag_) != WEBP_MUX_OK) { + goto Fail; + } } - ChunkSetHead(&subchunk, &wpi->header_); + if (ChunkSetHead(&subchunk, &wpi->header_) != WEBP_MUX_OK) goto Fail; wpi->is_partial_ = 1; // Waiting for ALPH and/or VP8/VP8L chunks. // Rest of the chunks. 
diff --git a/thirdparty/libwebp/src/utils/bit_reader_utils.c b/thirdparty/libwebp/src/utils/bit_reader_utils.c index 857cd609888..a26557aa49f 100644 --- a/thirdparty/libwebp/src/utils/bit_reader_utils.c +++ b/thirdparty/libwebp/src/utils/bit_reader_utils.c @@ -15,6 +15,7 @@ #include "src/webp/config.h" #endif +#include "src/dsp/cpu.h" #include "src/utils/bit_reader_inl_utils.h" #include "src/utils/utils.h" @@ -121,7 +122,7 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits, #define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits. -#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \ +#if defined(__arm__) || defined(_M_ARM) || WEBP_AARCH64 || \ defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64__) || defined(_M_X64) #define VP8L_USE_FAST_LOAD diff --git a/thirdparty/libwebp/src/utils/bit_reader_utils.h b/thirdparty/libwebp/src/utils/bit_reader_utils.h index e64156e3181..25ff31e5d97 100644 --- a/thirdparty/libwebp/src/utils/bit_reader_utils.h +++ b/thirdparty/libwebp/src/utils/bit_reader_utils.h @@ -19,6 +19,7 @@ #ifdef _MSC_VER #include // _byteswap_ulong #endif +#include "src/dsp/cpu.h" #include "src/webp/types.h" // Warning! This macro triggers quite some MACRO wizardry around func signature! @@ -64,7 +65,7 @@ extern "C" { #define BITS 56 #elif defined(__arm__) || defined(_M_ARM) // ARM #define BITS 24 -#elif defined(__aarch64__) // ARM 64bit +#elif WEBP_AARCH64 // ARM 64bit #define BITS 56 #elif defined(__mips__) // MIPS #define BITS 24 diff --git a/thirdparty/libwebp/src/webp/decode.h b/thirdparty/libwebp/src/webp/decode.h index d98247509a8..0177b120897 100644 --- a/thirdparty/libwebp/src/webp/decode.h +++ b/thirdparty/libwebp/src/webp/decode.h @@ -81,10 +81,11 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, // returned is the Y samples buffer. Upon return, *u and *v will point to // the U and V chroma data. 
These U and V buffers need NOT be passed to // WebPFree(), unlike the returned Y luma one. The dimension of the U and V -// planes are both (*width + 1) / 2 and (*height + 1)/ 2. +// planes are both (*width + 1) / 2 and (*height + 1) / 2. // Upon return, the Y buffer has a stride returned as '*stride', while U and V // have a common stride returned as '*uv_stride'. -// Return NULL in case of error. +// 'width' and 'height' may be NULL, the other pointers must not be. +// Returns NULL in case of error. // (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width, int* height,