Merge pull request #57102 from akien-mga/libwebp-1.2.2
This commit is contained in:
commit
6acbd5f774
|
@ -309,7 +309,7 @@ Files extracted from upstream source:
|
|||
## libwebp
|
||||
|
||||
- Upstream: https://chromium.googlesource.com/webm/libwebp/
|
||||
- Version: 1.2.1 (9ce5843dbabcfd3f7c39ec7ceba9cbeb213cbfdf, 2021)
|
||||
- Version: 1.2.2 (b0a860891dcd4c0c2d7c6149e5cccb6eb881cc21, 2022)
|
||||
- License: BSD-3-Clause
|
||||
|
||||
Files extracted from upstream source:
|
||||
|
@ -317,10 +317,6 @@ Files extracted from upstream source:
|
|||
- `src/*` except for: `.am`, `.rc` and `.in` files
|
||||
- `AUTHORS`, `COPYING`, `PATENTS`
|
||||
|
||||
Important: The files `utils/bit_reader_utils.{c,h}` have Godot-made
|
||||
changes to ensure they build for Javascript/HTML5. Those
|
||||
changes are marked with `// -- GODOT --` comments.
|
||||
|
||||
|
||||
## mbedtls
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ Contributors:
|
|||
- Pascal Massimino (pascal dot massimino at gmail dot com)
|
||||
- Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
|
||||
- Pierre Joye (pierre dot php at gmail dot com)
|
||||
- Roberto Alanis (alanisbaez at google dot com)
|
||||
- Sam Clegg (sbc at chromium dot org)
|
||||
- Scott Hancher (seh at google dot com)
|
||||
- Scott LaVarnway (slavarnway at google dot com)
|
||||
|
|
|
@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = {
|
|||
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
|
||||
};
|
||||
|
||||
// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
|
||||
// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
|
||||
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
|
||||
int v;
|
||||
if (!VP8GetBit(br, p[3], "coeffs")) {
|
||||
|
|
|
@ -32,7 +32,7 @@ extern "C" {
|
|||
// version numbers
|
||||
#define DEC_MAJ_VERSION 1
|
||||
#define DEC_MIN_VERSION 2
|
||||
#define DEC_REV_VERSION 1
|
||||
#define DEC_REV_VERSION 2
|
||||
|
||||
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||
// Constraints are: We need to store one 16x16 block of luma samples (y),
|
||||
|
|
|
@ -84,7 +84,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
|
|||
// to 256 (green component values) + 24 (length prefix values)
|
||||
// + color_cache_size (between 0 and 2048).
|
||||
// All values computed for 8-bit first level lookup with Mark Adler's tool:
|
||||
// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
|
||||
// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
|
||||
#define FIXED_TABLE_SIZE (630 * 3 + 410)
|
||||
static const uint16_t kTableSize[12] = {
|
||||
FIXED_TABLE_SIZE + 654,
|
||||
|
|
|
@ -23,6 +23,14 @@
|
|||
|
||||
#define NUM_CHANNELS 4
|
||||
|
||||
// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA
|
||||
// buffer.
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
#define CHANNEL_SHIFT(i) (24 - (i) * 8)
|
||||
#else
|
||||
#define CHANNEL_SHIFT(i) ((i) * 8)
|
||||
#endif
|
||||
|
||||
typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
|
||||
static void BlendPixelRowNonPremult(uint32_t* const src,
|
||||
const uint32_t* const dst, int num_pixels);
|
||||
|
@ -209,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
|
|||
const uint8_t dst_channel = (dst >> shift) & 0xff;
|
||||
const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
|
||||
assert(blend_unscaled < (1ULL << 32) / scale);
|
||||
return (blend_unscaled * scale) >> 24;
|
||||
return (blend_unscaled * scale) >> CHANNEL_SHIFT(3);
|
||||
}
|
||||
|
||||
// Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha.
|
||||
static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
|
||||
const uint8_t src_a = (src >> 24) & 0xff;
|
||||
const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
|
||||
if (src_a == 0) {
|
||||
return dst;
|
||||
} else {
|
||||
const uint8_t dst_a = (dst >> 24) & 0xff;
|
||||
const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
// This is the approximate integer arithmetic for the actual formula:
|
||||
// dst_factor_a = (dst_a * (255 - src_a)) / 255.
|
||||
const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
|
||||
const uint8_t blend_a = src_a + dst_factor_a;
|
||||
const uint32_t scale = (1UL << 24) / blend_a;
|
||||
|
||||
const uint8_t blend_r =
|
||||
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
|
||||
const uint8_t blend_g =
|
||||
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
|
||||
const uint8_t blend_b =
|
||||
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
|
||||
const uint8_t blend_r = BlendChannelNonPremult(
|
||||
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0));
|
||||
const uint8_t blend_g = BlendChannelNonPremult(
|
||||
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1));
|
||||
const uint8_t blend_b = BlendChannelNonPremult(
|
||||
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2));
|
||||
assert(src_a + dst_factor_a < 256);
|
||||
|
||||
return (blend_r << 0) |
|
||||
(blend_g << 8) |
|
||||
(blend_b << 16) |
|
||||
((uint32_t)blend_a << 24);
|
||||
return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) |
|
||||
((uint32_t)blend_g << CHANNEL_SHIFT(1)) |
|
||||
((uint32_t)blend_b << CHANNEL_SHIFT(2)) |
|
||||
((uint32_t)blend_a << CHANNEL_SHIFT(3));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -247,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src,
|
|||
const uint32_t* const dst, int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint8_t src_alpha = (src[i] >> 24) & 0xff;
|
||||
const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
if (src_alpha != 0xff) {
|
||||
src[i] = BlendPixelNonPremult(src[i], dst[i]);
|
||||
}
|
||||
|
@ -264,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {
|
|||
|
||||
// Blend 'src' over 'dst' assuming they are pre-multiplied by alpha.
|
||||
static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
|
||||
const uint8_t src_a = (src >> 24) & 0xff;
|
||||
const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
return src + ChannelwiseMultiply(dst, 256 - src_a);
|
||||
}
|
||||
|
||||
|
@ -274,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
|
|||
int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint8_t src_alpha = (src[i] >> 24) & 0xff;
|
||||
const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
if (src_alpha != 0xff) {
|
||||
src[i] = BlendPixelPremult(src[i], dst[i]);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
#define DMUX_MAJ_VERSION 1
|
||||
#define DMUX_MIN_VERSION 2
|
||||
#define DMUX_REV_VERSION 1
|
||||
#define DMUX_REV_VERSION 2
|
||||
|
||||
typedef struct {
|
||||
size_t start_; // start location of the data
|
||||
|
|
|
@ -119,7 +119,12 @@ extern "C" {
|
|||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
|
||||
// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
|
||||
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
|
||||
// arm_neon.h.
|
||||
#if defined(_MSC_VER) && \
|
||||
((_MSC_VER >= 1700 && defined(_M_ARM)) || \
|
||||
(_MSC_VER >= 1920 && defined(_M_ARM64)))
|
||||
#define WEBP_USE_NEON
|
||||
#define WEBP_USE_INTRINSICS
|
||||
#endif
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
//
|
||||
// ARM NEON version of speed-critical encoding functions.
|
||||
//
|
||||
// adapted from libvpx (http://www.webmproject.org/code/)
|
||||
// adapted from libvpx (https://www.webmproject.org/code/)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
|
|
|
@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
|
|||
//------------------------------------------------------------------------------
|
||||
// Predictors
|
||||
|
||||
uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)top;
|
||||
(void)left;
|
||||
return ARGB_BLACK;
|
||||
}
|
||||
uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)top;
|
||||
return left;
|
||||
return *left;
|
||||
}
|
||||
uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor2_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[0];
|
||||
}
|
||||
uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor3_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[1];
|
||||
}
|
||||
uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor4_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[-1];
|
||||
}
|
||||
uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average3(left, top[0], top[1]);
|
||||
uint32_t VP8LPredictor5_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average3(*left, top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[-1]);
|
||||
uint32_t VP8LPredictor6_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(*left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[0]);
|
||||
uint32_t VP8LPredictor7_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(*left, top[0]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor8_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[-1], top[0]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor9_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[0], top[1]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
|
||||
uint32_t VP8LPredictor10_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average4(*left, top[-1], top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Select(top[0], left, top[-1]);
|
||||
uint32_t VP8LPredictor11_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Select(top[0], *left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
|
||||
uint32_t VP8LPredictor12_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
|
||||
uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,23 +28,38 @@ extern "C" {
|
|||
//------------------------------------------------------------------------------
|
||||
// Decoding
|
||||
|
||||
typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
|
||||
typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
extern VP8LPredictorFunc VP8LPredictors[16];
|
||||
|
||||
uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor2_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor3_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor4_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor5_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor6_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor7_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor8_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor9_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor10_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor11_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor12_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
|
||||
// These Add/Sub functions expect upper[-1] and out[-1] to be readable.
|
||||
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
|
||||
|
|
|
@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
|
|||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \
|
||||
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
|
||||
out[x] = VP8LAddPixels(in[x], pred); \
|
||||
} \
|
||||
}
|
||||
|
|
|
@ -745,7 +745,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
|
|||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
const uint32_t pred = \
|
||||
VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x); \
|
||||
VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x); \
|
||||
out[x] = VP8LSubPixels(in[x], pred); \
|
||||
} \
|
||||
}
|
||||
|
|
|
@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
|
|||
return Average2(Average2(a0, a1), Average2(a2, a3));
|
||||
}
|
||||
|
||||
static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
return Average3(left, top[0], top[1]);
|
||||
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average3(*left, top[0], top[1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
return Average2(left, top[-1]);
|
||||
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average2(*left, top[-1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
return Average2(left, top[0]);
|
||||
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average2(*left, top[0]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return Average2(top[-1], top[0]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return Average2(top[0], top[1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor10_MIPSdspR2(uint32_t left,
|
||||
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average4(left, top[-1], top[0], top[1]);
|
||||
return Average4(*left, top[-1], top[0], top[1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor11_MIPSdspR2(uint32_t left,
|
||||
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Select(top[0], left, top[-1]);
|
||||
return Select(top[0], *left, top[-1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor12_MIPSdspR2(uint32_t left,
|
||||
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return ClampedAddSubtractFull(left, top[0], top[-1]);
|
||||
return ClampedAddSubtractFull(*left, top[0], top[-1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor13_MIPSdspR2(uint32_t left,
|
||||
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return ClampedAddSubtractHalf(left, top[0], top[-1]);
|
||||
return ClampedAddSubtractHalf(*left, top[0], top[-1]);
|
||||
}
|
||||
|
||||
// Add green to blue and red channels (i.e. perform the inverse transform of
|
||||
|
|
|
@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1,
|
|||
return avg;
|
||||
}
|
||||
|
||||
static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) {
|
||||
return Average3_NEON(left, top[0], top[1]);
|
||||
static uint32_t Predictor5_NEON(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average3_NEON(*left, top[0], top[1]);
|
||||
}
|
||||
static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) {
|
||||
return Average2_NEON(left, top[-1]);
|
||||
static uint32_t Predictor6_NEON(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average2_NEON(*left, top[-1]);
|
||||
}
|
||||
static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) {
|
||||
return Average2_NEON(left, top[0]);
|
||||
static uint32_t Predictor7_NEON(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average2_NEON(*left, top[0]);
|
||||
}
|
||||
static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) {
|
||||
return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]);
|
||||
static uint32_t Predictor13_NEON(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]);
|
||||
}
|
||||
|
||||
// Batch versions of those functions.
|
||||
|
|
|
@ -138,42 +138,51 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
|
|||
return output;
|
||||
}
|
||||
|
||||
static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average3_SSE2(left, top[0], top[1]);
|
||||
static uint32_t Predictor5_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average3_SSE2(*left, top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(left, top[-1]);
|
||||
static uint32_t Predictor6_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(*left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(left, top[0]);
|
||||
static uint32_t Predictor7_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(*left, top[0]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor8_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(top[-1], top[0]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor9_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(top[0], top[1]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]);
|
||||
static uint32_t Predictor10_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Select_SSE2(top[0], left, top[-1]);
|
||||
static uint32_t Predictor11_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Select_SSE2(top[0], *left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]);
|
||||
static uint32_t Predictor12_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]);
|
||||
static uint32_t Predictor13_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,6 +14,10 @@
|
|||
#ifndef WEBP_DSP_MSA_MACRO_H_
|
||||
#define WEBP_DSP_MSA_MACRO_H_
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MSA)
|
||||
|
||||
#include <stdint.h>
|
||||
#include <msa.h>
|
||||
|
||||
|
@ -1389,4 +1393,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
|
|||
} while (0)
|
||||
#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
|
||||
|
||||
#endif // WEBP_USE_MSA
|
||||
#endif // WEBP_DSP_MSA_MACRO_H_
|
||||
|
|
|
@ -12,10 +12,12 @@
|
|||
#ifndef WEBP_DSP_NEON_H_
|
||||
#define WEBP_DSP_NEON_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Right now, some intrinsics functions seem slower, so we disable them
|
||||
// everywhere except newer clang/gcc or aarch64 where the inline assembly is
|
||||
// incompatible.
|
||||
|
@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
|
|||
} while (0)
|
||||
#endif
|
||||
|
||||
#endif // WEBP_USE_NEON
|
||||
#endif // WEBP_DSP_NEON_H_
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
// inline YUV<->RGB conversion function
|
||||
//
|
||||
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
|
||||
// More information at: http://en.wikipedia.org/wiki/YCbCr
|
||||
// More information at: https://en.wikipedia.org/wiki/YCbCr
|
||||
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
|
||||
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
|
||||
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
|
||||
|
|
|
@ -778,6 +778,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
|
|||
// Roughly refresh the proba eight times per pass
|
||||
int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
|
||||
int num_pass_left = enc->config_->pass;
|
||||
int remaining_progress = 40; // percents
|
||||
const int do_search = enc->do_search_;
|
||||
VP8EncIterator it;
|
||||
VP8EncProba* const proba = &enc->proba_;
|
||||
|
@ -805,6 +806,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
|
|||
uint64_t size_p0 = 0;
|
||||
uint64_t distortion = 0;
|
||||
int cnt = max_count;
|
||||
// The final number of passes is not trivial to know in advance.
|
||||
const int pass_progress = remaining_progress / (2 + num_pass_left);
|
||||
remaining_progress -= pass_progress;
|
||||
VP8IteratorInit(enc, &it);
|
||||
SetLoopParams(enc, stats.q);
|
||||
if (is_last_pass) {
|
||||
|
@ -832,7 +836,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
|
|||
StoreSideInfo(&it);
|
||||
VP8StoreFilterStats(&it);
|
||||
VP8IteratorExport(&it);
|
||||
ok = VP8IteratorProgress(&it, 20);
|
||||
ok = VP8IteratorProgress(&it, pass_progress);
|
||||
}
|
||||
VP8IteratorSaveBoundary(&it);
|
||||
} while (ok && VP8IteratorNext(&it));
|
||||
|
@ -878,7 +882,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
|
|||
ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
|
||||
(const uint8_t*)proba->coeffs_, 1);
|
||||
}
|
||||
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
|
||||
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress,
|
||||
&enc->percent_);
|
||||
return PostLoopFinalize(&it, ok);
|
||||
}
|
||||
|
||||
|
|
|
@ -249,7 +249,7 @@ static WEBP_INLINE void GetResidual(
|
|||
} else if (x == 0) {
|
||||
predict = upper_row[x]; // Top.
|
||||
} else {
|
||||
predict = pred_func(current_row[x - 1], upper_row + x);
|
||||
predict = pred_func(&current_row[x - 1], upper_row + x);
|
||||
}
|
||||
#if (WEBP_NEAR_LOSSLESS == 1)
|
||||
if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||
|
||||
|
|
|
@ -585,6 +585,9 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
|
|||
return rate * lambda + RD_DISTO_MULT * distortion;
|
||||
}
|
||||
|
||||
// Coefficient type.
|
||||
enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 };
|
||||
|
||||
static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
int16_t in[16], int16_t out[16],
|
||||
int ctx0, int coeff_type,
|
||||
|
@ -593,7 +596,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
|||
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
|
||||
CostArrayPtr const costs =
|
||||
(CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
|
||||
const int first = (coeff_type == 0) ? 1 : 0;
|
||||
const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0;
|
||||
Node nodes[16][NUM_NODES];
|
||||
ScoreState score_states[2][NUM_NODES];
|
||||
ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
|
||||
|
@ -657,16 +660,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
|||
// test all alternate level values around level0.
|
||||
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
|
||||
Node* const cur = &NODE(n, m);
|
||||
int level = level0 + m;
|
||||
const int level = level0 + m;
|
||||
const int ctx = (level > 2) ? 2 : level;
|
||||
const int band = VP8EncBands[n + 1];
|
||||
score_t base_score;
|
||||
score_t best_cur_score = MAX_COST;
|
||||
int best_prev = 0; // default, in case
|
||||
score_t best_cur_score;
|
||||
int best_prev;
|
||||
score_t cost, score;
|
||||
|
||||
ss_cur[m].score = MAX_COST;
|
||||
ss_cur[m].costs = costs[n + 1][ctx];
|
||||
if (level < 0 || level > thresh_level) {
|
||||
ss_cur[m].score = MAX_COST;
|
||||
// Node is dead.
|
||||
continue;
|
||||
}
|
||||
|
@ -682,18 +686,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
|||
}
|
||||
|
||||
// Inspect all possible non-dead predecessors. Retain only the best one.
|
||||
for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
|
||||
// The base_score is added to all scores so it is only added for the final
|
||||
// value after the loop.
|
||||
cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level);
|
||||
best_cur_score =
|
||||
ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0);
|
||||
best_prev = -MIN_DELTA;
|
||||
for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) {
|
||||
// Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
|
||||
// eliminated since their score can't be better than the current best.
|
||||
const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
|
||||
cost = VP8LevelCost(ss_prev[p].costs, level);
|
||||
// Examine node assuming it's a non-terminal one.
|
||||
const score_t score =
|
||||
base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
|
||||
score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
|
||||
if (score < best_cur_score) {
|
||||
best_cur_score = score;
|
||||
best_prev = p;
|
||||
}
|
||||
}
|
||||
best_cur_score += base_score;
|
||||
// Store best finding in current node.
|
||||
cur->sign = sign;
|
||||
cur->level = level;
|
||||
|
@ -701,11 +711,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
|||
ss_cur[m].score = best_cur_score;
|
||||
|
||||
// Now, record best terminal node (and thus best entry in the graph).
|
||||
if (level != 0) {
|
||||
if (level != 0 && best_cur_score < best_score) {
|
||||
const score_t last_pos_cost =
|
||||
(n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
|
||||
const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
|
||||
const score_t score = best_cur_score + last_pos_score;
|
||||
score = best_cur_score + last_pos_score;
|
||||
if (score < best_score) {
|
||||
best_score = score;
|
||||
best_path[0] = n; // best eob position
|
||||
|
@ -717,10 +727,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
|||
}
|
||||
|
||||
// Fresh start
|
||||
memset(in + first, 0, (16 - first) * sizeof(*in));
|
||||
memset(out + first, 0, (16 - first) * sizeof(*out));
|
||||
// Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case.
|
||||
if (coeff_type == TYPE_I16_AC) {
|
||||
memset(in + 1, 0, 15 * sizeof(*in));
|
||||
memset(out + 1, 0, 15 * sizeof(*out));
|
||||
} else {
|
||||
memset(in, 0, 16 * sizeof(*in));
|
||||
memset(out, 0, 16 * sizeof(*out));
|
||||
}
|
||||
if (best_path[0] == -1) {
|
||||
return 0; // skip!
|
||||
return 0; // skip!
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -775,9 +791,9 @@ static int ReconstructIntra16(VP8EncIterator* const it,
|
|||
for (y = 0, n = 0; y < 4; ++y) {
|
||||
for (x = 0; x < 4; ++x, ++n) {
|
||||
const int ctx = it->top_nz_[x] + it->left_nz_[y];
|
||||
const int non_zero =
|
||||
TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
|
||||
&dqm->y1_, dqm->lambda_trellis_i16_);
|
||||
const int non_zero = TrellisQuantizeBlock(
|
||||
enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_,
|
||||
dqm->lambda_trellis_i16_);
|
||||
it->top_nz_[x] = it->left_nz_[y] = non_zero;
|
||||
rd->y_ac_levels[n][0] = 0;
|
||||
nz |= non_zero << n;
|
||||
|
@ -818,7 +834,7 @@ static int ReconstructIntra4(VP8EncIterator* const it,
|
|||
if (DO_TRELLIS_I4 && it->do_trellis_) {
|
||||
const int x = it->i4_ & 3, y = it->i4_ >> 2;
|
||||
const int ctx = it->top_nz_[x] + it->left_nz_[y];
|
||||
nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
|
||||
nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_,
|
||||
dqm->lambda_trellis_i4_);
|
||||
} else {
|
||||
nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
|
||||
|
@ -927,9 +943,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
|
|||
for (y = 0; y < 2; ++y) {
|
||||
for (x = 0; x < 2; ++x, ++n) {
|
||||
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
|
||||
const int non_zero =
|
||||
TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
|
||||
&dqm->uv_, dqm->lambda_trellis_uv_);
|
||||
const int non_zero = TrellisQuantizeBlock(
|
||||
enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_,
|
||||
dqm->lambda_trellis_uv_);
|
||||
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
|
||||
nz |= non_zero << n;
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ extern "C" {
|
|||
// version numbers
|
||||
#define ENC_MAJ_VERSION 1
|
||||
#define ENC_MIN_VERSION 2
|
||||
#define ENC_REV_VERSION 1
|
||||
#define ENC_REV_VERSION 2
|
||||
|
||||
enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
|
||||
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
|
||||
|
|
|
@ -29,7 +29,7 @@ extern "C" {
|
|||
|
||||
#define MUX_MAJ_VERSION 1
|
||||
#define MUX_MIN_VERSION 2
|
||||
#define MUX_REV_VERSION 1
|
||||
#define MUX_REV_VERSION 2
|
||||
|
||||
// Chunk object.
|
||||
typedef struct WebPChunk WebPChunk;
|
||||
|
|
|
@ -161,7 +161,7 @@ static void SetBitDepths(const HuffmanTree* const tree,
|
|||
// especially when population counts are longer than 2**tree_limit, but
|
||||
// we are not planning to use this with extremely long blocks.
|
||||
//
|
||||
// See http://en.wikipedia.org/wiki/Huffman_coding
|
||||
// See https://en.wikipedia.org/wiki/Huffman_coding
|
||||
static void GenerateOptimalTree(const uint32_t* const histogram,
|
||||
int histogram_size,
|
||||
HuffmanTree* tree, int tree_depth_limit,
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
|
||||
#define DFIX 4 // extra precision for ordered dithering
|
||||
#define DSIZE 4 // dithering size (must be a power of two)
|
||||
// cf. http://en.wikipedia.org/wiki/Ordered_dithering
|
||||
// cf. https://en.wikipedia.org/wiki/Ordered_dithering
|
||||
static const uint8_t kOrderedDither[DSIZE][DSIZE] = {
|
||||
{ 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision
|
||||
{ 12, 4, 14, 6 },
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
// alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
|
||||
// and not multi-thread safe!).
|
||||
// An interesting alternative is valgrind's 'massif' tool:
|
||||
// http://valgrind.org/docs/manual/ms-manual.html
|
||||
// https://valgrind.org/docs/manual/ms-manual.html
|
||||
// Here is an example command line:
|
||||
/* valgrind --tool=massif --massif-out-file=massif.out \
|
||||
--stacks=yes --alloc-fn=WebPSafeMalloc --alloc-fn=WebPSafeCalloc
|
||||
|
|
|
@ -85,7 +85,7 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
|
|||
// Upon return, the Y buffer has a stride returned as '*stride', while U and V
|
||||
// have a common stride returned as '*uv_stride'.
|
||||
// Return NULL in case of error.
|
||||
// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
|
||||
// (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr
|
||||
WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height,
|
||||
uint8_t** u, uint8_t** v,
|
||||
|
|
Loading…
Reference in New Issue