libwebp: Sync with upstream 1.2.4

Changes: https://chromium.googlesource.com/webm/libwebp/+/1.2.4/NEWS

(cherry picked from commit e5e34f21fc)

parent a8c01896cc
commit b129ae2127

@@ -12,123 +12,129 @@ thirdparty_obj = []
if env["builtin_libwebp"]:
|
||||
thirdparty_dir = "#thirdparty/libwebp/"
|
||||
thirdparty_sources = [
|
||||
"dec/alpha_dec.c",
|
||||
"dec/buffer_dec.c",
|
||||
"dec/frame_dec.c",
|
||||
"dec/idec_dec.c",
|
||||
"dec/io_dec.c",
|
||||
"dec/quant_dec.c",
|
||||
"dec/tree_dec.c",
|
||||
"dec/vp8_dec.c",
|
||||
"dec/vp8l_dec.c",
|
||||
"dec/webp_dec.c",
|
||||
"demux/anim_decode.c",
|
||||
"demux/demux.c",
|
||||
"dsp/alpha_processing.c",
|
||||
"dsp/alpha_processing_mips_dsp_r2.c",
|
||||
"dsp/alpha_processing_neon.c",
|
||||
"dsp/alpha_processing_sse2.c",
|
||||
"dsp/alpha_processing_sse41.c",
|
||||
"dsp/cost.c",
|
||||
"dsp/cost_mips32.c",
|
||||
"dsp/cost_mips_dsp_r2.c",
|
||||
"dsp/cost_neon.c",
|
||||
"dsp/cost_sse2.c",
|
||||
"dsp/cpu.c",
|
||||
"dsp/dec.c",
|
||||
"dsp/dec_clip_tables.c",
|
||||
"dsp/dec_mips32.c",
|
||||
"dsp/dec_mips_dsp_r2.c",
|
||||
"dsp/dec_msa.c",
|
||||
"dsp/dec_neon.c",
|
||||
"dsp/dec_sse2.c",
|
||||
"dsp/dec_sse41.c",
|
||||
"dsp/enc.c",
|
||||
"dsp/enc_mips32.c",
|
||||
"dsp/enc_mips_dsp_r2.c",
|
||||
"dsp/enc_msa.c",
|
||||
"dsp/enc_neon.c",
|
||||
"dsp/enc_sse2.c",
|
||||
"dsp/enc_sse41.c",
|
||||
"dsp/filters.c",
|
||||
"dsp/filters_mips_dsp_r2.c",
|
||||
"dsp/filters_msa.c",
|
||||
"dsp/filters_neon.c",
|
||||
"dsp/filters_sse2.c",
|
||||
"dsp/lossless.c",
|
||||
"dsp/lossless_enc.c",
|
||||
"dsp/lossless_enc_mips32.c",
|
||||
"dsp/lossless_enc_mips_dsp_r2.c",
|
||||
"dsp/lossless_enc_msa.c",
|
||||
"dsp/lossless_enc_neon.c",
|
||||
"dsp/lossless_enc_sse2.c",
|
||||
"dsp/lossless_enc_sse41.c",
|
||||
"dsp/lossless_mips_dsp_r2.c",
|
||||
"dsp/lossless_msa.c",
|
||||
"dsp/lossless_neon.c",
|
||||
"dsp/lossless_sse2.c",
|
||||
"dsp/lossless_sse41.c",
|
||||
"dsp/rescaler.c",
|
||||
"dsp/rescaler_mips32.c",
|
||||
"dsp/rescaler_mips_dsp_r2.c",
|
||||
"dsp/rescaler_msa.c",
|
||||
"dsp/rescaler_neon.c",
|
||||
"dsp/rescaler_sse2.c",
|
||||
"dsp/ssim.c",
|
||||
"dsp/ssim_sse2.c",
|
||||
"dsp/upsampling.c",
|
||||
"dsp/upsampling_mips_dsp_r2.c",
|
||||
"dsp/upsampling_msa.c",
|
||||
"dsp/upsampling_neon.c",
|
||||
"dsp/upsampling_sse2.c",
|
||||
"dsp/upsampling_sse41.c",
|
||||
"dsp/yuv.c",
|
||||
"dsp/yuv_mips32.c",
|
||||
"dsp/yuv_mips_dsp_r2.c",
|
||||
"dsp/yuv_neon.c",
|
||||
"dsp/yuv_sse2.c",
|
||||
"dsp/yuv_sse41.c",
|
||||
"enc/alpha_enc.c",
|
||||
"enc/analysis_enc.c",
|
||||
"enc/backward_references_cost_enc.c",
|
||||
"enc/backward_references_enc.c",
|
||||
"enc/config_enc.c",
|
||||
"enc/cost_enc.c",
|
||||
"enc/filter_enc.c",
|
||||
"enc/frame_enc.c",
|
||||
"enc/histogram_enc.c",
|
||||
"enc/iterator_enc.c",
|
||||
"enc/near_lossless_enc.c",
|
||||
"enc/picture_csp_enc.c",
|
||||
"enc/picture_enc.c",
|
||||
"enc/picture_psnr_enc.c",
|
||||
"enc/picture_rescale_enc.c",
|
||||
"enc/picture_tools_enc.c",
|
||||
"enc/predictor_enc.c",
|
||||
"enc/quant_enc.c",
|
||||
"enc/syntax_enc.c",
|
||||
"enc/token_enc.c",
|
||||
"enc/tree_enc.c",
|
||||
"enc/vp8l_enc.c",
|
||||
"enc/webp_enc.c",
|
||||
"mux/anim_encode.c",
|
||||
"mux/muxedit.c",
|
||||
"mux/muxinternal.c",
|
||||
"mux/muxread.c",
|
||||
"utils/bit_reader_utils.c",
|
||||
"utils/bit_writer_utils.c",
|
||||
"utils/color_cache_utils.c",
|
||||
"utils/filters_utils.c",
|
||||
"utils/huffman_encode_utils.c",
|
||||
"utils/huffman_utils.c",
|
||||
"utils/quant_levels_dec_utils.c",
|
||||
"utils/quant_levels_utils.c",
|
||||
"utils/random_utils.c",
|
||||
"utils/rescaler_utils.c",
|
||||
"utils/thread_utils.c",
|
||||
"utils/utils.c",
|
||||
"sharpyuv/sharpyuv.c",
|
||||
"sharpyuv/sharpyuv_csp.c",
|
||||
"sharpyuv/sharpyuv_dsp.c",
|
||||
"sharpyuv/sharpyuv_gamma.c",
|
||||
"sharpyuv/sharpyuv_neon.c",
|
||||
"sharpyuv/sharpyuv_sse2.c",
|
||||
"src/dec/alpha_dec.c",
|
||||
"src/dec/buffer_dec.c",
|
||||
"src/dec/frame_dec.c",
|
||||
"src/dec/idec_dec.c",
|
||||
"src/dec/io_dec.c",
|
||||
"src/dec/quant_dec.c",
|
||||
"src/dec/tree_dec.c",
|
||||
"src/dec/vp8_dec.c",
|
||||
"src/dec/vp8l_dec.c",
|
||||
"src/dec/webp_dec.c",
|
||||
"src/demux/anim_decode.c",
|
||||
"src/demux/demux.c",
|
||||
"src/dsp/alpha_processing.c",
|
||||
"src/dsp/alpha_processing_mips_dsp_r2.c",
|
||||
"src/dsp/alpha_processing_neon.c",
|
||||
"src/dsp/alpha_processing_sse2.c",
|
||||
"src/dsp/alpha_processing_sse41.c",
|
||||
"src/dsp/cost.c",
|
||||
"src/dsp/cost_mips32.c",
|
||||
"src/dsp/cost_mips_dsp_r2.c",
|
||||
"src/dsp/cost_neon.c",
|
||||
"src/dsp/cost_sse2.c",
|
||||
"src/dsp/cpu.c",
|
||||
"src/dsp/dec.c",
|
||||
"src/dsp/dec_clip_tables.c",
|
||||
"src/dsp/dec_mips32.c",
|
||||
"src/dsp/dec_mips_dsp_r2.c",
|
||||
"src/dsp/dec_msa.c",
|
||||
"src/dsp/dec_neon.c",
|
||||
"src/dsp/dec_sse2.c",
|
||||
"src/dsp/dec_sse41.c",
|
||||
"src/dsp/enc.c",
|
||||
"src/dsp/enc_mips32.c",
|
||||
"src/dsp/enc_mips_dsp_r2.c",
|
||||
"src/dsp/enc_msa.c",
|
||||
"src/dsp/enc_neon.c",
|
||||
"src/dsp/enc_sse2.c",
|
||||
"src/dsp/enc_sse41.c",
|
||||
"src/dsp/filters.c",
|
||||
"src/dsp/filters_mips_dsp_r2.c",
|
||||
"src/dsp/filters_msa.c",
|
||||
"src/dsp/filters_neon.c",
|
||||
"src/dsp/filters_sse2.c",
|
||||
"src/dsp/lossless.c",
|
||||
"src/dsp/lossless_enc.c",
|
||||
"src/dsp/lossless_enc_mips32.c",
|
||||
"src/dsp/lossless_enc_mips_dsp_r2.c",
|
||||
"src/dsp/lossless_enc_msa.c",
|
||||
"src/dsp/lossless_enc_neon.c",
|
||||
"src/dsp/lossless_enc_sse2.c",
|
||||
"src/dsp/lossless_enc_sse41.c",
|
||||
"src/dsp/lossless_mips_dsp_r2.c",
|
||||
"src/dsp/lossless_msa.c",
|
||||
"src/dsp/lossless_neon.c",
|
||||
"src/dsp/lossless_sse2.c",
|
||||
"src/dsp/lossless_sse41.c",
|
||||
"src/dsp/rescaler.c",
|
||||
"src/dsp/rescaler_mips32.c",
|
||||
"src/dsp/rescaler_mips_dsp_r2.c",
|
||||
"src/dsp/rescaler_msa.c",
|
||||
"src/dsp/rescaler_neon.c",
|
||||
"src/dsp/rescaler_sse2.c",
|
||||
"src/dsp/ssim.c",
|
||||
"src/dsp/ssim_sse2.c",
|
||||
"src/dsp/upsampling.c",
|
||||
"src/dsp/upsampling_mips_dsp_r2.c",
|
||||
"src/dsp/upsampling_msa.c",
|
||||
"src/dsp/upsampling_neon.c",
|
||||
"src/dsp/upsampling_sse2.c",
|
||||
"src/dsp/upsampling_sse41.c",
|
||||
"src/dsp/yuv.c",
|
||||
"src/dsp/yuv_mips32.c",
|
||||
"src/dsp/yuv_mips_dsp_r2.c",
|
||||
"src/dsp/yuv_neon.c",
|
||||
"src/dsp/yuv_sse2.c",
|
||||
"src/dsp/yuv_sse41.c",
|
||||
"src/enc/alpha_enc.c",
|
||||
"src/enc/analysis_enc.c",
|
||||
"src/enc/backward_references_cost_enc.c",
|
||||
"src/enc/backward_references_enc.c",
|
||||
"src/enc/config_enc.c",
|
||||
"src/enc/cost_enc.c",
|
||||
"src/enc/filter_enc.c",
|
||||
"src/enc/frame_enc.c",
|
||||
"src/enc/histogram_enc.c",
|
||||
"src/enc/iterator_enc.c",
|
||||
"src/enc/near_lossless_enc.c",
|
||||
"src/enc/picture_csp_enc.c",
|
||||
"src/enc/picture_enc.c",
|
||||
"src/enc/picture_psnr_enc.c",
|
||||
"src/enc/picture_rescale_enc.c",
|
||||
"src/enc/picture_tools_enc.c",
|
||||
"src/enc/predictor_enc.c",
|
||||
"src/enc/quant_enc.c",
|
||||
"src/enc/syntax_enc.c",
|
||||
"src/enc/token_enc.c",
|
||||
"src/enc/tree_enc.c",
|
||||
"src/enc/vp8l_enc.c",
|
||||
"src/enc/webp_enc.c",
|
||||
"src/mux/anim_encode.c",
|
||||
"src/mux/muxedit.c",
|
||||
"src/mux/muxinternal.c",
|
||||
"src/mux/muxread.c",
|
||||
"src/utils/bit_reader_utils.c",
|
||||
"src/utils/bit_writer_utils.c",
|
||||
"src/utils/color_cache_utils.c",
|
||||
"src/utils/filters_utils.c",
|
||||
"src/utils/huffman_encode_utils.c",
|
||||
"src/utils/huffman_utils.c",
|
||||
"src/utils/quant_levels_dec_utils.c",
|
||||
"src/utils/quant_levels_utils.c",
|
||||
"src/utils/random_utils.c",
|
||||
"src/utils/rescaler_utils.c",
|
||||
"src/utils/thread_utils.c",
|
||||
"src/utils/utils.c",
|
||||
]
|
||||
thirdparty_sources = [thirdparty_dir + "src/" + file for file in thirdparty_sources]
|
||||
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
|
||||
|
||||
env_webp.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "src/"])

@@ -266,12 +266,12 @@ from the Android NDK r18.
## libwebp

- Upstream: https://chromium.googlesource.com/webm/libwebp/
- Version: 1.2.2 (b0a860891dcd4c0c2d7c6149e5cccb6eb881cc21, 2022)
- Version: 1.2.4 (0d1f12546bd803099a60c070517a552483f3790e, 2022)
- License: BSD-3-Clause

Files extracted from upstream source:

- `src/*` except from: `.am`, `.rc` and `.in` files
- `src/` and `sharpyuv/` except from: `.am`, `.rc` and `.in` files
- `AUTHORS`, `COPYING`, `PATENTS`
@@ -1,12 +1,15 @@
Contributors:
- Aidan O'Loan (aidanol at gmail dot com)
- Alan Browning (browning at google dot com)
- Alexandru Ardelean (ardeleanalex at gmail dot com)
- Brian Ledger (brianpl at google dot com)
- Charles Munger (clm at google dot com)
- Cheng Yi (cyi at google dot com)
- Christian Duvivier (cduvivier at google dot com)
- Christopher Degawa (ccom at randomderp dot com)
- Clement Courbet (courbet at google dot com)
- Djordje Pesut (djordje dot pesut at imgtec dot com)
- Frank Barchard (fbarchard at google dot com)
- Hui Su (huisu at google dot com)
- Ilya Kurdyukov (jpegqs at gmail dot com)
- Ingvar Stepanyan (rreverser at google dot com)
@@ -22,6 +25,7 @@ Contributors:
- Mans Rullgard (mans at mansr dot com)
- Marcin Kowalczyk (qrczak at google dot com)
- Martin Olsson (mnemo at minimum dot se)
- Maryla Ustarroz-Calonge (maryla at google dot com)
- Mikołaj Zalewski (mikolajz at google dot com)
- Mislav Bradac (mislavm at google dot com)
- Nico Weber (thakis at chromium dot org)
sharpyuv/sharpyuv.c (new file)
@@ -0,0 +1,498 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Sharp RGB to YUV conversion.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/webp/types.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
#include "sharpyuv/sharpyuv_gamma.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Sharp RGB->YUV conversion
|
||||
|
||||
static const int kNumIterations = 4;
|
||||
|
||||
#define YUV_FIX 16 // fixed-point precision for RGB->YUV
|
||||
static const int kYuvHalf = 1 << (YUV_FIX - 1);
|
||||
|
||||
// Max bit depth so that intermediate calculations fit in 16 bits.
|
||||
static const int kMaxBitDepth = 14;
|
||||
|
||||
// Returns the precision shift to use based on the input rgb_bit_depth.
|
||||
static int GetPrecisionShift(int rgb_bit_depth) {
|
||||
// Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
|
||||
// bits if needed.
|
||||
return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
|
||||
: (kMaxBitDepth - rgb_bit_depth);
|
||||
}
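// For illustration: rgb_bit_depth == 8 gives a shift of +2 (samples are widened
// to 10 bits of working precision), 14 gives 0, and 16 gives -2, i.e. 16-bit
// input is narrowed to the 14-bit kMaxBitDepth ceiling noted above.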
|
||||
|
||||
typedef int16_t fixed_t; // signed type with extra precision for UV
|
||||
typedef uint16_t fixed_y_t; // unsigned type with extra precision for W
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static uint8_t clip_8b(fixed_t v) {
|
||||
return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
|
||||
}
|
||||
|
||||
static uint16_t clip(fixed_t v, int max) {
|
||||
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
|
||||
}
|
||||
|
||||
static fixed_y_t clip_bit_depth(int y, int bit_depth) {
|
||||
const int max = (1 << bit_depth) - 1;
|
||||
return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int RGBToGray(int64_t r, int64_t g, int64_t b) {
|
||||
const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
|
||||
return (int)(luma >> YUV_FIX);
|
||||
}
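// Note: 13933, 46871 and 4732 are roughly 0.2126, 0.7152 and 0.0722 times
// 1 << YUV_FIX, i.e. the Rec. 709 luma weights expressed in the same 16-bit
// fixed-point scale used by the conversion matrices in sharpyuv_csp.c.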
|
||||
|
||||
static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
|
||||
int rgb_bit_depth) {
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
const uint32_t A = SharpYuvGammaToLinear(a, bit_depth);
|
||||
const uint32_t B = SharpYuvGammaToLinear(b, bit_depth);
|
||||
const uint32_t C = SharpYuvGammaToLinear(c, bit_depth);
|
||||
const uint32_t D = SharpYuvGammaToLinear(d, bit_depth);
|
||||
return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
|
||||
int rgb_bit_depth) {
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
const uint32_t R = SharpYuvGammaToLinear(src[0 * w + i], bit_depth);
|
||||
const uint32_t G = SharpYuvGammaToLinear(src[1 * w + i], bit_depth);
|
||||
const uint32_t B = SharpYuvGammaToLinear(src[2 * w + i], bit_depth);
|
||||
const uint32_t Y = RGBToGray(R, G, B);
|
||||
dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth);
|
||||
}
|
||||
}
|
||||
|
||||
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
|
||||
fixed_t* dst, int uv_w, int rgb_bit_depth) {
|
||||
int i;
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int r =
|
||||
ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
|
||||
src2[0 * uv_w + 1], rgb_bit_depth);
|
||||
const int g =
|
||||
ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
|
||||
src2[2 * uv_w + 1], rgb_bit_depth);
|
||||
const int b =
|
||||
ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
|
||||
src2[4 * uv_w + 1], rgb_bit_depth);
|
||||
const int W = RGBToGray(r, g, b);
|
||||
dst[0 * uv_w] = (fixed_t)(r - W);
|
||||
dst[1 * uv_w] = (fixed_t)(g - W);
|
||||
dst[2 * uv_w] = (fixed_t)(b - W);
|
||||
dst += 1;
|
||||
src1 += 2;
|
||||
src2 += 2;
|
||||
}
|
||||
}
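// In other words, UpdateChroma() leaves three half-resolution planes holding
// the signed residuals (R - W), (G - W) and (B - W), where W is the gray level
// of each 2x2 block averaged in linear light; InterpolateTwoRows() later adds
// a per-pixel W back to rebuild full-resolution R/G/B estimates.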
|
||||
|
||||
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
|
||||
int i;
|
||||
assert(w > 0);
|
||||
for (i = 0; i < w; ++i) {
|
||||
y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
|
||||
const int v0 = (A * 3 + B + 2) >> 2;
|
||||
return clip_bit_depth(v0 + W0, bit_depth);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE int Shift(int v, int shift) {
|
||||
return (shift >= 0) ? (v << shift) : (v >> -shift);
|
||||
}
|
||||
|
||||
static void ImportOneRow(const uint8_t* const r_ptr,
|
||||
const uint8_t* const g_ptr,
|
||||
const uint8_t* const b_ptr,
|
||||
int rgb_step,
|
||||
int rgb_bit_depth,
|
||||
int pic_width,
|
||||
fixed_y_t* const dst) {
|
||||
// Convert the rgb_step from a number of bytes to a number of uint8_t or
|
||||
// uint16_t values depending on the bit depth.
|
||||
const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
|
||||
int i;
|
||||
const int w = (pic_width + 1) & ~1;
|
||||
for (i = 0; i < pic_width; ++i) {
|
||||
const int off = i * step;
|
||||
const int shift = GetPrecisionShift(rgb_bit_depth);
|
||||
if (rgb_bit_depth == 8) {
|
||||
dst[i + 0 * w] = Shift(r_ptr[off], shift);
|
||||
dst[i + 1 * w] = Shift(g_ptr[off], shift);
|
||||
dst[i + 2 * w] = Shift(b_ptr[off], shift);
|
||||
} else {
|
||||
dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
|
||||
dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
|
||||
dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
|
||||
}
|
||||
}
|
||||
if (pic_width & 1) { // replicate rightmost pixel
|
||||
dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
|
||||
dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
|
||||
dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
|
||||
}
|
||||
}
|
||||
|
||||
static void InterpolateTwoRows(const fixed_y_t* const best_y,
|
||||
const fixed_t* prev_uv,
|
||||
const fixed_t* cur_uv,
|
||||
const fixed_t* next_uv,
|
||||
int w,
|
||||
fixed_y_t* out1,
|
||||
fixed_y_t* out2,
|
||||
int rgb_bit_depth) {
|
||||
const int uv_w = w >> 1;
|
||||
const int len = (w - 1) >> 1; // length to filter
|
||||
int k = 3;
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
while (k-- > 0) {    // process each R/G/B segment in turn
|
||||
// special boundary case for i==0
|
||||
out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
|
||||
out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
|
||||
|
||||
SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
|
||||
bit_depth);
|
||||
SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
|
||||
bit_depth);
|
||||
|
||||
// special boundary case for i == w - 1 when w is even
|
||||
if (!(w & 1)) {
|
||||
out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
|
||||
best_y[w - 1 + 0], bit_depth);
|
||||
out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
|
||||
best_y[w - 1 + w], bit_depth);
|
||||
}
|
||||
out1 += w;
|
||||
out2 += w;
|
||||
prev_uv += uv_w;
|
||||
cur_uv += uv_w;
|
||||
next_uv += uv_w;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
|
||||
const int coeffs[4], int sfix) {
|
||||
const int srounder = 1 << (YUV_FIX + sfix - 1);
|
||||
const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
|
||||
coeffs[3] + srounder;
|
||||
return (luma >> (YUV_FIX + sfix));
|
||||
}
|
||||
|
||||
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
|
||||
uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
|
||||
int u_stride, uint8_t* v_ptr, int v_stride,
|
||||
int rgb_bit_depth,
|
||||
int yuv_bit_depth, int width, int height,
|
||||
const SharpYuvConversionMatrix* yuv_matrix) {
|
||||
int i, j;
|
||||
const fixed_t* const best_uv_base = best_uv;
|
||||
const int w = (width + 1) & ~1;
|
||||
const int h = (height + 1) & ~1;
|
||||
const int uv_w = w >> 1;
|
||||
const int uv_h = h >> 1;
|
||||
const int sfix = GetPrecisionShift(rgb_bit_depth);
|
||||
const int yuv_max = (1 << yuv_bit_depth) - 1;
|
||||
|
||||
for (best_uv = best_uv_base, j = 0; j < height; ++j) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
const int off = (i >> 1);
|
||||
const int W = best_y[i];
|
||||
const int r = best_uv[off + 0 * uv_w] + W;
|
||||
const int g = best_uv[off + 1 * uv_w] + W;
|
||||
const int b = best_uv[off + 2 * uv_w] + W;
|
||||
const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
|
||||
if (yuv_bit_depth <= 8) {
|
||||
y_ptr[i] = clip_8b(y);
|
||||
} else {
|
||||
((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
|
||||
}
|
||||
}
|
||||
best_y += w;
|
||||
best_uv += (j & 1) * 3 * uv_w;
|
||||
y_ptr += y_stride;
|
||||
}
|
||||
for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int off = i;
|
||||
// Note r, g and b values here are off by W, but a constant offset on all
|
||||
// 3 components doesn't change the value of u and v with a YCbCr matrix.
|
||||
const int r = best_uv[off + 0 * uv_w];
|
||||
const int g = best_uv[off + 1 * uv_w];
|
||||
const int b = best_uv[off + 2 * uv_w];
|
||||
const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
|
||||
const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
|
||||
if (yuv_bit_depth <= 8) {
|
||||
u_ptr[i] = clip_8b(u);
|
||||
v_ptr[i] = clip_8b(v);
|
||||
} else {
|
||||
((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
|
||||
((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
|
||||
}
|
||||
}
|
||||
best_uv += 3 * uv_w;
|
||||
u_ptr += u_stride;
|
||||
v_ptr += v_stride;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main function
|
||||
|
||||
static void* SafeMalloc(uint64_t nmemb, size_t size) {
|
||||
const uint64_t total_size = nmemb * (uint64_t)size;
|
||||
if (total_size != (size_t)total_size) return NULL;
|
||||
return malloc((size_t)total_size);
|
||||
}
|
||||
|
||||
#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T)))
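// SafeMalloc() checks in 64-bit arithmetic that the requested byte count still
// fits in a size_t and returns NULL otherwise, so the SAFE_ALLOC() calls below
// fail cleanly instead of wrapping around on 32-bit targets.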
|
||||
|
||||
static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
|
||||
const uint8_t* b_ptr, int rgb_step, int rgb_stride,
|
||||
int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
|
||||
uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
|
||||
int v_stride, int yuv_bit_depth, int width,
|
||||
int height,
|
||||
const SharpYuvConversionMatrix* yuv_matrix) {
|
||||
// we expand the right/bottom border if needed
|
||||
const int w = (width + 1) & ~1;
|
||||
const int h = (height + 1) & ~1;
|
||||
const int uv_w = w >> 1;
|
||||
const int uv_h = h >> 1;
|
||||
uint64_t prev_diff_y_sum = ~0;
|
||||
int j, iter;
|
||||
|
||||
// TODO(skal): allocate one big memory chunk. But for now, it's easier
|
||||
// for valgrind debugging to have several chunks.
|
||||
fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
|
||||
fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
|
||||
fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
|
||||
fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
|
||||
fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
|
||||
fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
|
||||
fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
|
||||
fixed_y_t* best_y = best_y_base;
|
||||
fixed_y_t* target_y = target_y_base;
|
||||
fixed_t* best_uv = best_uv_base;
|
||||
fixed_t* target_uv = target_uv_base;
|
||||
const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
|
||||
int ok;
|
||||
assert(w > 0);
|
||||
assert(h > 0);
|
||||
|
||||
if (best_y_base == NULL || best_uv_base == NULL ||
|
||||
target_y_base == NULL || target_uv_base == NULL ||
|
||||
best_rgb_y == NULL || best_rgb_uv == NULL ||
|
||||
tmp_buffer == NULL) {
|
||||
ok = 0;
|
||||
goto End;
|
||||
}
|
||||
|
||||
// Import RGB samples to W/RGB representation.
|
||||
for (j = 0; j < height; j += 2) {
|
||||
const int is_last_row = (j == height - 1);
|
||||
fixed_y_t* const src1 = tmp_buffer + 0 * w;
|
||||
fixed_y_t* const src2 = tmp_buffer + 3 * w;
|
||||
|
||||
// prepare two rows of input
|
||||
ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
|
||||
src1);
|
||||
if (!is_last_row) {
|
||||
ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
|
||||
rgb_step, rgb_bit_depth, width, src2);
|
||||
} else {
|
||||
memcpy(src2, src1, 3 * w * sizeof(*src2));
|
||||
}
|
||||
StoreGray(src1, best_y + 0, w);
|
||||
StoreGray(src2, best_y + w, w);
|
||||
|
||||
UpdateW(src1, target_y, w, rgb_bit_depth);
|
||||
UpdateW(src2, target_y + w, w, rgb_bit_depth);
|
||||
UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth);
|
||||
memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
|
||||
best_y += 2 * w;
|
||||
best_uv += 3 * uv_w;
|
||||
target_y += 2 * w;
|
||||
target_uv += 3 * uv_w;
|
||||
r_ptr += 2 * rgb_stride;
|
||||
g_ptr += 2 * rgb_stride;
|
||||
b_ptr += 2 * rgb_stride;
|
||||
}
|
||||
|
||||
// Iterate and resolve clipping conflicts.
|
||||
for (iter = 0; iter < kNumIterations; ++iter) {
|
||||
const fixed_t* cur_uv = best_uv_base;
|
||||
const fixed_t* prev_uv = best_uv_base;
|
||||
uint64_t diff_y_sum = 0;
|
||||
|
||||
best_y = best_y_base;
|
||||
best_uv = best_uv_base;
|
||||
target_y = target_y_base;
|
||||
target_uv = target_uv_base;
|
||||
for (j = 0; j < h; j += 2) {
|
||||
fixed_y_t* const src1 = tmp_buffer + 0 * w;
|
||||
fixed_y_t* const src2 = tmp_buffer + 3 * w;
|
||||
{
|
||||
const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
|
||||
InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
|
||||
src1, src2, rgb_bit_depth);
|
||||
prev_uv = cur_uv;
|
||||
cur_uv = next_uv;
|
||||
}
|
||||
|
||||
UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth);
|
||||
UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth);
|
||||
UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth);
|
||||
|
||||
// update two rows of Y and one row of RGB
|
||||
diff_y_sum +=
|
||||
SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
|
||||
rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
|
||||
SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
|
||||
|
||||
best_y += 2 * w;
|
||||
best_uv += 3 * uv_w;
|
||||
target_y += 2 * w;
|
||||
target_uv += 3 * uv_w;
|
||||
}
|
||||
// test exit condition
|
||||
if (iter > 0) {
|
||||
if (diff_y_sum < diff_y_threshold) break;
|
||||
if (diff_y_sum > prev_diff_y_sum) break;
|
||||
}
|
||||
prev_diff_y_sum = diff_y_sum;
|
||||
}
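// Summary of the loop above: each pass re-upsamples the current chroma
// estimate, re-measures it against the gamma-correct luma/chroma targets, and
// feeds the error back into best_y/best_uv. It stops after kNumIterations
// passes, once the accumulated luma error falls below ~3 per pixel
// (diff_y_threshold), or as soon as the error stops decreasing.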
|
||||
|
||||
// final reconstruction
|
||||
ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
|
||||
u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
|
||||
width, height, yuv_matrix);
|
||||
|
||||
End:
|
||||
free(best_y_base);
|
||||
free(best_uv_base);
|
||||
free(target_y_base);
|
||||
free(target_uv_base);
|
||||
free(best_rgb_y);
|
||||
free(best_rgb_uv);
|
||||
free(tmp_buffer);
|
||||
return ok;
|
||||
}
|
||||
#undef SAFE_ALLOC
|
||||
|
||||
// Hidden exported init function.
|
||||
// By default SharpYuvConvert calls it with NULL. If needed, users can declare
|
||||
// it as extern and call it with a VP8CPUInfo function.
|
||||
extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
|
||||
void SharpYuvInit(VP8CPUInfo cpu_info_func) {
|
||||
static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
|
||||
const int initialized =
|
||||
(sharpyuv_last_cpuinfo_used != (VP8CPUInfo)&sharpyuv_last_cpuinfo_used);
|
||||
if (cpu_info_func == NULL && initialized) return;
|
||||
if (sharpyuv_last_cpuinfo_used == cpu_info_func) return;
|
||||
|
||||
SharpYuvInitDsp(cpu_info_func);
|
||||
if (!initialized) {
|
||||
SharpYuvInitGammaTables();
|
||||
}
|
||||
|
||||
sharpyuv_last_cpuinfo_used = cpu_info_func;
|
||||
}
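// Illustrative usage sketch (not upstream code; assumes the VP8CPUInfo
// signature from src/dsp/cpu.h): a client that wants to force the plain C
// paths could pass its own probe before the first conversion:
//
//   extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
//
//   static int NoSimdCpuInfo(CPUFeature feature) {
//     (void)feature;
//     return 0;  // report every CPU feature as unavailable
//   }
//   ...
//   SharpYuvInit(NoSimdCpuInfo);  // later SharpYuvInit(NULL) calls are no-ops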
|
||||
|
||||
int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
|
||||
const void* b_ptr, int rgb_step, int rgb_stride,
|
||||
int rgb_bit_depth, void* y_ptr, int y_stride,
|
||||
void* u_ptr, int u_stride, void* v_ptr,
|
||||
int v_stride, int yuv_bit_depth, int width,
|
||||
int height, const SharpYuvConversionMatrix* yuv_matrix) {
|
||||
SharpYuvConversionMatrix scaled_matrix;
|
||||
const int rgb_max = (1 << rgb_bit_depth) - 1;
|
||||
const int rgb_round = 1 << (rgb_bit_depth - 1);
|
||||
const int yuv_max = (1 << yuv_bit_depth) - 1;
|
||||
const int sfix = GetPrecisionShift(rgb_bit_depth);
|
||||
|
||||
if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
|
||||
r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
|
||||
u_ptr == NULL || v_ptr == NULL) {
|
||||
return 0;
|
||||
}
|
||||
if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
|
||||
rgb_bit_depth != 16) {
|
||||
return 0;
|
||||
}
|
||||
if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
|
||||
return 0;
|
||||
}
|
||||
if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride % 2 != 0)) {
|
||||
// Step/stride should be even for uint16_t buffers.
|
||||
return 0;
|
||||
}
|
||||
if (yuv_bit_depth > 8 &&
|
||||
(y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
|
||||
// Stride should be even for uint16_t buffers.
|
||||
return 0;
|
||||
}
|
||||
SharpYuvInit(NULL);
|
||||
|
||||
// Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
|
||||
// rgb->yuv conversion matrix.
|
||||
if (rgb_bit_depth == yuv_bit_depth) {
|
||||
memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
|
||||
} else {
|
||||
int i;
|
||||
for (i = 0; i < 3; ++i) {
|
||||
scaled_matrix.rgb_to_y[i] =
|
||||
(yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
|
||||
scaled_matrix.rgb_to_u[i] =
|
||||
(yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
|
||||
scaled_matrix.rgb_to_v[i] =
|
||||
(yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
|
||||
}
|
||||
}
|
||||
// Also incorporate precision change scaling.
|
||||
scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
|
||||
scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
|
||||
scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
|
||||
|
||||
return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
|
||||
rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
|
||||
v_ptr, v_stride, yuv_bit_depth, width, height,
|
||||
&scaled_matrix);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
|
sharpyuv/sharpyuv.h (new file)
@@ -0,0 +1,81 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Sharp RGB to YUV conversion.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_H_
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// SharpYUV API version following the convention from semver.org
|
||||
#define SHARPYUV_VERSION_MAJOR 0
|
||||
#define SHARPYUV_VERSION_MINOR 1
|
||||
#define SHARPYUV_VERSION_PATCH 0
|
||||
// Version as a uint32_t. The major number is the high 8 bits.
|
||||
// The minor number is the middle 8 bits. The patch number is the low 16 bits.
|
||||
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
|
||||
(((MAJOR) << 24) | ((MINOR) << 16) | (PATCH))
|
||||
#define SHARPYUV_VERSION \
|
||||
SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \
|
||||
SHARPYUV_VERSION_PATCH)
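// For example, the 0.1.0 version defined above packs to
// SHARPYUV_MAKE_VERSION(0, 1, 0) == 0x00010000.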
|
||||
|
||||
// RGB to YUV conversion matrix, in 16 bit fixed point.
|
||||
// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then y, u and v values are divided by 1<<16 and rounded.
|
||||
typedef struct {
|
||||
int rgb_to_y[4];
|
||||
int rgb_to_u[4];
|
||||
int rgb_to_v[4];
|
||||
} SharpYuvConversionMatrix;
|
||||
|
||||
// Converts RGB to YUV420 using a downsampling algorithm that minimizes
|
||||
// artefacts caused by chroma subsampling.
|
||||
// This is slower than standard downsampling (averaging of 4 UV values).
|
||||
// Assumes that the image will be upsampled using a bilinear filter. If nearest
|
||||
// neighbor is used instead, the upsampled image might look worse than with
|
||||
// standard downsampling.
|
||||
// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point
|
||||
// to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise.
|
||||
// rgb_step: distance in bytes between two horizontally adjacent pixels on the
|
||||
// r, g and b channels. If rgb_bit_depth is > 8, it should be a
|
||||
// multiple of 2.
|
||||
// rgb_stride: distance in bytes between two vertically adjacent pixels on the
|
||||
// r, g, and b channels. If rgb_bit_depth is > 8, it should be a
|
||||
// multiple of 2.
|
||||
// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16.
|
||||
// Note: 16 bit input is truncated to 14 bits before conversion to yuv.
|
||||
// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12.
|
||||
// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels. Should
|
||||
// point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers
|
||||
// otherwise.
|
||||
// y_stride, u_stride, v_stride: distance in bytes between two vertically
|
||||
// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
|
||||
// should be multiples of 2.
|
||||
// width, height: width and height of the image in pixels
|
||||
int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
|
||||
int rgb_step, int rgb_stride, int rgb_bit_depth,
|
||||
void* y_ptr, int y_stride, void* u_ptr, int u_stride,
|
||||
void* v_ptr, int v_stride, int yuv_bit_depth, int width,
|
||||
int height, const SharpYuvConversionMatrix* yuv_matrix);
|
||||
|
||||
// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422
|
||||
// support (it's rarely used in practice, especially for images).
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_H_
|
|
sharpyuv/sharpyuv_csp.c (new file)
@@ -0,0 +1,110 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Colorspace utilities.
|
||||
|
||||
#include "sharpyuv/sharpyuv_csp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }
|
||||
|
||||
void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
SharpYuvConversionMatrix* matrix) {
|
||||
const float kr = yuv_color_space->kr;
|
||||
const float kb = yuv_color_space->kb;
|
||||
const float kg = 1.0f - kr - kb;
|
||||
const float cr = 0.5f / (1.0f - kb);
|
||||
const float cb = 0.5f / (1.0f - kr);
|
||||
|
||||
const int shift = yuv_color_space->bit_depth - 8;
|
||||
|
||||
const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
|
||||
float scale_y = 1.0f;
|
||||
float add_y = 0.0f;
|
||||
float scale_u = cr;
|
||||
float scale_v = cb;
|
||||
float add_uv = (float)(128 << shift);
|
||||
assert(yuv_color_space->bit_depth >= 8);
|
||||
|
||||
if (yuv_color_space->range == kSharpYuvRangeLimited) {
|
||||
scale_y *= (219 << shift) / denom;
|
||||
scale_u *= (224 << shift) / denom;
|
||||
scale_v *= (224 << shift) / denom;
|
||||
add_y = (float)(16 << shift);
|
||||
}
|
||||
|
||||
matrix->rgb_to_y[0] = ToFixed16(kr * scale_y);
|
||||
matrix->rgb_to_y[1] = ToFixed16(kg * scale_y);
|
||||
matrix->rgb_to_y[2] = ToFixed16(kb * scale_y);
|
||||
matrix->rgb_to_y[3] = ToFixed16(add_y);
|
||||
|
||||
matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u);
|
||||
matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u);
|
||||
matrix->rgb_to_u[2] = ToFixed16((1 - kb) * scale_u);
|
||||
matrix->rgb_to_u[3] = ToFixed16(add_uv);
|
||||
|
||||
matrix->rgb_to_v[0] = ToFixed16((1 - kr) * scale_v);
|
||||
matrix->rgb_to_v[1] = ToFixed16(-kg * scale_v);
|
||||
matrix->rgb_to_v[2] = ToFixed16(-kb * scale_v);
|
||||
matrix->rgb_to_v[3] = ToFixed16(add_uv);
|
||||
}
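// Worked example: for Rec. 601 limited range (kr = 0.299, kb = 0.114,
// bit_depth = 8) the luma row is scaled by 219/255, so
// rgb_to_y[0] = ToFixed16(0.299 * 219 / 255) = 16829 and
// rgb_to_y[3] = ToFixed16(16) = 16 << 16, matching kRec601LimitedMatrix below.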
|
||||
|
||||
// Matrices are in YUV_FIX fixed point precision.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
|
||||
static const SharpYuvConversionMatrix kWebpMatrix = {
|
||||
{16839, 33059, 6420, 16 << 16},
|
||||
{-9719, -19081, 28800, 128 << 16},
|
||||
{28800, -24116, -4684, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
|
||||
{16829, 33039, 6416, 16 << 16},
|
||||
{-9714, -19071, 28784, 128 << 16},
|
||||
{28784, -24103, -4681, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec601FullMatrix = {
|
||||
{19595, 38470, 7471, 0},
|
||||
{-11058, -21710, 32768, 128 << 16},
|
||||
{32768, -27439, -5329, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
|
||||
{11966, 40254, 4064, 16 << 16},
|
||||
{-6596, -22189, 28784, 128 << 16},
|
||||
{28784, -26145, -2639, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec709FullMatrix = {
|
||||
{13933, 46871, 4732, 0},
|
||||
{-7509, -25259, 32768, 128 << 16},
|
||||
{32768, -29763, -3005, 128 << 16},
|
||||
};
|
||||
|
||||
const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
|
||||
SharpYuvMatrixType matrix_type) {
|
||||
switch (matrix_type) {
|
||||
case kSharpYuvMatrixWebp:
|
||||
return &kWebpMatrix;
|
||||
case kSharpYuvMatrixRec601Limited:
|
||||
return &kRec601LimitedMatrix;
|
||||
case kSharpYuvMatrixRec601Full:
|
||||
return &kRec601FullMatrix;
|
||||
case kSharpYuvMatrixRec709Limited:
|
||||
return &kRec709LimitedMatrix;
|
||||
case kSharpYuvMatrixRec709Full:
|
||||
return &kRec709FullMatrix;
|
||||
case kSharpYuvMatrixNum:
|
||||
return NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
|
sharpyuv/sharpyuv_csp.h (new file)
@@ -0,0 +1,59 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Colorspace utilities.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Range of YUV values.
|
||||
typedef enum {
|
||||
kSharpYuvRangeFull, // YUV values between [0;255] (for 8 bit)
|
||||
kSharpYuvRangeLimited // Y in [16;235], YUV in [16;240] (for 8 bit)
|
||||
} SharpYuvRange;
|
||||
|
||||
// Constants that define a YUV color space.
|
||||
typedef struct {
|
||||
// Kr and Kb are defined such that:
|
||||
// Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb.
|
||||
float kr;
|
||||
float kb;
|
||||
int bit_depth; // 8, 10 or 12
|
||||
SharpYuvRange range;
|
||||
} SharpYuvColorSpace;
|
||||
|
||||
// Fills in 'matrix' for the given YUVColorSpace.
|
||||
void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
SharpYuvConversionMatrix* matrix);
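// Usage sketch (values are illustrative): building a matrix for 10-bit
// full-range BT.709 content instead of using one of the presets below:
//
//   const SharpYuvColorSpace color_space = {0.2126f, 0.0722f, 10,
//                                           kSharpYuvRangeFull};
//   SharpYuvConversionMatrix matrix;
//   SharpYuvComputeConversionMatrix(&color_space, &matrix);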
|
||||
|
||||
// Enums for precomputed conversion matrices.
|
||||
typedef enum {
|
||||
kSharpYuvMatrixWebp = 0,
|
||||
kSharpYuvMatrixRec601Limited,
|
||||
kSharpYuvMatrixRec601Full,
|
||||
kSharpYuvMatrixRec709Limited,
|
||||
kSharpYuvMatrixRec709Full,
|
||||
kSharpYuvMatrixNum
|
||||
} SharpYuvMatrixType;
|
||||
|
||||
// Returns a pointer to a matrix for one of the predefined colorspaces.
|
||||
const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
|
||||
SharpYuvMatrixType matrix_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
|
sharpyuv/sharpyuv_dsp.c (new file)
@@ -0,0 +1,102 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static uint16_t clip(int v, int max) {
|
||||
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len, int bit_depth) {
|
||||
uint64_t diff = 0;
|
||||
int i;
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
for (i = 0; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)dst[i] + diff_y;
|
||||
dst[i] = clip(new_y, max_y);
|
||||
diff += (uint64_t)abs(diff_y);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth) {
|
||||
int i;
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
for (i = 0; i < len; ++i, ++A, ++B) {
|
||||
const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
|
||||
const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
|
||||
out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y);
|
||||
out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y);
|
||||
}
|
||||
}
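// The 9/16, 3/16, 3/16, 1/16 weights above are the 2D bilinear taps used to
// interpolate the half-resolution rows A (current) and B (neighbouring) back
// to full resolution; adding best_y restores the per-pixel gray level, since
// the planes hold (R - W), (G - W) and (B - W) residuals.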
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len, int bit_depth);
|
||||
void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst,
|
||||
int len);
|
||||
void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth);
|
||||
|
||||
extern void InitSharpYuvSSE2(void);
|
||||
extern void InitSharpYuvNEON(void);
|
||||
|
||||
void SharpYuvInitDsp(VP8CPUInfo cpu_info_func) {
|
||||
(void)cpu_info_func;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
SharpYuvUpdateY = SharpYuvUpdateY_C;
|
||||
SharpYuvUpdateRGB = SharpYuvUpdateRGB_C;
|
||||
SharpYuvFilterRow = SharpYuvFilterRow_C;
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (cpu_info_func == NULL || cpu_info_func(kSSE2)) {
|
||||
InitSharpYuvSSE2();
|
||||
}
|
||||
#endif // WEBP_HAVE_SSE2
|
||||
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE || cpu_info_func == NULL || cpu_info_func(kNEON)) {
|
||||
InitSharpYuvNEON();
|
||||
}
|
||||
#endif // WEBP_HAVE_NEON
|
||||
|
||||
assert(SharpYuvUpdateY != NULL);
|
||||
assert(SharpYuvUpdateRGB != NULL);
|
||||
assert(SharpYuvFilterRow != NULL);
|
||||
}
|
|
sharpyuv/sharpyuv_dsp.h (new file)
@@ -0,0 +1,29 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
|
||||
extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len, int bit_depth);
|
||||
extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref,
|
||||
int16_t* dst, int len);
|
||||
extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth);
|
||||
|
||||
void SharpYuvInitDsp(VP8CPUInfo cpu_info_func);
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
|
sharpyuv/sharpyuv_gamma.c (new file)
@@ -0,0 +1,114 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Gamma correction utilities.
|
||||
|
||||
#include "sharpyuv/sharpyuv_gamma.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// Gamma correction compensates loss of resolution during chroma subsampling.
|
||||
// Size of pre-computed table for converting from gamma to linear.
|
||||
#define GAMMA_TO_LINEAR_TAB_BITS 10
|
||||
#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
|
||||
static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
|
||||
#define LINEAR_TO_GAMMA_TAB_BITS 9
|
||||
#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
|
||||
static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
|
||||
|
||||
static const double kGammaF = 1. / 0.45;
|
||||
#define GAMMA_TO_LINEAR_BITS 16
|
||||
|
||||
static volatile int kGammaTablesSOk = 0;
|
||||
void SharpYuvInitGammaTables(void) {
|
||||
assert(GAMMA_TO_LINEAR_BITS <= 16);
|
||||
if (!kGammaTablesSOk) {
|
||||
int v;
|
||||
const double a = 0.09929682680944;
|
||||
const double thresh = 0.018053968510807;
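// 'a' and 'thresh' are the high-precision ITU-R BT.709-style transfer
// constants: the encoding curve is V = (1 + a) * L^0.45 - a above 'thresh' and
// V = 4.5 * L below it; the linear-to-gamma loop below implements it and the
// gamma-to-linear loop applies its inverse.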
|
||||
const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
|
||||
// Precompute gamma to linear table.
|
||||
{
|
||||
const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
|
||||
const double a_rec = 1. / (1. + a);
|
||||
for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
|
||||
const double g = norm * v;
|
||||
double value;
|
||||
if (g <= thresh * 4.5) {
|
||||
value = g / 4.5;
|
||||
} else {
|
||||
value = pow(a_rec * (g + a), kGammaF);
|
||||
}
|
||||
kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
|
||||
}
|
||||
// to prevent small rounding errors from causing a read overflow:
|
||||
kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
|
||||
kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
|
||||
}
|
||||
// Precompute linear to gamma table.
|
||||
{
|
||||
const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
|
||||
for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
|
||||
const double g = scale * v;
|
||||
double value;
|
||||
if (g <= thresh) {
|
||||
value = 4.5 * g;
|
||||
} else {
|
||||
value = (1. + a) * pow(g, 1. / kGammaF) - a;
|
||||
}
|
||||
kLinearToGammaTabS[v] =
|
||||
(uint32_t)(final_scale * value + 0.5);
|
||||
}
|
||||
// to prevent small rounding errors from causing a read overflow:
|
||||
kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
|
||||
kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
|
||||
}
|
||||
kGammaTablesSOk = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE int Shift(int v, int shift) {
|
||||
return (shift >= 0) ? (v << shift) : (v >> -shift);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
|
||||
int tab_pos_shift_right,
|
||||
int tab_value_shift) {
|
||||
const uint32_t tab_pos = Shift(v, -tab_pos_shift_right);
|
||||
// fractional part, in 'tab_pos_shift' fixed-point precision
|
||||
const uint32_t x = v - (tab_pos << tab_pos_shift_right); // fractional part
|
||||
// v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1])
|
||||
const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift);
|
||||
const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift);
|
||||
// Final interpolation.
|
||||
const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0.
|
||||
const int half =
|
||||
(tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0;
|
||||
const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right);
|
||||
return result;
|
||||
}
|
||||
|
||||
uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth) {
|
||||
const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
|
||||
if (shift > 0) {
|
||||
return kGammaToLinearTabS[v << shift];
|
||||
}
|
||||
return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
|
||||
}
|
||||
|
||||
uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth) {
|
||||
return FixedPointInterpolation(
|
||||
value, kLinearToGammaTabS,
|
||||
(GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS),
|
||||
bit_depth - GAMMA_TO_LINEAR_BITS);
|
||||
}
|
|
sharpyuv/sharpyuv_gamma.h (new file)
@@ -0,0 +1,35 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Gamma correction utilities.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Initializes precomputed tables. Must be called once before calling
|
||||
// SharpYuvGammaToLinear or SharpYuvLinearToGamma.
|
||||
void SharpYuvInitGammaTables(void);
|
||||
|
||||
// Converts a gamma color value on 'bit_depth' bits to a 16 bit linear value.
|
||||
uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth);
|
||||
|
||||
// Converts a 16 bit linear color value to a gamma value on 'bit_depth' bits.
|
||||
uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
|
sharpyuv/sharpyuv_neon.c (new file)
@@ -0,0 +1,182 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
extern void InitSharpYuvNEON(void);
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
static uint16_t clip_NEON(int v, int max) {
|
||||
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len, int bit_depth) {
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
int i;
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
const int16x8_t max = vdupq_n_s16(max_y);
|
||||
uint64x2_t sum = vdupq_n_u64(0);
|
||||
uint64_t diff;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
|
||||
const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
|
||||
const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_y
|
||||
const int16x8_t F = vaddq_s16(C, D); // new_y
|
||||
const uint16x8_t H =
|
||||
vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
|
||||
const int16x8_t I = vabsq_s16(D); // abs(diff_y)
|
||||
vst1q_u16(dst + i, H);
|
||||
sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
|
||||
}
|
||||
diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
    const int new_y = (int)(dst[i]) + diff_y;
    dst[i] = clip_NEON(new_y, max_y);
    diff += (uint64_t)(abs(diff_y));
  }
  return diff;
}

static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
                                   int16_t* dst, int len) {
  int i;
  for (i = 0; i + 8 <= len; i += 8) {
    const int16x8_t A = vld1q_s16(ref + i);
    const int16x8_t B = vld1q_s16(src + i);
    const int16x8_t C = vld1q_s16(dst + i);
    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
    const int16x8_t E = vaddq_s16(C, D);   // new_uv
    vst1q_s16(dst + i, E);
  }
  for (; i < len; ++i) {
    const int diff_uv = ref[i] - src[i];
    dst[i] += diff_uv;
  }
}

static void SharpYuvFilterRow16_NEON(const int16_t* A, const int16_t* B,
                                     int len, const uint16_t* best_y,
                                     uint16_t* out, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  int i;
  const int16x8_t max = vdupq_n_s16(max_y);
  const int16x8_t zero = vdupq_n_s16(0);
  for (i = 0; i + 8 <= len; i += 8) {
    const int16x8_t a0 = vld1q_s16(A + i + 0);
    const int16x8_t a1 = vld1q_s16(A + i + 1);
    const int16x8_t b0 = vld1q_s16(B + i + 0);
    const int16x8_t b1 = vld1q_s16(B + i + 1);
    const int16x8_t a0b1 = vaddq_s16(a0, b1);
    const int16x8_t a1b0 = vaddq_s16(a1, b0);
    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
    const int16x8_t e0 = vrhaddq_s16(c1, a0);
    const int16x8_t e1 = vrhaddq_s16(c0, a1);
    const int16x8x2_t f = vzipq_s16(e0, e1);
    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
  }
  for (; i < len; ++i) {
    const int a0b1 = A[i + 0] + B[i + 1];
    const int a1b0 = A[i + 1] + B[i + 0];
    const int a0a1b0b1 = a0b1 + a1b0 + 8;
    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
    out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
    out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
  }
}

static void SharpYuvFilterRow32_NEON(const int16_t* A, const int16_t* B,
                                     int len, const uint16_t* best_y,
                                     uint16_t* out, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  int i;
  const uint16x8_t max = vdupq_n_u16(max_y);
  for (i = 0; i + 4 <= len; i += 4) {
    const int16x4_t a0 = vld1_s16(A + i + 0);
    const int16x4_t a1 = vld1_s16(A + i + 1);
    const int16x4_t b0 = vld1_s16(B + i + 0);
    const int16x4_t b1 = vld1_s16(B + i + 1);
    const int32x4_t a0b1 = vaddl_s16(a0, b1);
    const int32x4_t a1b0 = vaddl_s16(a1, b0);
    const int32x4_t a0a1b0b1 = vaddq_s32(a0b1, a1b0);  // A0+A1+B0+B1
    const int32x4_t a0b1_2 = vaddq_s32(a0b1, a0b1);    // 2*(A0+B1)
    const int32x4_t a1b0_2 = vaddq_s32(a1b0, a1b0);    // 2*(A1+B0)
    const int32x4_t c0 = vshrq_n_s32(vaddq_s32(a0b1_2, a0a1b0b1), 3);
    const int32x4_t c1 = vshrq_n_s32(vaddq_s32(a1b0_2, a0a1b0b1), 3);
    const int32x4_t e0 = vrhaddq_s32(c1, vmovl_s16(a0));
    const int32x4_t e1 = vrhaddq_s32(c0, vmovl_s16(a1));
    const int32x4x2_t f = vzipq_s32(e0, e1);

    const int16x8_t g = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i));
    const int32x4_t h0 = vaddw_s16(f.val[0], vget_low_s16(g));
    const int32x4_t h1 = vaddw_s16(f.val[1], vget_high_s16(g));
    const uint16x8_t i_16 = vcombine_u16(vqmovun_s32(h0), vqmovun_s32(h1));
    const uint16x8_t i_clamped = vminq_u16(i_16, max);
    vst1q_u16(out + 2 * i + 0, i_clamped);
  }
  for (; i < len; ++i) {
    const int a0b1 = A[i + 0] + B[i + 1];
    const int a1b0 = A[i + 1] + B[i + 0];
    const int a0a1b0b1 = a0b1 + a1b0 + 8;
    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
    out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
    out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
  }
}

static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
                                   const uint16_t* best_y, uint16_t* out,
                                   int bit_depth) {
  if (bit_depth <= 10) {
    SharpYuvFilterRow16_NEON(A, B, len, best_y, out, bit_depth);
  } else {
    SharpYuvFilterRow32_NEON(A, B, len, best_y, out, bit_depth);
  }
}

//------------------------------------------------------------------------------

WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) {
  SharpYuvUpdateY = SharpYuvUpdateY_NEON;
  SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON;
  SharpYuvFilterRow = SharpYuvFilterRow_NEON;
}

#else  // !WEBP_USE_NEON

void InitSharpYuvNEON(void) {}

#endif  // WEBP_USE_NEON
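The scalar fallback loops above (and in the SSE2 file that follows) reuse partial sums by regrouping the 9/3/3/1 filter weights. The following standalone C sketch is illustrative only (not part of the upstream change) and simply checks that regrouping over a small value range; the function names are made up for the example.

// Illustrative sketch: verify that
//   (9*A0 + 3*A1 + 3*B0 + B1 + 8) >> 4
// equals
//   (8*A0 + 2*(A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4,
// which is the factored form used by the scalar fallbacks.
#include <assert.h>

static int FilterDirect(int a0, int a1, int b0, int b1) {
  return (9 * a0 + 3 * a1 + 3 * b0 + b1 + 8) >> 4;
}

static int FilterFactored(int a0, int a1, int b0, int b1) {
  const int a0b1 = a0 + b1;
  const int a1b0 = a1 + b0;
  const int a0a1b0b1 = a0b1 + a1b0 + 8;  // A0 + A1 + B0 + B1 + 8
  return (8 * a0 + 2 * a1b0 + a0a1b0b1) >> 4;
}

int main(void) {
  int a0, a1, b0, b1;
  for (a0 = 0; a0 < 32; ++a0)
    for (a1 = 0; a1 < 32; ++a1)
      for (b0 = 0; b0 < 32; ++b0)
        for (b1 = 0; b1 < 32; ++b1)
          assert(FilterDirect(a0, a1, b0, b1) == FilterFactored(a0, a1, b0, b1));
  return 0;
}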
@@ -0,0 +1,204 @@
// Copyright 2022 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical functions for Sharp YUV.
//
// Author: Skal (pascal.massimino@gmail.com)

#include "sharpyuv/sharpyuv_dsp.h"

#if defined(WEBP_USE_SSE2)
#include <stdlib.h>
#include <emmintrin.h>
#endif

extern void InitSharpYuvSSE2(void);

#if defined(WEBP_USE_SSE2)

static uint16_t clip_SSE2(int v, int max) {
  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
}

static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
                                     uint16_t* dst, int len, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  uint64_t diff = 0;
  uint32_t tmp[4];
  int i;
  const __m128i zero = _mm_setzero_si128();
  const __m128i max = _mm_set1_epi16(max_y);
  const __m128i one = _mm_set1_epi16(1);
  __m128i sum = zero;

  for (i = 0; i + 8 <= len; i += 8) {
    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
    const __m128i F = _mm_add_epi16(C, D);       // new_y
    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
    _mm_storeu_si128((__m128i*)(dst + i), H);
    sum = _mm_add_epi32(sum, I);
  }
  _mm_storeu_si128((__m128i*)tmp, sum);
  diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
  for (; i < len; ++i) {
    const int diff_y = ref[i] - src[i];
    const int new_y = (int)dst[i] + diff_y;
    dst[i] = clip_SSE2(new_y, max_y);
    diff += (uint64_t)abs(diff_y);
  }
  return diff;
}

static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
                                   int16_t* dst, int len) {
  int i = 0;
  for (i = 0; i + 8 <= len; i += 8) {
    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
    const __m128i E = _mm_add_epi16(C, D);   // new_uv
    _mm_storeu_si128((__m128i*)(dst + i), E);
  }
  for (; i < len; ++i) {
    const int diff_uv = ref[i] - src[i];
    dst[i] += diff_uv;
  }
}

static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B,
                                     int len, const uint16_t* best_y,
                                     uint16_t* out, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  int i;
  const __m128i kCst8 = _mm_set1_epi16(8);
  const __m128i max = _mm_set1_epi16(max_y);
  const __m128i zero = _mm_setzero_si128();
  for (i = 0; i + 8 <= len; i += 8) {
    const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
    const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
    const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
    const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
    const __m128i a0b1 = _mm_add_epi16(a0, b1);
    const __m128i a1b0 = _mm_add_epi16(a1, b0);
    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
    const __m128i d0 = _mm_add_epi16(c1, a0);
    const __m128i d1 = _mm_add_epi16(c0, a1);
    const __m128i e0 = _mm_srai_epi16(d0, 1);
    const __m128i e1 = _mm_srai_epi16(d1, 1);
    const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
    const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
    const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
    const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
    const __m128i h0 = _mm_add_epi16(g0, f0);
    const __m128i h1 = _mm_add_epi16(g1, f1);
    const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
    const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
    _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
  }
  for (; i < len; ++i) {
    // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
    // We reuse the common sub-expressions.
    const int a0b1 = A[i + 0] + B[i + 1];
    const int a1b0 = A[i + 1] + B[i + 0];
    const int a0a1b0b1 = a0b1 + a1b0 + 8;
    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
    out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
    out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
  }
}

static WEBP_INLINE __m128i s16_to_s32(__m128i in) {
  return _mm_srai_epi32(_mm_unpacklo_epi16(in, in), 16);
}

static void SharpYuvFilterRow32_SSE2(const int16_t* A, const int16_t* B,
                                     int len, const uint16_t* best_y,
                                     uint16_t* out, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  int i;
  const __m128i kCst8 = _mm_set1_epi32(8);
  const __m128i max = _mm_set1_epi16(max_y);
  const __m128i zero = _mm_setzero_si128();
  for (i = 0; i + 4 <= len; i += 4) {
    const __m128i a0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 0)));
    const __m128i a1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 1)));
    const __m128i b0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 0)));
    const __m128i b1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 1)));
    const __m128i a0b1 = _mm_add_epi32(a0, b1);
    const __m128i a1b0 = _mm_add_epi32(a1, b0);
    const __m128i a0a1b0b1 = _mm_add_epi32(a0b1, a1b0);  // A0+A1+B0+B1
    const __m128i a0a1b0b1_8 = _mm_add_epi32(a0a1b0b1, kCst8);
    const __m128i a0b1_2 = _mm_add_epi32(a0b1, a0b1);    // 2*(A0+B1)
    const __m128i a1b0_2 = _mm_add_epi32(a1b0, a1b0);    // 2*(A1+B0)
    const __m128i c0 = _mm_srai_epi32(_mm_add_epi32(a0b1_2, a0a1b0b1_8), 3);
    const __m128i c1 = _mm_srai_epi32(_mm_add_epi32(a1b0_2, a0a1b0b1_8), 3);
    const __m128i d0 = _mm_add_epi32(c1, a0);
    const __m128i d1 = _mm_add_epi32(c0, a1);
    const __m128i e0 = _mm_srai_epi32(d0, 1);
    const __m128i e1 = _mm_srai_epi32(d1, 1);
    const __m128i f0 = _mm_unpacklo_epi32(e0, e1);
    const __m128i f1 = _mm_unpackhi_epi32(e0, e1);
    const __m128i g = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
    const __m128i h_16 = _mm_add_epi16(g, _mm_packs_epi32(f0, f1));
    const __m128i final = _mm_max_epi16(_mm_min_epi16(h_16, max), zero);
    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), final);
  }
  for (; i < len; ++i) {
    // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
    // We reuse the common sub-expressions.
    const int a0b1 = A[i + 0] + B[i + 1];
    const int a1b0 = A[i + 1] + B[i + 0];
    const int a0a1b0b1 = a0b1 + a1b0 + 8;
    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
    out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
    out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
  }
}

static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
                                   const uint16_t* best_y, uint16_t* out,
                                   int bit_depth) {
  if (bit_depth <= 10) {
    SharpYuvFilterRow16_SSE2(A, B, len, best_y, out, bit_depth);
  } else {
    SharpYuvFilterRow32_SSE2(A, B, len, best_y, out, bit_depth);
  }
}

//------------------------------------------------------------------------------

extern void InitSharpYuvSSE2(void);

WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) {
  SharpYuvUpdateY = SharpYuvUpdateY_SSE2;
  SharpYuvUpdateRGB = SharpYuvUpdateRGB_SSE2;
  SharpYuvFilterRow = SharpYuvFilterRow_SSE2;
}
#else  // !WEBP_USE_SSE2

void InitSharpYuvSSE2(void) {}

#endif  // WEBP_USE_SSE2
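SharpYuvUpdateY_SSE2 above accumulates sum(|ref - src|) without an absolute-value instruction: it multiplies each signed 16-bit difference by -1 or +1 (the sign mask OR'ed with 1) using _mm_madd_epi16, which also pair-sums adjacent products into 32-bit lanes. The scalar model below is an illustrative sketch of that idea, not the upstream code.

// Illustrative sketch: |d| == d * (d < 0 ? -1 : 1), the identity behind the
// _mm_madd_epi16(D, G) accumulation above.
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t SumAbsDiff(const uint16_t* ref, const uint16_t* src, int len) {
  uint64_t sum = 0;
  int i;
  for (i = 0; i < len; ++i) {
    const int d = ref[i] - src[i];
    const int sign = (d < 0) ? -1 : 1;  // matches _mm_cmpgt_epi16(zero, D) | one
    assert(d * sign == abs(d));
    sum += (uint64_t)(d * sign);        // madd additionally sums lane pairs
  }
  return sum;
}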
@@ -32,7 +32,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 1
#define DEC_MIN_VERSION 2
#define DEC_REV_VERSION 2
#define DEC_REV_VERSION 4

// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// Constraints are: We need to store one 16x16 block of luma samples (y),
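For reference only (not part of the diff): libwebp conventionally packs these three macros into the single integer returned by WebPGetDecoderVersion(), so the revision bump above is visible to callers. The sketch below assumes that (major << 16) | (minor << 8) | revision layout.

// Illustrative sketch of the version packing; values mirror the macros above.
#include <stdio.h>

#define DEC_MAJ_VERSION 1
#define DEC_MIN_VERSION 2
#define DEC_REV_VERSION 4

int main(void) {
  const int version =
      (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION;
  printf("0x%06x\n", version);  // prints 0x010204 for release 1.2.4
  return 0;
}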
@@ -178,7 +178,7 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {

//------------------------------------------------------------------------------
// Decodes the next Huffman code from bit-stream.
// FillBitWindow(br) needs to be called at minimum every second call
// VP8LFillBitWindow(br) needs to be called at minimum every second call
// to ReadSymbol, in order to pre-fetch enough bits.
static WEBP_INLINE int ReadSymbol(const HuffmanCode* table,
                                  VP8LBitReader* const br) {
@@ -321,7 +321,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
    // The first code is either 1 bit or 8 bit code.
    int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
    code_lengths[symbol] = 1;
    // The second code (if present), is always 8 bit long.
    // The second code (if present), is always 8 bits long.
    if (num_symbols == 2) {
      symbol = VP8LReadBits(br, 8);
      code_lengths[symbol] = 1;
@@ -1281,7 +1281,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
  uint8_t* const new_data = (uint8_t*)new_color_map;
  new_color_map[0] = transform->data_[0];
  for (i = 4; i < 4 * num_colors; ++i) {
    // Equivalent to AddPixelEq(), on a byte-basis.
    // Equivalent to VP8LAddPixels(), on a byte-basis.
    new_data[i] = (data[i] + new_data[i - 4]) & 0xff;
  }
  for (; i < 4 * final_num_colors; ++i) {
@@ -25,7 +25,7 @@

#define DMUX_MAJ_VERSION 1
#define DMUX_MIN_VERSION 2
#define DMUX_REV_VERSION 2
#define DMUX_REV_VERSION 4

typedef struct {
  size_t start_;   // start location of the data
@@ -614,7 +614,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {

  while (f != NULL) {
    const int cur_frame_set = f->frame_num_;
    int frame_count = 0;

    // Check frame properties.
    for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {

@@ -649,8 +648,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
                             dmux->canvas_width_, dmux->canvas_height_)) {
        return 0;
      }

      ++frame_count;
    }
  }
  return 1;
@@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
                              int alpha_stride, int width, int height,
                              uint8_t* WEBP_RESTRICT dst, int dst_stride) {
  uint32_t alpha_mask = 0xffffffffu;
  uint32_t alpha_mask = 0xffu;
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;

@@ -107,6 +107,7 @@ static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
    dst += dst_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
  alpha_mask *= 0x01010101;
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask != 0xffffffffu);

@@ -135,7 +136,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
                             int width, int height,
                             uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
  uint32_t alpha_mask = 0xffffffffu;
  uint32_t alpha_mask = 0xffu;
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;

@@ -157,6 +158,7 @@ static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
    alpha += alpha_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
  alpha_mask *= 0x01010101;
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask == 0xffffffffu);
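In the hunks above, the scalar accumulator now starts as an 8-bit mask (0xffu) and the added `alpha_mask *= 0x01010101` line broadcasts that byte to all four byte positions before merging with the packed SIMD lanes, so the final 32-bit comparison works for both paths. The sketch below is an illustrative scalar model of that broadcast-and-test, with made-up names, not the upstream code.

// Illustrative sketch: detect any non-0xff alpha by AND-accumulating bytes
// and broadcasting the scalar result before the 32-bit comparison.
#include <stdint.h>

static int HasNonOpaque(const uint8_t* alpha, int n, uint32_t packed_lanes) {
  uint32_t mask = 0xffu;                 // 8-bit accumulator (scalar tail)
  int i;
  for (i = 0; i < n; ++i) mask &= alpha[i];
  mask *= 0x01010101u;                   // replicate the byte into all 4 bytes
  mask &= packed_lanes;                  // merge with the SIMD accumulator
  return mask != 0xffffffffu;            // true if any alpha value was < 0xff
}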
@@ -11,7 +11,7 @@
//
// Author: Christian Duvivier (cduvivier@google.com)

#include "src/dsp/dsp.h"
#include "src/dsp/cpu.h"

#if defined(WEBP_HAVE_NEON_RTCD)
#include <stdio.h>
@ -0,0 +1,254 @@
|
|||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// CPU detection functions and macros.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DSP_CPU_H_
|
||||
#define WEBP_DSP_CPU_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
|
||||
#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
#define LOCAL_GCC_VERSION 0
|
||||
#define LOCAL_GCC_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
|
||||
#define LOCAL_CLANG_PREREQ(maj, min) \
|
||||
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
#define LOCAL_CLANG_VERSION 0
|
||||
#define LOCAL_CLANG_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_CONFIG_H)
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
|
||||
// files without intrinsics, allowing the corresponding Init() to be called.
|
||||
// Files containing intrinsics will need to be built targeting the instruction
|
||||
// set so should succeed on one of the earlier tests.
|
||||
#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
|
||||
#define WEBP_USE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
|
||||
#define WEBP_HAVE_SSE2
|
||||
#endif
|
||||
|
||||
#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
|
||||
#define WEBP_USE_SSE41
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
|
||||
#define WEBP_HAVE_SSE41
|
||||
#endif
|
||||
|
||||
#undef WEBP_MSC_SSE41
|
||||
#undef WEBP_MSC_SSE2
|
||||
|
||||
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
|
||||
// inline assembly would need to be modified for use with Native Client.
|
||||
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
|
||||
!defined(__native_client__)
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
|
||||
defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
|
||||
#define WEBP_ANDROID_NEON // Android targets that may have NEON
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
|
||||
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
|
||||
// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
|
||||
// vtbl4_u8(); a fix was made in 16.6.
|
||||
#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
|
||||
(_MSC_VER >= 1926 && defined(_M_ARM64)))
|
||||
#define WEBP_USE_NEON
|
||||
#define WEBP_USE_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
|
||||
#define WEBP_HAVE_NEON
|
||||
#endif
|
||||
|
||||
#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
|
||||
(__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
|
||||
#define WEBP_USE_MSA
|
||||
#endif
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \
|
||||
defined(__aarch64__))
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#undef WEBP_TSAN_IGNORE_FUNCTION
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#define WEBP_MSAN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#define WEBP_DSP_INIT(func) \
|
||||
do { \
|
||||
static volatile VP8CPUInfo func##_last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func##_last_cpuinfo_used; \
|
||||
static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&func##_lock)) break; \
|
||||
if (func##_last_cpuinfo_used != VP8GetCPUInfo) func(); \
|
||||
func##_last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
(void)pthread_mutex_unlock(&func##_lock); \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
#define WEBP_DSP_INIT(func) \
|
||||
do { \
|
||||
static volatile VP8CPUInfo func##_last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func##_last_cpuinfo_used; \
|
||||
if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
|
||||
func(); \
|
||||
func##_last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
} while (0)
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Defines an Init + helper function that control multiple initialization of
|
||||
// function pointers / tables.
|
||||
/* Usage:
|
||||
WEBP_DSP_INIT_FUNC(InitFunc) {
|
||||
...function body
|
||||
}
|
||||
*/
|
||||
#define WEBP_DSP_INIT_FUNC(name) \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void); \
|
||||
WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
|
||||
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#if defined(__clang__) && defined(__has_attribute)
|
||||
#if __has_attribute(no_sanitize)
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures. This is only meant to silence unaligned loads on platforms that
|
||||
// are known to support them.
|
||||
#undef WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))
|
||||
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures related to unsigned integer overflows. This is only meant to
|
||||
// silence cases where this well defined behavior is expected.
|
||||
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
|
||||
__attribute__((no_sanitize("unsigned-integer-overflow")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
|
||||
// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
|
||||
#if !defined(WEBP_OFFSET_PTR)
|
||||
#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
kSlowSSSE3, // special feature for slow SSSE3 architectures
|
||||
kSSE4_1,
|
||||
kAVX,
|
||||
kAVX2,
|
||||
kNEON,
|
||||
kMIPS32,
|
||||
kMIPSdspR2,
|
||||
kMSA
|
||||
} CPUFeature;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// returns true if the CPU supports the feature.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_DSP_CPU_H_
|
|
@@ -18,6 +18,7 @@
#include "src/webp/config.h"
#endif

#include "src/dsp/cpu.h"
#include "src/webp/types.h"

#ifdef __cplusplus
@ -43,225 +44,6 @@ extern "C" {
|
|||
#define WEBP_RESTRICT
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// CPU detection
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
|
||||
# define LOCAL_GCC_PREREQ(maj, min) \
|
||||
(LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
# define LOCAL_GCC_VERSION 0
|
||||
# define LOCAL_GCC_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
|
||||
# define LOCAL_CLANG_PREREQ(maj, min) \
|
||||
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
# define LOCAL_CLANG_VERSION 0
|
||||
# define LOCAL_CLANG_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_CONFIG_H)
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
|
||||
// files without intrinsics, allowing the corresponding Init() to be called.
|
||||
// Files containing intrinsics will need to be built targeting the instruction
|
||||
// set so should succeed on one of the earlier tests.
|
||||
#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
|
||||
#define WEBP_USE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
|
||||
#define WEBP_HAVE_SSE2
|
||||
#endif
|
||||
|
||||
#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
|
||||
#define WEBP_USE_SSE41
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
|
||||
#define WEBP_HAVE_SSE41
|
||||
#endif
|
||||
|
||||
#undef WEBP_MSC_SSE41
|
||||
#undef WEBP_MSC_SSE2
|
||||
|
||||
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
|
||||
// inline assembly would need to be modified for use with Native Client.
|
||||
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
|
||||
!defined(__native_client__)
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
|
||||
defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
|
||||
#define WEBP_ANDROID_NEON // Android targets that may have NEON
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
|
||||
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
|
||||
// arm_neon.h.
|
||||
#if defined(_MSC_VER) && \
|
||||
((_MSC_VER >= 1700 && defined(_M_ARM)) || \
|
||||
(_MSC_VER >= 1920 && defined(_M_ARM64)))
|
||||
#define WEBP_USE_NEON
|
||||
#define WEBP_USE_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
|
||||
#define WEBP_HAVE_NEON
|
||||
#endif
|
||||
|
||||
#if defined(__mips__) && !defined(__mips64) && \
|
||||
defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
|
||||
#define WEBP_USE_MSA
|
||||
#endif
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#undef WEBP_TSAN_IGNORE_FUNCTION
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#define WEBP_DSP_INIT(func) do { \
|
||||
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
|
||||
static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&func ## _lock)) break; \
|
||||
if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \
|
||||
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
(void)pthread_mutex_unlock(&func ## _lock); \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
#define WEBP_DSP_INIT(func) do { \
|
||||
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
|
||||
if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \
|
||||
func(); \
|
||||
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
} while (0)
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Defines an Init + helper function that control multiple initialization of
|
||||
// function pointers / tables.
|
||||
/* Usage:
|
||||
WEBP_DSP_INIT_FUNC(InitFunc) {
|
||||
...function body
|
||||
}
|
||||
*/
|
||||
#define WEBP_DSP_INIT_FUNC(name) \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
|
||||
WEBP_TSAN_IGNORE_FUNCTION void name(void) { \
|
||||
WEBP_DSP_INIT(name ## _body); \
|
||||
} \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
|
||||
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#if defined(__clang__) && defined(__has_attribute)
|
||||
#if __has_attribute(no_sanitize)
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures. This is only meant to silence unaligned loads on platforms that
|
||||
// are known to support them.
|
||||
#undef WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF \
|
||||
__attribute__((no_sanitize("undefined")))
|
||||
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures related to unsigned integer overflows. This is only meant to
|
||||
// silence cases where this well defined behavior is expected.
|
||||
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
|
||||
__attribute__((no_sanitize("unsigned-integer-overflow")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
|
||||
// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
|
||||
#if !defined(WEBP_OFFSET_PTR)
|
||||
#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
kSlowSSSE3, // special feature for slow SSSE3 architectures
|
||||
kSSE4_1,
|
||||
kAVX,
|
||||
kAVX2,
|
||||
kNEON,
|
||||
kMIPS32,
|
||||
kMIPSdspR2,
|
||||
kMSA
|
||||
} CPUFeature;
|
||||
// returns true if the CPU supports the feature.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init stub generator
|
||||
|
@ -550,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
|||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
|
||||
// utilities for accurate RGB->YUV conversion
|
||||
extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len);
|
||||
extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
|
||||
int16_t* dst, int len);
|
||||
extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
|
||||
int len,
|
||||
const uint16_t* best_y, uint16_t* out);
|
||||
|
||||
// Must be called before using the above.
|
||||
void WebPInitConvertARGBToYUV(void);
|
||||
|
||||
|
|
|
@@ -182,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
// -----------------------------------------------------------------------------
// Huffman-cost related functions.

typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
                                       int length);
typedef float (*VP8LCostFunc)(const uint32_t* population, int length);
typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
                                      int length);
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
                                                const int Y[256]);

@@ -198,7 +198,7 @@ typedef struct {  // small struct to hold counters
} VP8LStreaks;

typedef struct {  // small struct to hold bit entropy results
  double entropy;    // entropy
  float entropy;     // entropy
  uint32_t sum;      // sum of the population
  int nonzeros;      // number of non-zero elements in the population
  uint32_t max_val;  // maximum value in the population
|
|
|
@@ -402,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) {
// Compute the combined Shanon's entropy for distribution {X} and {X+Y}
static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
  int i;
  double retval = 0.;
  float retval = 0.f;
  int sumX = 0, sumXY = 0;
  for (i = 0; i < 256; ++i) {
    const int x = X[i];

@@ -418,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
    }
  }
  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
  return (float)retval;
  return retval;
}

void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {

@@ -636,17 +636,17 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,

//------------------------------------------------------------------------------

static double ExtraCost_C(const uint32_t* population, int length) {
static float ExtraCost_C(const uint32_t* population, int length) {
  int i;
  double cost = 0.;
  float cost = 0.f;
  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
  return cost;
}

static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
                                 int length) {
  int i;
  double cost = 0.;
  float cost = 0.f;
  for (i = 2; i < length - 2; ++i) {
    const int xy = X[i + 2] + Y[i + 2];
    cost += (i >> 1) * xy;
|
|
|
@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
|
|||
// cost += i * *(pop + 1);
|
||||
// pop += 2;
|
||||
// }
|
||||
// return (double)cost;
|
||||
static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
// return (float)cost;
|
||||
static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
int i, temp0, temp1;
|
||||
const uint32_t* pop = &population[4];
|
||||
const uint32_t* const LoopEnd = &population[length];
|
||||
|
@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
|||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return (double)((int64_t)temp0 << 32 | temp1);
|
||||
return (float)((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
// C version of this function:
|
||||
|
@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
|||
// pX += 2;
|
||||
// pY += 2;
|
||||
// }
|
||||
// return (double)cost;
|
||||
static double ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
// return (float)cost;
|
||||
static float ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
int i, temp0, temp1, temp2, temp3;
|
||||
const uint32_t* pX = &X[4];
|
||||
const uint32_t* pY = &Y[4];
|
||||
|
@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X,
|
|||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return (double)((int64_t)temp0 << 32 | temp1);
|
||||
return (float)((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
#define HUFFMAN_COST_PASS \
|
||||
|
@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
|||
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
uint32_t* pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const uint32_t end = ((size) / 4) * 4;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
int i;
|
||||
ASM_START
|
||||
ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
|
||||
ASM_END_0
|
||||
for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
|
||||
for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const uint32_t end = ((size) / 4) * 4;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
int i;
|
||||
ASM_START
|
||||
ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
|
||||
ASM_END_1
|
||||
for (i = end; i < size; ++i) pout[i] += pa[i];
|
||||
for (i = 0; i < size - end; ++i) pout[i] += pa[i];
|
||||
}
|
||||
|
||||
#undef ASM_END_1
|
||||
|
|
|
@@ -239,7 +239,7 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {

static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
  int i;
  double retval = 0.;
  float retval = 0.f;
  int sumX = 0, sumXY = 0;
  const __m128i zero = _mm_setzero_si128();

@@ -273,7 +273,7 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
    }
  }
  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
  return (float)retval;
  return retval;
}

#else
@ -194,50 +194,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
|||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
|
||||
static uint16_t clip_y(int v) {
|
||||
return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len) {
|
||||
uint64_t diff = 0;
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)dst[i] + diff_y;
|
||||
dst[i] = clip_y(new_y);
|
||||
diff += (uint64_t)abs(diff_y);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i, ++A, ++B) {
|
||||
const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
|
||||
const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
|
||||
out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
|
||||
out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef MAX_Y
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
|
@ -247,18 +203,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
|||
void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store);
|
||||
|
||||
uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len);
|
||||
void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len);
|
||||
void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out);
|
||||
|
||||
extern void WebPInitConvertARGBToYUVSSE2(void);
|
||||
extern void WebPInitConvertARGBToYUVSSE41(void);
|
||||
extern void WebPInitConvertARGBToYUVNEON(void);
|
||||
extern void WebPInitSharpYUVSSE2(void);
|
||||
extern void WebPInitSharpYUVNEON(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
||||
WebPConvertARGBToY = ConvertARGBToY_C;
|
||||
|
@ -269,17 +216,10 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
|||
|
||||
WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
|
||||
#endif
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
WebPInitConvertARGBToYUVSSE2();
|
||||
WebPInitSharpYUVSSE2();
|
||||
}
|
||||
#endif // WEBP_HAVE_SSE2
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
|
@ -293,7 +233,6 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
|||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitConvertARGBToYUVNEON();
|
||||
WebPInitSharpYUVNEON();
|
||||
}
|
||||
#endif // WEBP_HAVE_NEON
|
||||
|
||||
|
@ -302,7 +241,4 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
|||
assert(WebPConvertRGB24ToY != NULL);
|
||||
assert(WebPConvertBGR24ToY != NULL);
|
||||
assert(WebPConvertRGBA32ToUV != NULL);
|
||||
assert(WebPSharpYUVUpdateY != NULL);
|
||||
assert(WebPSharpYUVUpdateRGB != NULL);
|
||||
assert(WebPSharpYUVFilterRow != NULL);
|
||||
}
|
||||
|
|
|
@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
|
|||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
|
||||
static uint16_t clip_y_NEON(int v) {
|
||||
return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len) {
|
||||
int i;
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
const int16x8_t max = vdupq_n_s16(MAX_Y);
|
||||
uint64x2_t sum = vdupq_n_u64(0);
|
||||
uint64_t diff;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
|
||||
const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
|
||||
const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_y
|
||||
const int16x8_t F = vaddq_s16(C, D); // new_y
|
||||
const uint16x8_t H =
|
||||
vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
|
||||
const int16x8_t I = vabsq_s16(D); // abs(diff_y)
|
||||
vst1q_u16(dst + i, H);
|
||||
sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
|
||||
}
|
||||
diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)(dst[i]) + diff_y;
|
||||
dst[i] = clip_y_NEON(new_y);
|
||||
diff += (uint64_t)(abs(diff_y));
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vld1q_s16(ref + i);
|
||||
const int16x8_t B = vld1q_s16(src + i);
|
||||
const int16x8_t C = vld1q_s16(dst + i);
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_uv
|
||||
const int16x8_t E = vaddq_s16(C, D); // new_uv
|
||||
vst1q_s16(dst + i, E);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out) {
|
||||
int i;
|
||||
const int16x8_t max = vdupq_n_s16(MAX_Y);
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t a0 = vld1q_s16(A + i + 0);
|
||||
const int16x8_t a1 = vld1q_s16(A + i + 1);
|
||||
const int16x8_t b0 = vld1q_s16(B + i + 0);
|
||||
const int16x8_t b1 = vld1q_s16(B + i + 1);
|
||||
const int16x8_t a0b1 = vaddq_s16(a0, b1);
|
||||
const int16x8_t a1b0 = vaddq_s16(a1, b0);
|
||||
const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1)
|
||||
const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0)
|
||||
const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
|
||||
const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
|
||||
const int16x8_t d0 = vaddq_s16(c1, a0);
|
||||
const int16x8_t d1 = vaddq_s16(c0, a1);
|
||||
const int16x8_t e0 = vrshrq_n_s16(d0, 1);
|
||||
const int16x8_t e1 = vrshrq_n_s16(d1, 1);
|
||||
const int16x8x2_t f = vzipq_s16(e0, e1);
|
||||
const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
|
||||
const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
|
||||
const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
|
||||
const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
|
||||
const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
|
||||
const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
|
||||
vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
|
||||
vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
|
||||
out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
#undef MAX_Y
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void WebPInitSharpYUVNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_NEON
|
||||
|
||||
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
|
||||
WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)
|
||||
|
||||
#endif // WEBP_USE_NEON
|
||||
|
|
|
@ -747,128 +747,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
|
|||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
|
||||
static uint16_t clip_y(int v) {
|
||||
return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len) {
|
||||
uint64_t diff = 0;
|
||||
uint32_t tmp[4];
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max = _mm_set1_epi16(MAX_Y);
|
||||
const __m128i one = _mm_set1_epi16(1);
|
||||
__m128i sum = zero;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
|
||||
const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
|
||||
const __m128i D = _mm_sub_epi16(A, B); // diff_y
|
||||
const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0)
|
||||
const __m128i F = _mm_add_epi16(C, D); // new_y
|
||||
const __m128i G = _mm_or_si128(E, one); // -1 or 1
|
||||
const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
|
||||
const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...))
|
||||
_mm_storeu_si128((__m128i*)(dst + i), H);
|
||||
sum = _mm_add_epi32(sum, I);
|
||||
}
|
||||
_mm_storeu_si128((__m128i*)tmp, sum);
|
||||
diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)dst[i] + diff_y;
|
||||
dst[i] = clip_y(new_y);
|
||||
diff += (uint64_t)abs(diff_y);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i = 0;
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
|
||||
const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
|
||||
const __m128i D = _mm_sub_epi16(A, B); // diff_uv
|
||||
const __m128i E = _mm_add_epi16(C, D); // new_uv
|
||||
_mm_storeu_si128((__m128i*)(dst + i), E);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out) {
|
||||
int i;
|
||||
const __m128i kCst8 = _mm_set1_epi16(8);
|
||||
const __m128i max = _mm_set1_epi16(MAX_Y);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
|
||||
const __m128i a0b1 = _mm_add_epi16(a0, b1);
|
||||
const __m128i a1b0 = _mm_add_epi16(a1, b0);
|
||||
const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
|
||||
const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1)
|
||||
const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0)
|
||||
const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
|
||||
const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
|
||||
const __m128i d0 = _mm_add_epi16(c1, a0);
|
||||
const __m128i d1 = _mm_add_epi16(c0, a1);
|
||||
const __m128i e0 = _mm_srai_epi16(d0, 1);
|
||||
const __m128i e1 = _mm_srai_epi16(d1, 1);
|
||||
const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
|
||||
const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
|
||||
const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
|
||||
const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
|
||||
const __m128i h0 = _mm_add_epi16(g0, f0);
|
||||
const __m128i h1 = _mm_add_epi16(g1, f1);
|
||||
const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
|
||||
const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
// (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
|
||||
// = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
|
||||
// We reuse the common sub-expressions.
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
|
||||
out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
|
||||
#undef MAX_Y
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void WebPInitSharpYUVSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) {
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
|
||||
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
|
||||
WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2)
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
||||
|
|
|
@@ -86,7 +86,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
  // a decoder bug related to alpha with color cache.
  // See: https://code.google.com/p/webp/issues/detail?id=239
  // Need to re-enable this later.
  ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK);
  ok = VP8LEncodeStream(&config, &picture, bw, /*use_cache=*/0);
  WebPPictureFree(&picture);
  ok = ok && !bw->error_;
  if (!ok) {
@ -15,10 +15,11 @@
//

#include <assert.h>
#include <float.h>

#include "src/dsp/lossless_common.h"
#include "src/enc/backward_references_enc.h"
#include "src/enc/histogram_enc.h"
#include "src/dsp/lossless_common.h"
#include "src/utils/color_cache_utils.h"
#include "src/utils/utils.h"

@ -30,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
const PixOrCopy v);

typedef struct {
double alpha_[VALUES_IN_BYTE];
double red_[VALUES_IN_BYTE];
double blue_[VALUES_IN_BYTE];
double distance_[NUM_DISTANCE_CODES];
double* literal_;
float alpha_[VALUES_IN_BYTE];
float red_[VALUES_IN_BYTE];
float blue_[VALUES_IN_BYTE];
float distance_[NUM_DISTANCE_CODES];
float* literal_;
} CostModel;

static void ConvertPopulationCountTableToBitEstimates(
int num_symbols, const uint32_t population_counts[], double output[]) {
int num_symbols, const uint32_t population_counts[], float output[]) {
uint32_t sum = 0;
int nonzeros = 0;
int i;

@ -51,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates(
if (nonzeros <= 1) {
memset(output, 0, num_symbols * sizeof(*output));
} else {
const double logsum = VP8LFastLog2(sum);
const float logsum = VP8LFastLog2(sum);
for (i = 0; i < num_symbols; ++i) {
output[i] = logsum - VP8LFastLog2(population_counts[i]);
}

@ -75,8 +76,8 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
}

ConvertPopulationCountTableToBitEstimates(
VP8LHistogramNumCodes(histo->palette_code_bits_),
histo->literal_, m->literal_);
VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_,
m->literal_);
ConvertPopulationCountTableToBitEstimates(
VALUES_IN_BYTE, histo->red_, m->red_);
ConvertPopulationCountTableToBitEstimates(

@ -92,27 +93,27 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
return ok;
}

static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) {
return m->alpha_[v >> 24] +
m->red_[(v >> 16) & 0xff] +
m->literal_[(v >> 8) & 0xff] +
m->blue_[v & 0xff];
}

static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) {
const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
return m->literal_[literal_idx];
}

static WEBP_INLINE double GetLengthCost(const CostModel* const m,
uint32_t length) {
static WEBP_INLINE float GetLengthCost(const CostModel* const m,
uint32_t length) {
int code, extra_bits;
VP8LPrefixEncodeBits(length, &code, &extra_bits);
return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
}

static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
uint32_t distance) {
static WEBP_INLINE float GetDistanceCost(const CostModel* const m,
uint32_t distance) {
int code, extra_bits;
VP8LPrefixEncodeBits(distance, &code, &extra_bits);
return m->distance_[code] + extra_bits;

@ -122,20 +123,20 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel(
const uint32_t* const argb, VP8LColorCache* const hashers,
const CostModel* const cost_model, int idx, int use_color_cache,
float prev_cost, float* const cost, uint16_t* const dist_array) {
double cost_val = prev_cost;
float cost_val = prev_cost;
const uint32_t color = argb[idx];
const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1;
if (ix >= 0) {
// use_color_cache is true and hashers contains color
const double mul0 = 0.68;
const float mul0 = 0.68f;
cost_val += GetCacheCost(cost_model, ix) * mul0;
} else {
const double mul1 = 0.82;
const float mul1 = 0.82f;
if (use_color_cache) VP8LColorCacheInsert(hashers, color);
cost_val += GetLiteralCost(cost_model, color) * mul1;
}
if (cost[idx] > cost_val) {
cost[idx] = (float)cost_val;
cost[idx] = cost_val;
dist_array[idx] = 1;  // only one is inserted.
}
}

@ -172,7 +173,7 @@ struct CostInterval {

// The GetLengthCost(cost_model, k) are cached in a CostCacheInterval.
typedef struct {
double cost_;
float cost_;
int start_;
int end_;  // Exclusive.
} CostCacheInterval;

@ -187,7 +188,7 @@ typedef struct {
int count_;  // The number of stored intervals.
CostCacheInterval* cache_intervals_;
size_t cache_intervals_size_;
double cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
float cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
float* costs_;
uint16_t* dist_array_;
// Most of the time, we only need few intervals -> use a free-list, to avoid

@ -262,10 +263,13 @@ static int CostManagerInit(CostManager* const manager,
CostManagerInitFreeList(manager);

// Fill in the cost_cache_.
manager->cache_intervals_size_ = 1;
manager->cost_cache_[0] = GetLengthCost(cost_model, 0);
for (i = 1; i < cost_cache_size; ++i) {
// Has to be done in two passes due to a GCC bug on i686
// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
for (i = 0; i < cost_cache_size; ++i) {
manager->cost_cache_[i] = GetLengthCost(cost_model, i);
}
manager->cache_intervals_size_ = 1;
for (i = 1; i < cost_cache_size; ++i) {
// Get the number of bound intervals.
if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) {
++manager->cache_intervals_size_;

@ -294,7 +298,7 @@ static int CostManagerInit(CostManager* const manager,
cur->end_ = 1;
cur->cost_ = manager->cost_cache_[0];
for (i = 1; i < cost_cache_size; ++i) {
const double cost_val = manager->cost_cache_[i];
const float cost_val = manager->cost_cache_[i];
if (cost_val != cur->cost_) {
++cur;
// Initialize an interval.

@ -303,6 +307,8 @@ static int CostManagerInit(CostManager* const manager,
}
cur->end_ = i + 1;
}
assert((size_t)(cur - manager->cache_intervals_) + 1 ==
manager->cache_intervals_size_);
}

manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));

@ -311,7 +317,7 @@ static int CostManagerInit(CostManager* const manager,
return 0;
}
// Set the initial costs_ high for every pixel as we will keep the minimum.
for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f;
for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX;

return 1;
}

@ -457,7 +463,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
// If handling the interval or one of its subintervals becomes to heavy, its
// contribution is added to the costs right away.
static WEBP_INLINE void PushInterval(CostManager* const manager,
double distance_cost, int position,
float distance_cost, int position,
int len) {
size_t i;
CostInterval* interval = manager->head_;

@ -474,7 +480,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
const int k = j - position;
float cost_tmp;
assert(k >= 0 && k < MAX_LENGTH);
cost_tmp = (float)(distance_cost + manager->cost_cache_[k]);
cost_tmp = distance_cost + manager->cost_cache_[k];

if (manager->costs_[j] > cost_tmp) {
manager->costs_[j] = cost_tmp;

@ -492,7 +498,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
const int end = position + (cost_cache_intervals[i].end_ > len
? len
: cost_cache_intervals[i].end_);
const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_);
const float cost = distance_cost + cost_cache_intervals[i].cost_;

for (; interval != NULL && interval->start_ < end;
interval = interval_next) {

@ -570,22 +576,21 @@ static int BackwardReferencesHashChainDistanceOnly(
const int pix_count = xsize * ysize;
const int use_color_cache = (cache_bits > 0);
const size_t literal_array_size =
sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
((cache_bits > 0) ? (1 << cache_bits) : 0));
sizeof(float) * (VP8LHistogramNumCodes(cache_bits));
const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
CostModel* const cost_model =
(CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
VP8LColorCache hashers;
CostManager* cost_manager =
(CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager));
(CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager));
int offset_prev = -1, len_prev = -1;
double offset_cost = -1;
float offset_cost = -1.f;
int first_offset_is_constant = -1;  // initialized with 'impossible' value
int reach = 0;

if (cost_model == NULL || cost_manager == NULL) goto Error;

cost_model->literal_ = (double*)(cost_model + 1);
cost_model->literal_ = (float*)(cost_model + 1);
if (use_color_cache) {
cc_init = VP8LColorCacheInit(&hashers, cache_bits);
if (!cc_init) goto Error;

@ -675,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly(
}

ok = !refs->error_;
Error:
Error:
if (cc_init) VP8LColorCacheClear(&hashers);
CostManagerClear(cost_manager);
WebPSafeFree(cost_model);
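The CostModel hunks above only swap double for float; the lookup itself is unchanged: GetLiteralCost() splits a packed 0xAARRGGBB pixel into four per-channel table indices. A small self-contained sketch of that decomposition, with simplified table sizes and invented names (the real literal_ array also holds length and cache codes):

#include <stdint.h>

typedef struct {
  float alpha[256], red[256], green[256], blue[256];
} CostTablesSketch;

// Sum of the per-channel bit-cost estimates for one ARGB pixel.
static float literal_cost_sketch(const CostTablesSketch* m, uint32_t argb) {
  return m->alpha[argb >> 24] +          // A
         m->red[(argb >> 16) & 0xff] +   // R
         m->green[(argb >> 8) & 0xff] +  // G (the literal_ table in libwebp)
         m->blue[argb & 0xff];           // B
}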
@ -10,6 +10,8 @@
// Author: Jyrki Alakuijala (jyrki@google.com)
//

#include "src/enc/backward_references_enc.h"

#include <assert.h>
#include <float.h>
#include <math.h>

@ -17,10 +19,11 @@
#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/enc/backward_references_enc.h"
#include "src/enc/histogram_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/color_cache_utils.h"
#include "src/utils/utils.h"
#include "src/webp/encode.h"

#define MIN_BLOCK_SIZE 256  // minimum block size for backward references

@ -255,10 +258,13 @@ static WEBP_INLINE int MaxFindCopyLength(int len) {

int VP8LHashChainFill(VP8LHashChain* const p, int quality,
const uint32_t* const argb, int xsize, int ysize,
int low_effort) {
int low_effort, const WebPPicture* const pic,
int percent_range, int* const percent) {
const int size = xsize * ysize;
const int iter_max = GetMaxItersForQuality(quality);
const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
int remaining_percent = percent_range;
int percent_start = *percent;
int pos;
int argb_comp;
uint32_t base_position;

@ -276,7 +282,13 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,

hash_to_first_index =
(int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
if (hash_to_first_index == NULL) return 0;
if (hash_to_first_index == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
}

percent_range = remaining_percent / 2;
remaining_percent -= percent_range;

// Set the int32_t array to -1.
memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));

@ -323,12 +335,22 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
hash_to_first_index[hash_code] = pos++;
argb_comp = argb_comp_next;
}

if (!WebPReportProgress(
pic, percent_start + percent_range * pos / (size - 2), percent)) {
WebPSafeFree(hash_to_first_index);
return 0;
}
}
// Process the penultimate pixel.
chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];

WebPSafeFree(hash_to_first_index);

percent_start += percent_range;
if (!WebPReportProgress(pic, percent_start, percent)) return 0;
percent_range = remaining_percent;

// Find the best match interval at each pixel, defined by an offset to the
// pixel and a length. The right-most pixel cannot match anything to the right
// (hence a best length of 0) and the left-most pixel nothing to the left

@ -417,8 +439,17 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
max_base_position = base_position;
}
}

if (!WebPReportProgress(pic,
percent_start + percent_range *
(size - 2 - base_position) /
(size - 2),
percent)) {
return 0;
}
}
return 1;

return WebPReportProgress(pic, percent_start + percent_range, percent);
}

static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,

@ -728,7 +759,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
int* const best_cache_bits) {
int i;
const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
double entropy_min = MAX_ENTROPY;
float entropy_min = MAX_ENTROPY;
int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
VP8LRefsCursor c = VP8LRefsCursorInit(refs);

@ -813,14 +844,14 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
}

for (i = 0; i <= cache_bits_max; ++i) {
const double entropy = VP8LHistogramEstimateBits(histos[i]);
const float entropy = VP8LHistogramEstimateBits(histos[i]);
if (i == 0 || entropy < entropy_min) {
entropy_min = entropy;
*best_cache_bits = i;
}
}
ok = 1;
Error:
Error:
for (i = 0; i <= cache_bits_max; ++i) {
if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
VP8LFreeHistogram(histos[i]);

@ -890,7 +921,7 @@ static int GetBackwardReferences(int width, int height,
int i, lz77_type;
// Index 0 is for a color cache, index 1 for no cache (if needed).
int lz77_types_best[2] = {0, 0};
double bit_costs_best[2] = {DBL_MAX, DBL_MAX};
float bit_costs_best[2] = {FLT_MAX, FLT_MAX};
VP8LHashChain hash_chain_box;
VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
int status = 0;

@ -902,7 +933,7 @@ static int GetBackwardReferences(int width, int height,
for (lz77_type = 1; lz77_types_to_try;
lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
int res = 0;
double bit_cost = 0.;
float bit_cost = 0.f;
if ((lz77_types_to_try & lz77_type) == 0) continue;
switch (lz77_type) {
case kLZ77RLE:

@ -976,15 +1007,16 @@ static int GetBackwardReferences(int width, int height,
const VP8LHashChain* const hash_chain_tmp =
(lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
hash_chain_tmp, &refs[i],
refs_tmp)) {
double bit_cost_trace;
VP8LHistogramCreate(histo, refs_tmp, cache_bits);
bit_cost_trace = VP8LHistogramEstimateBits(histo);
if (bit_cost_trace < bit_costs_best[i]) {
BackwardRefsSwap(refs_tmp, &refs[i]);
}
float bit_cost_trace;
if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
hash_chain_tmp, &refs[i],
refs_tmp)) {
goto Error;
}
VP8LHistogramCreate(histo, refs_tmp, cache_bits);
bit_cost_trace = VP8LHistogramEstimateBits(histo);
if (bit_cost_trace < bit_costs_best[i]) {
BackwardRefsSwap(refs_tmp, &refs[i]);
}
}

@ -1000,31 +1032,37 @@ static int GetBackwardReferences(int width, int height,
}
status = 1;

Error:
Error:
VP8LHashChainClear(&hash_chain_box);
VP8LFreeHistogram(histo);
return status;
}

WebPEncodingError VP8LGetBackwardReferences(
int VP8LGetBackwardReferences(
int width, int height, const uint32_t* const argb, int quality,
int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
int* const cache_bits_best) {
int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
int* const percent) {
if (low_effort) {
VP8LBackwardRefs* refs_best;
*cache_bits_best = cache_bits_max;
refs_best = GetBackwardReferencesLowEffort(
width, height, argb, cache_bits_best, hash_chain, refs);
if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
if (refs_best == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
}
// Set it in first position.
BackwardRefsSwap(refs_best, &refs[0]);
} else {
if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
cache_bits_max, do_no_cache, hash_chain, refs,
cache_bits_best)) {
return VP8_ENC_ERROR_OUT_OF_MEMORY;
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
}
}
return VP8_ENC_OK;

return WebPReportProgress(pic, *percent + percent_range, percent);
}
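The VP8LHashChainFill() changes above thread a progress budget through the function: the caller's percent_range is split between the hash-fill pass and the match-search pass, with each pass reporting pro-rated progress. A rough sketch of that pattern, assuming a generic callback in place of WebPReportProgress() (all names here are illustrative):

typedef int (*ProgressFn)(int percent, void* user);  // returns 0 to abort

// Runs two phases that together consume 'percent_range' percentage points,
// starting from *percent, mirroring how the new code halves its budget.
static int run_with_progress_sketch(int percent_range, int* percent,
                                    ProgressFn report, void* user) {
  int remaining = percent_range;
  int start = *percent;
  const int first_half = remaining / 2;
  remaining -= first_half;
  // ... phase 1 work, reporting values in [start, start + first_half] ...
  if (!report(start + first_half, user)) return 0;
  start += first_half;
  // ... phase 2 work, reporting values in [start, start + remaining] ...
  if (!report(start + remaining, user)) return 0;
  *percent = start + remaining;
  return 1;
}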
@ -134,10 +134,11 @@ struct VP8LHashChain {

// Must be called first, to set size.
int VP8LHashChainInit(VP8LHashChain* const p, int size);
// Pre-compute the best matches for argb.
// Pre-compute the best matches for argb. pic and percent are for progress.
int VP8LHashChainFill(VP8LHashChain* const p, int quality,
const uint32_t* const argb, int xsize, int ysize,
int low_effort);
int low_effort, const WebPPicture* const pic,
int percent_range, int* const percent);
void VP8LHashChainClear(VP8LHashChain* const p);  // release memory

static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p,

@ -227,11 +228,14 @@ enum VP8LLZ77Type {
// VP8LBackwardRefs is put in the first element, the best value with no-cache in
// the second element.
// In both cases, the last element is used as temporary internally.
WebPEncodingError VP8LGetBackwardReferences(
// pic and percent are for progress.
// Returns false in case of error (stored in pic->error_code).
int VP8LGetBackwardReferences(
int width, int height, const uint32_t* const argb, int quality,
int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
int* const cache_bits_best);
int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
int* const percent);

#ifdef __cplusplus
}
@ -13,15 +13,17 @@
#include "src/webp/config.h"
#endif

#include <float.h>
#include <math.h>

#include "src/enc/backward_references_enc.h"
#include "src/enc/histogram_enc.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/enc/backward_references_enc.h"
#include "src/enc/histogram_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"

#define MAX_COST 1.e38
#define MAX_BIT_COST FLT_MAX

// Number of partitions for the three dominant (literal, red and blue) symbol
// costs.

@ -228,8 +230,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
// -----------------------------------------------------------------------------
// Entropy-related functions.

static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
double mix;
static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) {
float mix;
if (entropy->nonzeros < 5) {
if (entropy->nonzeros <= 1) {
return 0;

@ -238,67 +240,67 @@ static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
// Let's mix in a bit of entropy to favor good clustering when
// distributions of these are combined.
if (entropy->nonzeros == 2) {
return 0.99 * entropy->sum + 0.01 * entropy->entropy;
return 0.99f * entropy->sum + 0.01f * entropy->entropy;
}
// No matter what the entropy says, we cannot be better than min_limit
// with Huffman coding. I am mixing a bit of entropy into the
// min_limit since it produces much better (~0.5 %) compression results
// perhaps because of better entropy clustering.
if (entropy->nonzeros == 3) {
mix = 0.95;
mix = 0.95f;
} else {
mix = 0.7;  // nonzeros == 4.
mix = 0.7f;  // nonzeros == 4.
}
} else {
mix = 0.627;
mix = 0.627f;
}

{
double min_limit = 2 * entropy->sum - entropy->max_val;
min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy;
float min_limit = 2.f * entropy->sum - entropy->max_val;
min_limit = mix * min_limit + (1.f - mix) * entropy->entropy;
return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
}
}

double VP8LBitsEntropy(const uint32_t* const array, int n) {
float VP8LBitsEntropy(const uint32_t* const array, int n) {
VP8LBitEntropy entropy;
VP8LBitsEntropyUnrefined(array, n, &entropy);

return BitsEntropyRefine(&entropy);
}

static double InitialHuffmanCost(void) {
static float InitialHuffmanCost(void) {
// Small bias because Huffman code length is typically not stored in
// full length.
static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
static const double kSmallBias = 9.1;
static const float kSmallBias = 9.1f;
return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
}

// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
static double FinalHuffmanCost(const VP8LStreaks* const stats) {
static float FinalHuffmanCost(const VP8LStreaks* const stats) {
// The constants in this function are experimental and got rounded from
// their original values in 1/8 when switched to 1/1024.
double retval = InitialHuffmanCost();
float retval = InitialHuffmanCost();
// Second coefficient: Many zeros in the histogram are covered efficiently
// by a run-length encode. Originally 2/8.
retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1];
// Second coefficient: Constant values are encoded less efficiently, but still
// RLE'ed. Originally 6/8.
retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1];
// 0s are usually encoded more efficiently than non-0s.
// Originally 15/8.
retval += 1.796875 * stats->streaks[0][0];
retval += 1.796875f * stats->streaks[0][0];
// Originally 26/8.
retval += 3.28125 * stats->streaks[1][0];
retval += 3.28125f * stats->streaks[1][0];
return retval;
}

// Get the symbol entropy for the distribution 'population'.
// Set 'trivial_sym', if there's only one symbol present in the distribution.
static double PopulationCost(const uint32_t* const population, int length,
uint32_t* const trivial_sym,
uint8_t* const is_used) {
static float PopulationCost(const uint32_t* const population, int length,
uint32_t* const trivial_sym,
uint8_t* const is_used) {
VP8LBitEntropy bit_entropy;
VP8LStreaks stats;
VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);

@ -314,11 +316,10 @@ static double PopulationCost(const uint32_t* const population, int length,

// trivial_at_end is 1 if the two histograms only have one element that is
// non-zero: both the zero-th one, or both the last one.
static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
const uint32_t* const Y,
int length, int is_X_used,
int is_Y_used,
int trivial_at_end) {
static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X,
const uint32_t* const Y, int length,
int is_X_used, int is_Y_used,
int trivial_at_end) {
VP8LStreaks stats;
if (trivial_at_end) {
// This configuration is due to palettization that transforms an indexed

@ -356,7 +357,7 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
}

// Estimates the Entropy + Huffman + other block overhead size cost.
double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
float VP8LHistogramEstimateBits(VP8LHistogram* const p) {
return
PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
NULL, &p->is_used_[0])

@ -373,8 +374,7 @@ double VP8LHistogramEstimateBits(VP8LHistogram* const p) {

static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
const VP8LHistogram* const b,
double cost_threshold,
double* cost) {
float cost_threshold, float* cost) {
const int palette_code_bits = a->palette_code_bits_;
int trivial_at_end = 0;
assert(a->palette_code_bits_ == b->palette_code_bits_);

@ -439,12 +439,11 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a,
// Since the previous score passed is 'cost_threshold', we only need to compare
// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
// early.
static double HistogramAddEval(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out,
double cost_threshold) {
double cost = 0;
const double sum_cost = a->bit_cost_ + b->bit_cost_;
static float HistogramAddEval(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out, float cost_threshold) {
float cost = 0;
const float sum_cost = a->bit_cost_ + b->bit_cost_;
cost_threshold += sum_cost;

if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {

@ -459,10 +458,10 @@ static double HistogramAddEval(const VP8LHistogram* const a,
// Same as HistogramAddEval(), except that the resulting histogram
// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
// the term C(b) which is constant over all the evaluations.
static double HistogramAddThresh(const VP8LHistogram* const a,
const VP8LHistogram* const b,
double cost_threshold) {
double cost;
static float HistogramAddThresh(const VP8LHistogram* const a,
const VP8LHistogram* const b,
float cost_threshold) {
float cost;
assert(a != NULL && b != NULL);
cost = -a->bit_cost_;
GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);

@ -473,24 +472,22 @@ static double HistogramAddThresh(const VP8LHistogram* const a,

// The structure to keep track of cost range for the three dominant entropy
// symbols.
// TODO(skal): Evaluate if float can be used here instead of double for
// representing the entropy costs.
typedef struct {
double literal_max_;
double literal_min_;
double red_max_;
double red_min_;
double blue_max_;
double blue_min_;
float literal_max_;
float literal_min_;
float red_max_;
float red_min_;
float blue_max_;
float blue_min_;
} DominantCostRange;

static void DominantCostRangeInit(DominantCostRange* const c) {
c->literal_max_ = 0.;
c->literal_min_ = MAX_COST;
c->literal_min_ = MAX_BIT_COST;
c->red_max_ = 0.;
c->red_min_ = MAX_COST;
c->red_min_ = MAX_BIT_COST;
c->blue_max_ = 0.;
c->blue_min_ = MAX_COST;
c->blue_min_ = MAX_BIT_COST;
}

static void UpdateDominantCostRange(

@ -505,10 +502,9 @@ static void UpdateDominantCostRange(

static void UpdateHistogramCost(VP8LHistogram* const h) {
uint32_t alpha_sym, red_sym, blue_sym;
const double alpha_cost =
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
&h->is_used_[3]);
const double distance_cost =
const float alpha_cost =
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]);
const float distance_cost =
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);

@ -529,10 +525,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
}
}

static int GetBinIdForEntropy(double min, double max, double val) {
const double range = max - min;
static int GetBinIdForEntropy(float min, float max, float val) {
const float range = max - min;
if (range > 0.) {
const double delta = val - min;
const float delta = val - min;
return (int)((NUM_PARTITIONS - 1e-6) * delta / range);
} else {
return 0;

@ -641,15 +637,11 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,

// Merges some histograms with same bin_id together if it's advantageous.
// Sets the remaining histograms to NULL.
static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
int* num_used,
const uint16_t* const clusters,
uint16_t* const cluster_mappings,
VP8LHistogram* cur_combo,
const uint16_t* const bin_map,
int num_bins,
double combine_cost_factor,
int low_effort) {
static void HistogramCombineEntropyBin(
VP8LHistogramSet* const image_histo, int* num_used,
const uint16_t* const clusters, uint16_t* const cluster_mappings,
VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins,
float combine_cost_factor, int low_effort) {
VP8LHistogram** const histograms = image_histo->histograms;
int idx;
struct {

@ -679,11 +671,10 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
cluster_mappings[clusters[idx]] = clusters[first];
} else {
// try to merge #idx into #first (both share the same bin_id)
const double bit_cost = histograms[idx]->bit_cost_;
const double bit_cost_thresh = -bit_cost * combine_cost_factor;
const double curr_cost_diff =
HistogramAddEval(histograms[first], histograms[idx],
cur_combo, bit_cost_thresh);
const float bit_cost = histograms[idx]->bit_cost_;
const float bit_cost_thresh = -bit_cost * combine_cost_factor;
const float curr_cost_diff = HistogramAddEval(
histograms[first], histograms[idx], cur_combo, bit_cost_thresh);
if (curr_cost_diff < bit_cost_thresh) {
// Try to merge two histograms only if the combo is a trivial one or
// the two candidate histograms are already non-trivial.

@ -731,8 +722,8 @@ static uint32_t MyRand(uint32_t* const seed) {
typedef struct {
int idx1;
int idx2;
double cost_diff;
double cost_combo;
float cost_diff;
float cost_combo;
} HistogramPair;

typedef struct {

@ -787,10 +778,9 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
// Update the cost diff and combo of a pair of histograms. This needs to be
// called when the the histograms have been merged with a third one.
static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
const VP8LHistogram* const h2,
double threshold,
const VP8LHistogram* const h2, float threshold,
HistogramPair* const pair) {
const double sum_cost = h1->bit_cost_ + h2->bit_cost_;
const float sum_cost = h1->bit_cost_ + h2->bit_cost_;
pair->cost_combo = 0.;
GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo);
pair->cost_diff = pair->cost_combo - sum_cost;

@ -799,9 +789,9 @@ static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
// Create a pair from indices "idx1" and "idx2" provided its cost
// is inferior to "threshold", a negative entropy.
// It returns the cost of the pair, or 0. if it superior to threshold.
static double HistoQueuePush(HistoQueue* const histo_queue,
VP8LHistogram** const histograms, int idx1,
int idx2, double threshold) {
static float HistoQueuePush(HistoQueue* const histo_queue,
VP8LHistogram** const histograms, int idx1,
int idx2, float threshold) {
const VP8LHistogram* h1;
const VP8LHistogram* h2;
HistogramPair pair;

@ -945,8 +935,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
++tries_with_no_success < num_tries_no_success;
++iter) {
int* mapping_index;
double best_cost =
(histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff;
float best_cost =
(histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff;
int best_idx1 = -1, best_idx2 = 1;
const uint32_t rand_range = (*num_used - 1) * (*num_used);
// (*num_used) / 2 was chosen empirically. Less means faster but worse

@ -955,7 +945,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,

// Pick random samples.
for (j = 0; *num_used >= 2 && j < num_tries; ++j) {
double curr_cost;
float curr_cost;
// Choose two different histograms at random and try to combine them.
const uint32_t tmp = MyRand(&seed) % rand_range;
uint32_t idx1 = tmp / (*num_used - 1);

@ -1034,7 +1024,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
*do_greedy = (*num_used <= min_cluster_size);
ok = 1;

End:
End:
HistoQueueClear(&histo_queue);
WebPSafeFree(mappings);
return ok;

@ -1057,7 +1047,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
if (out_size > 1) {
for (i = 0; i < in_size; ++i) {
int best_out = 0;
double best_bits = MAX_COST;
float best_bits = MAX_BIT_COST;
int k;
if (in_histo[i] == NULL) {
// Arbitrarily set to the previous value if unused to help future LZ77.

@ -1065,7 +1055,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
continue;
}
for (k = 0; k < out_size; ++k) {
double cur_bits;
float cur_bits;
cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
if (k == 0 || cur_bits < best_bits) {
best_bits = cur_bits;

@ -1093,13 +1083,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
}
}

static double GetCombineCostFactor(int histo_size, int quality) {
double combine_cost_factor = 0.16;
static float GetCombineCostFactor(int histo_size, int quality) {
float combine_cost_factor = 0.16f;
if (quality < 90) {
if (histo_size > 256) combine_cost_factor /= 2.;
if (histo_size > 512) combine_cost_factor /= 2.;
if (histo_size > 1024) combine_cost_factor /= 2.;
if (quality <= 50) combine_cost_factor /= 2.;
if (histo_size > 256) combine_cost_factor /= 2.f;
if (histo_size > 512) combine_cost_factor /= 2.f;
if (histo_size > 1024) combine_cost_factor /= 2.f;
if (quality <= 50) combine_cost_factor /= 2.f;
}
return combine_cost_factor;
}

@ -1169,13 +1159,13 @@ static void RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) {
}

int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs,
int quality, int low_effort,
int histogram_bits, int cache_bits,
const VP8LBackwardRefs* const refs, int quality,
int low_effort, int histogram_bits, int cache_bits,
VP8LHistogramSet* const image_histo,
VP8LHistogram* const tmp_histo,
uint16_t* const histogram_symbols) {
int ok = 0;
uint16_t* const histogram_symbols,
const WebPPicture* const pic, int percent_range,
int* const percent) {
const int histo_xsize =
histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1;
const int histo_ysize =

@ -1192,7 +1182,10 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
WebPSafeMalloc(2 * image_histo_raw_size, sizeof(map_tmp));
uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size;
int num_used = image_histo_raw_size;
if (orig_histo == NULL || map_tmp == NULL) goto Error;
if (orig_histo == NULL || map_tmp == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
goto Error;
}

// Construct the histograms from backward references.
HistogramBuild(xsize, histogram_bits, refs, orig_histo);

@ -1206,16 +1199,15 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,

if (entropy_combine) {
uint16_t* const bin_map = map_tmp;
const double combine_cost_factor =
const float combine_cost_factor =
GetCombineCostFactor(image_histo_raw_size, quality);
const uint32_t num_clusters = num_used;

HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
// Collapse histograms with similar entropy.
HistogramCombineEntropyBin(image_histo, &num_used, histogram_symbols,
cluster_mappings, tmp_histo, bin_map,
entropy_combine_num_bins, combine_cost_factor,
low_effort);
HistogramCombineEntropyBin(
image_histo, &num_used, histogram_symbols, cluster_mappings, tmp_histo,
bin_map, entropy_combine_num_bins, combine_cost_factor, low_effort);
OptimizeHistogramSymbols(image_histo, cluster_mappings, num_clusters,
map_tmp, histogram_symbols);
}

@ -1229,11 +1221,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
int do_greedy;
if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size,
&do_greedy)) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
goto Error;
}
if (do_greedy) {
RemoveEmptyHistograms(image_histo);
if (!HistogramCombineGreedy(image_histo, &num_used)) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
goto Error;
}
}

@ -1243,10 +1237,12 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
RemoveEmptyHistograms(image_histo);
HistogramRemap(orig_histo, image_histo, histogram_symbols);

ok = 1;
if (!WebPReportProgress(pic, *percent + percent_range, percent)) {
goto Error;
}

Error:
VP8LFreeHistogramSet(orig_histo);
WebPSafeFree(map_tmp);
return ok;
return (pic->error_code == VP8_ENC_OK);
}
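Among the histogram changes above, GetBinIdForEntropy() maps a cost into one of NUM_PARTITIONS bins spanning [min, max]. A standalone sketch with an assumed partition count (libwebp defines NUM_PARTITIONS elsewhere in histogram_enc.c; the names below are illustrative):

#define NUM_PARTITIONS_SKETCH 4

static int bin_id_for_entropy_sketch(float min, float max, float val) {
  const float range = max - min;
  if (range > 0.f) {
    const float delta = val - min;
    // The 1e-6 keeps val == max from landing in a non-existent bin.
    return (int)((NUM_PARTITIONS_SKETCH - 1e-6) * delta / range);
  }
  return 0;
}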
@ -40,10 +40,10 @@ typedef struct {
int palette_code_bits_;
uint32_t trivial_symbol_;  // True, if histograms for Red, Blue & Alpha
// literal symbols are single valued.
double bit_cost_;  // cached value of bit cost.
double literal_cost_;  // Cached values of dominant entropy costs:
double red_cost_;  // literal, red & blue.
double blue_cost_;
float bit_cost_;  // cached value of bit cost.
float literal_cost_;  // Cached values of dominant entropy costs:
float red_cost_;  // literal, red & blue.
float blue_cost_;
uint8_t is_used_[5];  // 5 for literal, red, blue, alpha, distance
} VP8LHistogram;

@ -105,21 +105,23 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
}

// Builds the histogram image.
// Builds the histogram image. pic and percent are for progress.
// Returns false in case of error (stored in pic->error_code).
int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs,
int quality, int low_effort,
int histogram_bits, int cache_bits,
const VP8LBackwardRefs* const refs, int quality,
int low_effort, int histogram_bits, int cache_bits,
VP8LHistogramSet* const image_histo,
VP8LHistogram* const tmp_histo,
uint16_t* const histogram_symbols);
uint16_t* const histogram_symbols,
const WebPPicture* const pic, int percent_range,
int* const percent);

// Returns the entropy for the symbols in the input array.
double VP8LBitsEntropy(const uint32_t* const array, int n);
float VP8LBitsEntropy(const uint32_t* const array, int n);

// Estimate how many bits the combined entropy of literals and distance
// approximately maps to.
double VP8LHistogramEstimateBits(VP8LHistogram* const p);
float VP8LHistogramEstimateBits(VP8LHistogram* const p);

#ifdef __cplusplus
}
@ -15,12 +15,19 @@
#include <stdlib.h>
#include <math.h>

#include "sharpyuv/sharpyuv.h"
#include "sharpyuv/sharpyuv_csp.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/random_utils.h"
#include "src/utils/utils.h"
#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include "src/dsp/yuv.h"
#include "src/dsp/cpu.h"

#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>
#endif

// Uncomment to disable gamma-compression during RGB->U/V averaging
#define USE_GAMMA_COMPRESSION

@ -76,16 +83,16 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {

#if defined(USE_GAMMA_COMPRESSION)

// gamma-compensates loss of resolution during chroma subsampling
#define kGamma 0.80  // for now we use a different gamma value than kGammaF
#define kGammaFix 12  // fixed-point precision for linear values
#define kGammaScale ((1 << kGammaFix) - 1)
#define kGammaTabFix 7  // fixed-point fractional bits precision
#define kGammaTabScale (1 << kGammaTabFix)
#define kGammaTabRounder (kGammaTabScale >> 1)
#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
// Gamma correction compensates loss of resolution during chroma subsampling.
#define GAMMA_FIX 12  // fixed-point precision for linear values
#define GAMMA_TAB_FIX 7  // fixed-point fractional bits precision
#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX))
static const double kGamma = 0.80;
static const int kGammaScale = ((1 << GAMMA_FIX) - 1);
static const int kGammaTabScale = (1 << GAMMA_TAB_FIX);
static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1);

static int kLinearToGammaTab[kGammaTabSize + 1];
static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1];
static uint16_t kGammaToLinearTab[256];
static volatile int kGammaTablesOk = 0;
static void InitGammaTables(void);

@ -93,13 +100,13 @@ static void InitGammaTables(void);
WEBP_DSP_INIT_FUNC(InitGammaTables) {
if (!kGammaTablesOk) {
int v;
const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale;
const double norm = 1. / 255.;
for (v = 0; v <= 255; ++v) {
kGammaToLinearTab[v] =
(uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
}
for (v = 0; v <= kGammaTabSize; ++v) {
for (v = 0; v <= GAMMA_TAB_SIZE; ++v) {
kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
}
kGammaTablesOk = 1;

@ -111,12 +118,12 @@ static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
}

static WEBP_INLINE int Interpolate(int v) {
const int tab_pos = v >> (kGammaTabFix + 2);  // integer part
const int tab_pos = v >> (GAMMA_TAB_FIX + 2);  // integer part
const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
const int v0 = kLinearToGammaTab[tab_pos];
const int v1 = kLinearToGammaTab[tab_pos + 1];
const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);  // interpolate
assert(tab_pos + 1 < kGammaTabSize + 1);
assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1);
return y;
}

@ -124,7 +131,7 @@ static WEBP_INLINE int Interpolate(int v) {
// U/V value, suitable for RGBToU/V calls.
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
const int y = Interpolate(base_value << shift);  // final uplifted value
return (y + kGammaTabRounder) >> kGammaTabFix;  // descale
return (y + kGammaTabRounder) >> GAMMA_TAB_FIX;  // descale
}

#else
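The renamed gamma constants above feed two lookup tables built by InitGammaTables(): a 256-entry 8-bit-to-linear table scaled by kGammaScale, and a coarser GAMMA_TAB_SIZE-entry linear-to-8-bit table. A self-contained sketch of the same construction, using prefixed names so it does not clash with the real definitions:

#include <math.h>
#include <stdint.h>

#define SK_GAMMA_FIX 12
#define SK_GAMMA_TAB_FIX 7
#define SK_GAMMA_TAB_SIZE (1 << (SK_GAMMA_FIX - SK_GAMMA_TAB_FIX))

static const double kSkGamma = 0.80;
static const int kSkGammaScale = (1 << SK_GAMMA_FIX) - 1;

static uint16_t sk_gamma_to_linear[256];
static int sk_linear_to_gamma[SK_GAMMA_TAB_SIZE + 1];

// Builds both fixed-point tables, mirroring the pow() expressions in the diff.
static void sk_init_gamma_tables(void) {
  const double scale = (double)(1 << SK_GAMMA_TAB_FIX) / kSkGammaScale;
  const double norm = 1. / 255.;
  int v;
  for (v = 0; v <= 255; ++v) {
    sk_gamma_to_linear[v] =
        (uint16_t)(pow(norm * v, kSkGamma) * kSkGammaScale + .5);
  }
  for (v = 0; v <= SK_GAMMA_TAB_SIZE; ++v) {
    sk_linear_to_gamma[v] = (int)(255. * pow(scale * v, 1. / kSkGamma) + .5);
  }
}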
@ -158,415 +165,41 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) {
|
|||
//------------------------------------------------------------------------------
|
||||
// Sharp RGB->YUV conversion
|
||||
|
||||
static const int kNumIterations = 4;
|
||||
static const int kMinDimensionIterativeConversion = 4;
|
||||
|
||||
// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
|
||||
// banding sometimes. Better use extra precision.
|
||||
#define SFIX 2 // fixed-point precision of RGB and Y/W
|
||||
typedef int16_t fixed_t; // signed type with extra SFIX precision for UV
|
||||
typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W
|
||||
|
||||
#define SHALF (1 << SFIX >> 1)
|
||||
#define MAX_Y_T ((256 << SFIX) - 1)
|
||||
#define SROUNDER (1 << (YUV_FIX + SFIX - 1))
|
||||
|
||||
#if defined(USE_GAMMA_COMPRESSION)
|
||||
|
||||
// We use tables of different size and precision for the Rec709 / BT2020
|
||||
// transfer function.
|
||||
#define kGammaF (1./0.45)
|
||||
static uint32_t kLinearToGammaTabS[kGammaTabSize + 2];
|
||||
#define GAMMA_TO_LINEAR_BITS 14
|
||||
static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX
|
||||
static volatile int kGammaTablesSOk = 0;
|
||||
static void InitGammaTablesS(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(InitGammaTablesS) {
|
||||
assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values
|
||||
if (!kGammaTablesSOk) {
|
||||
int v;
|
||||
const double norm = 1. / MAX_Y_T;
|
||||
const double scale = 1. / kGammaTabSize;
|
||||
const double a = 0.09929682680944;
|
||||
const double thresh = 0.018053968510807;
|
||||
const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
|
||||
for (v = 0; v <= MAX_Y_T; ++v) {
|
||||
const double g = norm * v;
|
||||
double value;
|
||||
if (g <= thresh * 4.5) {
|
||||
value = g / 4.5;
|
||||
} else {
|
||||
const double a_rec = 1. / (1. + a);
|
||||
value = pow(a_rec * (g + a), kGammaF);
|
||||
}
|
||||
kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
|
||||
}
|
||||
for (v = 0; v <= kGammaTabSize; ++v) {
|
||||
const double g = scale * v;
|
||||
double value;
|
||||
if (g <= thresh) {
|
||||
value = 4.5 * g;
|
||||
} else {
|
||||
value = (1. + a) * pow(g, 1. / kGammaF) - a;
|
||||
}
|
||||
// we already incorporate the 1/2 rounding constant here
|
||||
kLinearToGammaTabS[v] =
|
||||
(uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1);
|
||||
}
|
||||
// to prevent small rounding errors to cause read-overflow:
|
||||
kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize];
|
||||
kGammaTablesSOk = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS
|
||||
static WEBP_INLINE uint32_t GammaToLinearS(int v) {
|
||||
return kGammaToLinearTabS[v];
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
|
||||
// 'value' is in GAMMA_TO_LINEAR_BITS fractional precision
|
||||
const uint32_t v = value * kGammaTabSize;
|
||||
const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS;
|
||||
// fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision
|
||||
const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part
|
||||
// v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1])
|
||||
const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0];
|
||||
const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1];
|
||||
// Final interpolation. Note that rounding is already included.
|
||||
const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0.
|
||||
const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS);
|
||||
return result;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void InitGammaTablesS(void) {}
|
||||
static WEBP_INLINE uint32_t GammaToLinearS(int v) {
|
||||
return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T;
|
||||
}
|
||||
static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
|
||||
return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS;
|
||||
}
|
||||
|
||||
#endif // USE_GAMMA_COMPRESSION
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static uint8_t clip_8b(fixed_t v) {
|
||||
return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
|
||||
}
|
||||
|
||||
static fixed_y_t clip_y(int y) {
|
||||
return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int RGBToGray(int r, int g, int b) {
|
||||
const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF;
|
||||
return (luma >> YUV_FIX);
|
||||
}
|
||||
|
||||
static uint32_t ScaleDown(int a, int b, int c, int d) {
|
||||
const uint32_t A = GammaToLinearS(a);
|
||||
const uint32_t B = GammaToLinearS(b);
|
||||
const uint32_t C = GammaToLinearS(c);
|
||||
const uint32_t D = GammaToLinearS(d);
|
||||
return LinearToGammaS((A + B + C + D + 2) >> 2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
const uint32_t R = GammaToLinearS(src[0 * w + i]);
|
||||
const uint32_t G = GammaToLinearS(src[1 * w + i]);
|
||||
const uint32_t B = GammaToLinearS(src[2 * w + i]);
|
||||
const uint32_t Y = RGBToGray(R, G, B);
|
||||
dst[i] = (fixed_y_t)LinearToGammaS(Y);
|
||||
}
|
||||
}
|
||||
|
||||
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
|
||||
fixed_t* dst, int uv_w) {
|
||||
int i;
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1],
|
||||
src2[0 * uv_w + 0], src2[0 * uv_w + 1]);
|
||||
const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1],
|
||||
src2[2 * uv_w + 0], src2[2 * uv_w + 1]);
|
||||
const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1],
|
||||
src2[4 * uv_w + 0], src2[4 * uv_w + 1]);
|
||||
const int W = RGBToGray(r, g, b);
|
||||
dst[0 * uv_w] = (fixed_t)(r - W);
|
||||
dst[1 * uv_w] = (fixed_t)(g - W);
|
||||
dst[2 * uv_w] = (fixed_t)(b - W);
|
||||
dst += 1;
|
||||
src1 += 2;
|
||||
src2 += 2;
|
||||
}
|
||||
}
|
||||
|
||||
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
|
||||
const int v0 = (A * 3 + B + 2) >> 2;
|
||||
return clip_y(v0 + W0);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX
|
||||
return ((fixed_y_t)a << SFIX) | SHALF;
|
||||
}
|
||||
|
||||
static void ImportOneRow(const uint8_t* const r_ptr,
|
||||
const uint8_t* const g_ptr,
|
||||
const uint8_t* const b_ptr,
|
||||
int step,
|
||||
int pic_width,
|
||||
fixed_y_t* const dst) {
|
||||
int i;
|
||||
const int w = (pic_width + 1) & ~1;
|
||||
for (i = 0; i < pic_width; ++i) {
|
||||
const int off = i * step;
|
||||
dst[i + 0 * w] = UpLift(r_ptr[off]);
|
||||
dst[i + 1 * w] = UpLift(g_ptr[off]);
|
||||
dst[i + 2 * w] = UpLift(b_ptr[off]);
|
||||
}
|
||||
if (pic_width & 1) { // replicate rightmost pixel
|
||||
dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
|
||||
dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
|
||||
dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
|
||||
}
|
||||
}
|
||||
|
||||
static void InterpolateTwoRows(const fixed_y_t* const best_y,
|
||||
const fixed_t* prev_uv,
|
||||
const fixed_t* cur_uv,
|
||||
const fixed_t* next_uv,
|
||||
int w,
|
||||
fixed_y_t* out1,
|
||||
fixed_y_t* out2) {
|
||||
const int uv_w = w >> 1;
|
||||
const int len = (w - 1) >> 1; // length to filter
|
||||
int k = 3;
|
||||
while (k-- > 0) { // process each R/G/B segments in turn
|
||||
// special boundary case for i==0
|
||||
out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]);
|
||||
out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]);
|
||||
|
||||
WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1);
|
||||
WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1);
|
||||
|
||||
// special boundary case for i == w - 1 when w is even
|
||||
if (!(w & 1)) {
|
||||
out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
|
||||
best_y[w - 1 + 0]);
|
||||
out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
|
||||
best_y[w - 1 + w]);
|
||||
}
|
||||
out1 += w;
|
||||
out2 += w;
|
||||
prev_uv += uv_w;
|
||||
cur_uv += uv_w;
|
||||
next_uv += uv_w;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
|
||||
const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
|
||||
return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
|
||||
const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER;
|
||||
return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
|
||||
const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER;
|
||||
return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
|
||||
}

static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
                            WebPPicture* const picture) {
  int i, j;
  uint8_t* dst_y = picture->y;
  uint8_t* dst_u = picture->u;
  uint8_t* dst_v = picture->v;
  const fixed_t* const best_uv_base = best_uv;
  const int w = (picture->width + 1) & ~1;
  const int h = (picture->height + 1) & ~1;
  const int uv_w = w >> 1;
  const int uv_h = h >> 1;
  for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) {
    for (i = 0; i < picture->width; ++i) {
      const int off = (i >> 1);
      const int W = best_y[i];
      const int r = best_uv[off + 0 * uv_w] + W;
      const int g = best_uv[off + 1 * uv_w] + W;
      const int b = best_uv[off + 2 * uv_w] + W;
      dst_y[i] = ConvertRGBToY(r, g, b);
    }
    best_y += w;
    best_uv += (j & 1) * 3 * uv_w;
    dst_y += picture->y_stride;
  }
  for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
    for (i = 0; i < uv_w; ++i) {
      const int off = i;
      const int r = best_uv[off + 0 * uv_w];
      const int g = best_uv[off + 1 * uv_w];
      const int b = best_uv[off + 2 * uv_w];
      dst_u[i] = ConvertRGBToU(r, g, b);
      dst_v[i] = ConvertRGBToV(r, g, b);
    }
    best_uv += 3 * uv_w;
    dst_u += picture->uv_stride;
    dst_v += picture->uv_stride;
  }
  return 1;
}

//------------------------------------------------------------------------------
// Main function

#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
extern void SharpYuvInit(VP8CPUInfo cpu_info_func);

static void SafeInitSharpYuv(void) {
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
  static pthread_mutex_t initsharpyuv_lock = PTHREAD_MUTEX_INITIALIZER;
  if (pthread_mutex_lock(&initsharpyuv_lock)) return;
#endif

  SharpYuvInit(VP8GetCPUInfo);

#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
  (void)pthread_mutex_unlock(&initsharpyuv_lock);
#endif
}

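SafeInitSharpYuv() above serializes the initialization call with a static mutex on pthread builds. A slightly more conventional way to get exactly-once semantics is pthread_once; this is only an illustrative alternative under that assumption, not what libwebp ships.

#include <pthread.h>

// Sketch: SharpYuvInit and VP8GetCPUInfo are the libwebp symbols used above.
static pthread_once_t sharpyuv_once = PTHREAD_ONCE_INIT;
static void InitSharpYuvBody(void) { SharpYuvInit(VP8GetCPUInfo); }

static void SafeInitSharpYuvAlternative(void) {
  (void)pthread_once(&sharpyuv_once, InitSharpYuvBody);  // runs the body once
}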
static int PreprocessARGB(const uint8_t* r_ptr,
                          const uint8_t* g_ptr,
                          const uint8_t* b_ptr,
                          int step, int rgb_stride,
                          WebPPicture* const picture) {
  // we expand the right/bottom border if needed
  const int w = (picture->width + 1) & ~1;
  const int h = (picture->height + 1) & ~1;
  const int uv_w = w >> 1;
  const int uv_h = h >> 1;
  uint64_t prev_diff_y_sum = ~0;
  int j, iter;

  // TODO(skal): allocate one big memory chunk. But for now, it's easier
  // for valgrind debugging to have several chunks.
  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);  // scratch
  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
  fixed_y_t* best_y = best_y_base;
  fixed_y_t* target_y = target_y_base;
  fixed_t* best_uv = best_uv_base;
  fixed_t* target_uv = target_uv_base;
  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
  int ok;

  if (best_y_base == NULL || best_uv_base == NULL ||
      target_y_base == NULL || target_uv_base == NULL ||
      best_rgb_y == NULL || best_rgb_uv == NULL ||
      tmp_buffer == NULL) {
    ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
    goto End;
  const int ok = SharpYuvConvert(
      r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8,
      picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v,
      picture->uv_stride, /*yuv_bit_depth=*/8, picture->width,
      picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp));
  if (!ok) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
  }
  assert(picture->width >= kMinDimensionIterativeConversion);
  assert(picture->height >= kMinDimensionIterativeConversion);

  WebPInitConvertARGBToYUV();

  // Import RGB samples to W/RGB representation.
  for (j = 0; j < picture->height; j += 2) {
    const int is_last_row = (j == picture->height - 1);
    fixed_y_t* const src1 = tmp_buffer + 0 * w;
    fixed_y_t* const src2 = tmp_buffer + 3 * w;

    // prepare two rows of input
    ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1);
    if (!is_last_row) {
      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
                   step, picture->width, src2);
    } else {
      memcpy(src2, src1, 3 * w * sizeof(*src2));
    }
    StoreGray(src1, best_y + 0, w);
    StoreGray(src2, best_y + w, w);

    UpdateW(src1, target_y, w);
    UpdateW(src2, target_y + w, w);
    UpdateChroma(src1, src2, target_uv, uv_w);
    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
    best_y += 2 * w;
    best_uv += 3 * uv_w;
    target_y += 2 * w;
    target_uv += 3 * uv_w;
    r_ptr += 2 * rgb_stride;
    g_ptr += 2 * rgb_stride;
    b_ptr += 2 * rgb_stride;
  }

  // Iterate and resolve clipping conflicts.
  for (iter = 0; iter < kNumIterations; ++iter) {
    const fixed_t* cur_uv = best_uv_base;
    const fixed_t* prev_uv = best_uv_base;
    uint64_t diff_y_sum = 0;

    best_y = best_y_base;
    best_uv = best_uv_base;
    target_y = target_y_base;
    target_uv = target_uv_base;
    for (j = 0; j < h; j += 2) {
      fixed_y_t* const src1 = tmp_buffer + 0 * w;
      fixed_y_t* const src2 = tmp_buffer + 3 * w;
      {
        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
        prev_uv = cur_uv;
        cur_uv = next_uv;
      }

      UpdateW(src1, best_rgb_y + 0 * w, w);
      UpdateW(src2, best_rgb_y + 1 * w, w);
      UpdateChroma(src1, src2, best_rgb_uv, uv_w);

      // update two rows of Y and one row of RGB
      diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w);
      WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);

      best_y += 2 * w;
      best_uv += 3 * uv_w;
      target_y += 2 * w;
      target_uv += 3 * uv_w;
    }
    // test exit condition
    if (iter > 0) {
      if (diff_y_sum < diff_y_threshold) break;
      if (diff_y_sum > prev_diff_y_sum) break;
    }
    prev_diff_y_sum = diff_y_sum;
  }
  // final reconstruction
  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture);

 End:
  WebPSafeFree(best_y_base);
  WebPSafeFree(best_uv_base);
  WebPSafeFree(target_y_base);
  WebPSafeFree(target_uv_base);
  WebPSafeFree(best_rgb_y);
  WebPSafeFree(best_rgb_uv);
  WebPSafeFree(tmp_buffer);
  return ok;
}
#undef SAFE_ALLOC
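With 1.2.4 the sharp (iterative) RGB->YUV path is delegated to the new SharpYuvConvert() entry point visible in the replacement body above. A hedged sketch of calling it directly on an interleaved RGBA buffer follows; the header name and the standalone setup are assumptions, only the argument layout mirrors the call shown in this diff.

#include <stdint.h>
#include "sharpyuv/sharpyuv.h"  // assumed header for SharpYuvConvert()

// 'rgba' is width x height interleaved 8-bit RGBA; y/u/v are caller-allocated
// planes (u/v assumed to be (width+1)/2 x (height+1)/2) with the given strides.
static int SharpRgbaToYuv420(const uint8_t* rgba, int width, int height,
                             uint8_t* y, int y_stride,
                             uint8_t* u, uint8_t* v, int uv_stride) {
  const uint8_t* const r_ptr = rgba + 0;
  const uint8_t* const g_ptr = rgba + 1;
  const uint8_t* const b_ptr = rgba + 2;
  const int step = 4;                // bytes between consecutive pixels
  const int rgb_stride = 4 * width;  // bytes between consecutive rows
  return SharpYuvConvert(r_ptr, g_ptr, b_ptr, step, rgb_stride,
                         /*rgb_bit_depth=*/8,
                         y, y_stride, u, uv_stride, v, uv_stride,
                         /*yuv_bit_depth=*/8, width, height,
                         SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp));
}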

//------------------------------------------------------------------------------
// "Fast" regular RGB->YUV

@ -591,8 +224,8 @@ static const int kAlphaFix = 19;
// and constant are adjusted very tightly to fit 32b arithmetic.
// In particular, they use the fact that the operands for 'v / a' are actually
// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
// overflow is: kGammaFix + kAlphaFix <= 31.
// with ai in [0..255] and pi in [0..1<<GAMMA_FIX). The constraint to avoid
// overflow is: GAMMA_FIX + kAlphaFix <= 31.
static const uint32_t kInvAlpha[4 * 0xff + 1] = {
  0,  /* alpha = 0 */
  524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
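The table stores (1 << kAlphaFix) / alpha, with kAlphaFix = 19 per the hunk context (524288 is exactly 1 << 19), so the alpha-weighted average v / a becomes a multiply and a shift. A small hedged sketch of how such a table is typically consumed; the function name and the 64-bit cast are illustrative, only the scale and the overflow constraint come from the comment above.

#include <assert.h>
#include <stdint.h>

#define ALPHA_FIX 19  /* matches 'static const int kAlphaFix = 19' above */
extern const uint32_t kInvAlpha[4 * 0xff + 1];  /* kInvAlpha[a] ~ (1 << 19) / a */

/* v = a0*p0 + a1*p1 + a2*p2 + a3*p3, a = a0 + a1 + a2 + a3 (a > 0).
 * Upstream relies on GAMMA_FIX + kAlphaFix <= 31 so that v * kInvAlpha[a]
 * fits 32-bit arithmetic; the 64-bit cast here is just for the sketch. */
static uint32_t WeightedAverage(uint32_t v, uint32_t a) {
  assert(a > 0 && a <= 4 * 0xff);
  return (uint32_t)(((uint64_t)v * kInvAlpha[a]) >> ALPHA_FIX);
}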

@ -818,11 +451,20 @@ static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
    dst[0] = SUM4(r_ptr + j, step);
    dst[1] = SUM4(g_ptr + j, step);
    dst[2] = SUM4(b_ptr + j, step);
    // MemorySanitizer may raise false positives with data that passes through
    // RGBA32PackedToPlanar_16b_SSE41() due to incorrect modeling of shuffles.
    // See https://crbug.com/webp/573.
#ifdef WEBP_MSAN
    dst[3] = 0;
#endif
  }
  if (width & 1) {
    dst[0] = SUM2(r_ptr + j);
    dst[1] = SUM2(g_ptr + j);
    dst[2] = SUM2(b_ptr + j);
#ifdef WEBP_MSAN
    dst[3] = 0;
#endif
  }
}

@ -863,18 +505,18 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
    use_iterative_conversion = 0;
  }

  if (!WebPPictureAllocYUVA(picture, width, height)) {
  if (!WebPPictureAllocYUVA(picture)) {
    return 0;
  }
  if (has_alpha) {
    assert(step == 4);
#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
    assert(kAlphaFix + kGammaFix <= 31);
    assert(kAlphaFix + GAMMA_FIX <= 31);
#endif
  }

  if (use_iterative_conversion) {
    InitGammaTablesS();
    SafeInitSharpYuv();
    if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
      return 0;
    }

@ -1044,7 +686,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }
  // Allocate a new argb buffer (discarding the previous one).
  if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
  if (!WebPPictureAllocARGB(picture)) return 0;
  picture->use_argb = 1;

  // Convert

@ -1106,6 +748,8 @@ static int Import(WebPPicture* const picture,
  const int width = picture->width;
  const int height = picture->height;

  if (abs(rgb_stride) < (import_alpha ? 4 : 3) * width) return 0;

  if (!picture->use_argb) {
    const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,

@ -1163,24 +807,24 @@ static int Import(WebPPicture* const picture,
#if !defined(WEBP_REDUCE_CSP)

int WebPPictureImportBGR(WebPPicture* picture,
                         const uint8_t* rgb, int rgb_stride) {
  return (picture != NULL && rgb != NULL)
             ? Import(picture, rgb, rgb_stride, 3, 1, 0)
                         const uint8_t* bgr, int bgr_stride) {
  return (picture != NULL && bgr != NULL)
             ? Import(picture, bgr, bgr_stride, 3, 1, 0)
             : 0;
}

int WebPPictureImportBGRA(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  return (picture != NULL && rgba != NULL)
             ? Import(picture, rgba, rgba_stride, 4, 1, 1)
                          const uint8_t* bgra, int bgra_stride) {
  return (picture != NULL && bgra != NULL)
             ? Import(picture, bgra, bgra_stride, 4, 1, 1)
             : 0;
}


int WebPPictureImportBGRX(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  return (picture != NULL && rgba != NULL)
             ? Import(picture, rgba, rgba_stride, 4, 1, 0)
                          const uint8_t* bgrx, int bgrx_stride) {
  return (picture != NULL && bgrx != NULL)
             ? Import(picture, bgrx, bgrx_stride, 4, 1, 0)
             : 0;
}

@ -1201,9 +845,9 @@ int WebPPictureImportRGBA(WebPPicture* picture,
}

int WebPPictureImportRGBX(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  return (picture != NULL && rgba != NULL)
             ? Import(picture, rgba, rgba_stride, 4, 0, 0)
                          const uint8_t* rgbx, int rgbx_stride) {
  return (picture != NULL && rgbx != NULL)
             ? Import(picture, rgbx, rgbx_stride, 4, 0, 0)
             : 0;
}

@ -45,6 +45,22 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) {

//------------------------------------------------------------------------------

int WebPValidatePicture(const WebPPicture* const picture) {
  if (picture == NULL) return 0;
  if (picture->width <= 0 || picture->height <= 0) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
  }
  if (picture->width <= 0 || picture->width / 4 > INT_MAX / 4 ||
      picture->height <= 0 || picture->height / 4 > INT_MAX / 4) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
  }
  if (picture->colorspace != WEBP_YUV420 &&
      picture->colorspace != WEBP_YUV420A) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }
  return 1;
}

static void WebPPictureResetBufferARGB(WebPPicture* const picture) {
  picture->memory_argb_ = NULL;
  picture->argb = NULL;

@ -63,18 +79,17 @@ void WebPPictureResetBuffers(WebPPicture* const picture) {
  WebPPictureResetBufferYUVA(picture);
}

int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
int WebPPictureAllocARGB(WebPPicture* const picture) {
  void* memory;
  const int width = picture->width;
  const int height = picture->height;
  const uint64_t argb_size = (uint64_t)width * height;

  assert(picture != NULL);
  if (!WebPValidatePicture(picture)) return 0;

  WebPSafeFree(picture->memory_argb_);
  WebPPictureResetBufferARGB(picture);

  if (width <= 0 || height <= 0) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
  }
  // allocate a new buffer.
  memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb));
  if (memory == NULL) {

@ -86,10 +101,10 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
  return 1;
}

int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
  const WebPEncCSP uv_csp =
      (WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK);
int WebPPictureAllocYUVA(WebPPicture* const picture) {
  const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT;
  const int width = picture->width;
  const int height = picture->height;
  const int y_stride = width;
  const int uv_width = (int)(((int64_t)width + 1) >> 1);
  const int uv_height = (int)(((int64_t)height + 1) >> 1);

@ -98,15 +113,11 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
  uint64_t y_size, uv_size, a_size, total_size;
  uint8_t* mem;

  assert(picture != NULL);
  if (!WebPValidatePicture(picture)) return 0;

  WebPSafeFree(picture->memory_);
  WebPPictureResetBufferYUVA(picture);

  if (uv_csp != WEBP_YUV420) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }

  // alpha
  a_width = has_alpha ? width : 0;
  a_stride = a_width;

@ -152,15 +163,12 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {

int WebPPictureAlloc(WebPPicture* picture) {
  if (picture != NULL) {
    const int width = picture->width;
    const int height = picture->height;

    WebPPictureFree(picture);   // erase previous buffer

    if (!picture->use_argb) {
      return WebPPictureAllocYUVA(picture, width, height);
      return WebPPictureAllocYUVA(picture);
    } else {
      return WebPPictureAllocARGB(picture, width, height);
      return WebPPictureAllocARGB(picture);
    }
  }
  return 1;
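Since the allocators now take their dimensions from the WebPPicture itself (and run WebPValidatePicture() first), a typical caller just fills in the struct before calling the public WebPPictureAlloc(). A minimal sketch using the documented public API:

#include "webp/encode.h"

// Sketch: allocate an ARGB canvas with the fields-only API.
static int AllocArgbCanvas(WebPPicture* const pic, int width, int height) {
  if (!WebPPictureInit(pic)) return 0;  // version / ABI check
  pic->width = width;
  pic->height = height;
  pic->use_argb = 1;                    // ARGB rather than YUV(A)
  // Dimensions and colorspace are validated internally; returns 0 on error
  // with pic->error_code set (e.g. VP8_ENC_ERROR_BAD_DIMENSION).
  return WebPPictureAlloc(pic);
}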

@ -13,14 +13,15 @@

#include "src/webp/encode.h"

#if !defined(WEBP_REDUCE_SIZE)

#include <assert.h>
#include <stdlib.h>

#include "src/enc/vp8i_enc.h"

#if !defined(WEBP_REDUCE_SIZE)
#include "src/utils/rescaler_utils.h"
#include "src/utils/utils.h"
#endif  // !defined(WEBP_REDUCE_SIZE)

#define HALVE(x) (((x) + 1) >> 1)

@ -56,6 +57,7 @@ static int AdjustAndCheckRectangle(const WebPPicture* const pic,
  return 1;
}

#if !defined(WEBP_REDUCE_SIZE)
int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
  if (src == NULL || dst == NULL) return 0;
  if (src == dst) return 1;

@ -81,6 +83,7 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
  }
  return 1;
}
#endif  // !defined(WEBP_REDUCE_SIZE)

int WebPPictureIsView(const WebPPicture* picture) {
  if (picture == NULL) return 0;

@ -120,6 +123,7 @@ int WebPPictureView(const WebPPicture* src,
  return 1;
}

#if !defined(WEBP_REDUCE_SIZE)
//------------------------------------------------------------------------------
// Picture cropping

@ -198,34 +202,34 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
  }
}

int WebPPictureRescale(WebPPicture* pic, int width, int height) {
int WebPPictureRescale(WebPPicture* picture, int width, int height) {
  WebPPicture tmp;
  int prev_width, prev_height;
  rescaler_t* work;

  if (pic == NULL) return 0;
  prev_width = pic->width;
  prev_height = pic->height;
  if (picture == NULL) return 0;
  prev_width = picture->width;
  prev_height = picture->height;
  if (!WebPRescalerGetScaledDimensions(
          prev_width, prev_height, &width, &height)) {
    return 0;
  }

  PictureGrabSpecs(pic, &tmp);
  PictureGrabSpecs(picture, &tmp);
  tmp.width = width;
  tmp.height = height;
  if (!WebPPictureAlloc(&tmp)) return 0;

  if (!pic->use_argb) {
  if (!picture->use_argb) {
    work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
    if (work == NULL) {
      WebPPictureFree(&tmp);
      return 0;
    }
    // If present, we need to rescale alpha first (for AlphaMultiplyY).
    if (pic->a != NULL) {
    if (picture->a != NULL) {
      WebPInitAlphaProcessing();
      if (!RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
      if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride,
                        tmp.a, width, height, tmp.a_stride, work, 1)) {
        return 0;
      }

@ -233,17 +237,15 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {

    // We take transparency into account on the luma plane only. That's not
    // totally exact blending, but still is a good approximation.
    AlphaMultiplyY(pic, 0);
    if (!RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
    AlphaMultiplyY(picture, 0);
    if (!RescalePlane(picture->y, prev_width, prev_height, picture->y_stride,
                      tmp.y, width, height, tmp.y_stride, work, 1) ||
        !RescalePlane(pic->u,
                      HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
                      tmp.u,
                      HALVE(width), HALVE(height), tmp.uv_stride, work, 1) ||
        !RescalePlane(pic->v,
                      HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
                      tmp.v,
                      HALVE(width), HALVE(height), tmp.uv_stride, work, 1)) {
        !RescalePlane(picture->u, HALVE(prev_width), HALVE(prev_height),
                      picture->uv_stride, tmp.u, HALVE(width), HALVE(height),
                      tmp.uv_stride, work, 1) ||
        !RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height),
                      picture->uv_stride, tmp.v, HALVE(width), HALVE(height),
                      tmp.uv_stride, work, 1)) {
      return 0;
    }
    AlphaMultiplyY(&tmp, 1);

@ -257,18 +259,17 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
    // weighting first (black-matting), scale the RGB values, and remove
    // the premultiplication afterward (while preserving the alpha channel).
    WebPInitAlphaProcessing();
    AlphaMultiplyARGB(pic, 0);
    if (!RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
                      pic->argb_stride * 4,
                      (uint8_t*)tmp.argb, width, height,
                      tmp.argb_stride * 4, work, 4)) {
    AlphaMultiplyARGB(picture, 0);
    if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height,
                      picture->argb_stride * 4, (uint8_t*)tmp.argb, width,
                      height, tmp.argb_stride * 4, work, 4)) {
      return 0;
    }
    AlphaMultiplyARGB(&tmp, 1);
  }
  WebPPictureFree(pic);
  WebPPictureFree(picture);
  WebPSafeFree(work);
  *pic = tmp;
  *picture = tmp;
  return 1;
}

@ -280,23 +281,6 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
  return 0;
}

int WebPPictureIsView(const WebPPicture* picture) {
  (void)picture;
  return 0;
}

int WebPPictureView(const WebPPicture* src,
                    int left, int top, int width, int height,
                    WebPPicture* dst) {
  (void)src;
  (void)left;
  (void)top;
  (void)width;
  (void)height;
  (void)dst;
  return 0;
}

int WebPPictureCrop(WebPPicture* pic,
                    int left, int top, int width, int height) {
  (void)pic;

@ -190,27 +190,28 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
  return (0xff000000u | (r << 16) | (g << 8) | b);
}

void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb) {
  const int red = (background_rgb >> 16) & 0xff;
  const int green = (background_rgb >> 8) & 0xff;
  const int blue = (background_rgb >> 0) & 0xff;
  int x, y;
  if (pic == NULL) return;
  if (!pic->use_argb) {
    const int uv_width = (pic->width >> 1);  // omit last pixel during u/v loop
  if (picture == NULL) return;
  if (!picture->use_argb) {
    // omit last pixel during u/v loop
    const int uv_width = (picture->width >> 1);
    const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF);
    // VP8RGBToU/V expects the u/v values summed over four pixels
    const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
    const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
    const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
    uint8_t* y_ptr = pic->y;
    uint8_t* u_ptr = pic->u;
    uint8_t* v_ptr = pic->v;
    uint8_t* a_ptr = pic->a;
    const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
    uint8_t* y_ptr = picture->y;
    uint8_t* u_ptr = picture->u;
    uint8_t* v_ptr = picture->v;
    uint8_t* a_ptr = picture->a;
    if (!has_alpha || a_ptr == NULL) return;    // nothing to do
    for (y = 0; y < pic->height; ++y) {
    for (y = 0; y < picture->height; ++y) {
      // Luma blending
      for (x = 0; x < pic->width; ++x) {
      for (x = 0; x < picture->width; ++x) {
        const uint8_t alpha = a_ptr[x];
        if (alpha < 0xff) {
          y_ptr[x] = BLEND(Y0, y_ptr[x], alpha);

@ -219,7 +220,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
      // Chroma blending every even line
      if ((y & 1) == 0) {
        uint8_t* const a_ptr2 =
            (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
            (y + 1 == picture->height) ? a_ptr : a_ptr + picture->a_stride;
        for (x = 0; x < uv_width; ++x) {
          // Average four alpha values into a single blending weight.
          // TODO(skal): might lead to visible contouring. Can we do better?

@ -229,24 +230,24 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
          u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
          v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
        }
        if (pic->width & 1) {   // rightmost pixel
        if (picture->width & 1) {   // rightmost pixel
          const uint32_t alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
          u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
          v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
        }
      } else {
        u_ptr += pic->uv_stride;
        v_ptr += pic->uv_stride;
        u_ptr += picture->uv_stride;
        v_ptr += picture->uv_stride;
      }
      memset(a_ptr, 0xff, pic->width);  // reset alpha value to opaque
      a_ptr += pic->a_stride;
      y_ptr += pic->y_stride;
      memset(a_ptr, 0xff, picture->width);  // reset alpha value to opaque
      a_ptr += picture->a_stride;
      y_ptr += picture->y_stride;
    }
  } else {
    uint32_t* argb = pic->argb;
    uint32_t* argb = picture->argb;
    const uint32_t background = MakeARGB32(red, green, blue);
    for (y = 0; y < pic->height; ++y) {
      for (x = 0; x < pic->width; ++x) {
    for (y = 0; y < picture->height; ++y) {
      for (x = 0; x < picture->width; ++x) {
        const int alpha = (argb[x] >> 24) & 0xff;
        if (alpha != 0xff) {
          if (alpha > 0) {

@ -262,7 +263,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
          }
        }
      }
      argb += pic->argb_stride;
      argb += picture->argb_stride;
    }
  }
}
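From the caller's side, WebPBlendAlpha() flattens a picture onto an opaque background before encoding; its public declaration appears in the encode.h hunk further down. A short hedged usage sketch, compositing over white:

#include "webp/encode.h"

// Sketch: drop the alpha channel by blending over white (background is a
// 24-bit RGB triplet, per the documentation in encode.h).
static void FlattenOnWhite(WebPPicture* const pic) {
  WebPBlendAlpha(pic, 0x00ffffffu);
  // After the call every alpha value is 0xff, so WebPPictureHasTransparency()
  // will report the picture as fully opaque.
}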

@ -16,6 +16,7 @@

#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/enc/vp8i_enc.h"
#include "src/enc/vp8li_enc.h"

#define MAX_DIFF_COST (1e30f)

@ -31,10 +32,10 @@ static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
// Methods to calculate Entropy (Shannon).

static float PredictionCostSpatial(const int counts[256], int weight_0,
                                   double exp_val) {
                                   float exp_val) {
  const int significant_symbols = 256 >> 4;
  const double exp_decay_factor = 0.6;
  double bits = weight_0 * counts[0];
  const float exp_decay_factor = 0.6f;
  float bits = (float)weight_0 * counts[0];
  int i;
  for (i = 1; i < significant_symbols; ++i) {
    bits += exp_val * (counts[i] + counts[256 - i]);

@ -46,9 +47,9 @@ static float PredictionCostSpatial(const int counts[256], int weight_0,
static float PredictionCostSpatialHistogram(const int accumulated[4][256],
                                            const int tile[4][256]) {
  int i;
  double retval = 0;
  float retval = 0.f;
  for (i = 0; i < 4; ++i) {
    const double kExpValue = 0.94;
    const float kExpValue = 0.94f;
    retval += PredictionCostSpatial(tile[i], 1, kExpValue);
    retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
  }

@ -472,12 +473,15 @@ static void CopyImageWithPrediction(int width, int height,
// with respect to predictions. If near_lossless_quality < 100, applies
// near lossless processing, shaving off more bits of residuals for lower
// qualities.
void VP8LResidualImage(int width, int height, int bits, int low_effort,
                       uint32_t* const argb, uint32_t* const argb_scratch,
                       uint32_t* const image, int near_lossless_quality,
                       int exact, int used_subtract_green) {
int VP8LResidualImage(int width, int height, int bits, int low_effort,
                      uint32_t* const argb, uint32_t* const argb_scratch,
                      uint32_t* const image, int near_lossless_quality,
                      int exact, int used_subtract_green,
                      const WebPPicture* const pic, int percent_range,
                      int* const percent) {
  const int tiles_per_row = VP8LSubSampleSize(width, bits);
  const int tiles_per_col = VP8LSubSampleSize(height, bits);
  int percent_start = *percent;
  int tile_y;
  int histo[4][256];
  const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);

@ -491,17 +495,24 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort,
  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
    int tile_x;
    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
      const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
          bits, histo, argb_scratch, argb, max_quantization, exact,
          used_subtract_green, image);
      const int pred = GetBestPredictorForTile(
          width, height, tile_x, tile_y, bits, histo, argb_scratch, argb,
          max_quantization, exact, used_subtract_green, image);
      image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
    }

    if (!WebPReportProgress(
            pic, percent_start + percent_range * tile_y / tiles_per_col,
            percent)) {
      return 0;
    }
  }

  CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb,
                          low_effort, max_quantization, exact,
                          used_subtract_green);
  return WebPReportProgress(pic, percent_start + percent_range, percent);
}

//------------------------------------------------------------------------------

@ -532,7 +543,7 @@ static float PredictionCostCrossColor(const int accumulated[256],
                                      const int counts[256]) {
  // Favor low entropy, locally and globally.
  // Favor small absolute values for PredictionCostSpatial
  static const double kExpValue = 2.4;
  static const float kExpValue = 2.4f;
  return VP8LCombinedShannonEntropy(counts, accumulated) +
         PredictionCostSpatial(counts, 3, kExpValue);
}

@ -714,11 +725,14 @@ static void CopyTileWithColorTransform(int xsize, int ysize,
  }
}

void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
                             uint32_t* const argb, uint32_t* image) {
int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
                            uint32_t* const argb, uint32_t* image,
                            const WebPPicture* const pic, int percent_range,
                            int* const percent) {
  const int max_tile_size = 1 << bits;
  const int tile_xsize = VP8LSubSampleSize(width, bits);
  const int tile_ysize = VP8LSubSampleSize(height, bits);
  int percent_start = *percent;
  int accumulated_red_histo[256] = { 0 };
  int accumulated_blue_histo[256] = { 0 };
  int tile_x, tile_y;

@ -768,5 +782,11 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
      }
    }
    }
    if (!WebPReportProgress(
            pic, percent_start + percent_range * tile_y / tile_ysize,
            percent)) {
      return 0;
    }
  }
  return 1;
}
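VP8LResidualImage() and VP8LColorSpaceTransform() now take the picture plus a percent_range/percent pair, so long transform loops can report progress and honour user aborts through WebPReportProgress(). A hedged sketch of how a caller might budget the overall progress across the two passes; the 20/30 split, the wrapper name and the fixed quality values are illustrative, only the two signatures mirror the declarations in this diff.

// Illustrative only: give 20% of the progress budget to the residual pass and
// 30% to the cross-color pass, stopping early if the user hook aborts.
static int ApplyTransformsWithProgress(uint32_t* const argb,
                                       uint32_t* const argb_scratch,
                                       uint32_t* const image,
                                       int width, int height, int bits,
                                       const WebPPicture* const pic,
                                       int* const percent) {
  if (!VP8LResidualImage(width, height, bits, /*low_effort=*/0, argb,
                         argb_scratch, image, /*near_lossless_quality=*/100,
                         /*exact=*/0, /*used_subtract_green=*/0, pic,
                         /*percent_range=*/20, percent)) {
    return 0;  // error or user abort; pic->error_code is already set
  }
  return VP8LColorSpaceTransform(width, height, bits, /*quality=*/75, argb,
                                 image, pic, /*percent_range=*/30, percent);
}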

@ -533,7 +533,8 @@ static void InitScore(VP8ModeScore* const rd) {
  rd->score = MAX_COST;
}

static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
static void CopyScore(VP8ModeScore* WEBP_RESTRICT const dst,
                      const VP8ModeScore* WEBP_RESTRICT const src) {
  dst->D = src->D;
  dst->SD = src->SD;
  dst->R = src->R;

@ -542,7 +543,8 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
  dst->score = src->score;
}

static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
                     const VP8ModeScore* WEBP_RESTRICT const src) {
  dst->D += src->D;
  dst->SD += src->SD;
  dst->R += src->R;
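The WEBP_RESTRICT annotations added throughout this file document that the two pointers never alias. Assuming the macro expands to the compiler's restrict qualifier (its definition is not part of this hunk), the promise looks like this in plain C99; the struct and function names below are illustrative only.

// Illustrative only: with 'restrict', the compiler may assume dst and src
// never overlap, so the loads of src->D/SD/R can be kept in registers
// across the stores into dst.
typedef struct { int D, SD, R; } Score;

static void AddScoreRestrict(Score* restrict dst, const Score* restrict src) {
  dst->D  += src->D;
  dst->SD += src->SD;
  dst->R  += src->R;
}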

@ -588,10 +590,10 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
// Coefficient type.
enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 };

static int TrellisQuantizeBlock(const VP8Encoder* const enc,
static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
                                int16_t in[16], int16_t out[16],
                                int ctx0, int coeff_type,
                                const VP8Matrix* const mtx,
                                const VP8Matrix* WEBP_RESTRICT const mtx,
                                int lambda) {
  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
  CostArrayPtr const costs =

@ -767,9 +769,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
// all at once. Output is the reconstructed block in *yuv_out, and the
// quantized levels in *levels.

static int ReconstructIntra16(VP8EncIterator* const it,
                              VP8ModeScore* const rd,
                              uint8_t* const yuv_out,
static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
                              VP8ModeScore* WEBP_RESTRICT const rd,
                              uint8_t* WEBP_RESTRICT const yuv_out,
                              int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];

@ -819,10 +821,10 @@ static int ReconstructIntra16(VP8EncIterator* const it,
  return nz;
}

static int ReconstructIntra4(VP8EncIterator* const it,
static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it,
                             int16_t levels[16],
                             const uint8_t* const src,
                             uint8_t* const yuv_out,
                             const uint8_t* WEBP_RESTRICT const src,
                             uint8_t* WEBP_RESTRICT const yuv_out,
                             int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];

@ -855,7 +857,8 @@ static int ReconstructIntra4(VP8EncIterator* const it,

// Quantize as usual, but also compute and return the quantization error.
// Error is already divided by DSHIFT.
static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
static int QuantizeSingle(int16_t* WEBP_RESTRICT const v,
                          const VP8Matrix* WEBP_RESTRICT const mtx) {
  int V = *v;
  const int sign = (V < 0);
  if (sign) V = -V;

@ -869,9 +872,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
  return (sign ? -V : V) >> DSCALE;
}

static void CorrectDCValues(const VP8EncIterator* const it,
                            const VP8Matrix* const mtx,
                            int16_t tmp[][16], VP8ModeScore* const rd) {
static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
                            const VP8Matrix* WEBP_RESTRICT const mtx,
                            int16_t tmp[][16],
                            VP8ModeScore* WEBP_RESTRICT const rd) {
  //         | top[0] | top[1]
  // --------+--------+---------
  // left[0] | tmp[0]   tmp[1]  <->   err0 err1

@ -902,8 +906,8 @@ static void CorrectDCValues(const VP8EncIterator* const it,
  }
}

static void StoreDiffusionErrors(VP8EncIterator* const it,
                                 const VP8ModeScore* const rd) {
static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
                                 const VP8ModeScore* WEBP_RESTRICT const rd) {
  int ch;
  for (ch = 0; ch <= 1; ++ch) {
    int8_t* const top = it->top_derr_[it->x_][ch];

@ -922,8 +926,9 @@ static void StoreDiffusionErrors(VP8EncIterator* const it,

//------------------------------------------------------------------------------

static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
                         uint8_t* const yuv_out, int mode) {
static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
                         VP8ModeScore* WEBP_RESTRICT const rd,
                         uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
  const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;

@ -994,7 +999,8 @@ static void SwapOut(VP8EncIterator* const it) {
  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
}

static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
                            VP8ModeScore* WEBP_RESTRICT rd) {
  const int kNumBlocks = 16;
  VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_i16_;

@ -1054,7 +1060,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
//------------------------------------------------------------------------------

// return the cost array corresponding to the surrounding prediction modes.
static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it,
                                     const uint8_t modes[16]) {
  const int preds_w = it->enc_->preds_w_;
  const int x = (it->i4_ & 3), y = it->i4_ >> 2;

@ -1063,7 +1069,8 @@ static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
  return VP8FixedCostsI4[top][left];
}

static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
                          VP8ModeScore* WEBP_RESTRICT const rd) {
  const VP8Encoder* const enc = it->enc_;
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_i4_;

@ -1159,7 +1166,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

//------------------------------------------------------------------------------

static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
                       VP8ModeScore* WEBP_RESTRICT const rd) {
  const int kNumBlocks = 8;
  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_uv_;

@ -1211,7 +1219,8 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
//------------------------------------------------------------------------------
// Final reconstruction and quantization.

static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
                           VP8ModeScore* WEBP_RESTRICT const rd) {
  const VP8Encoder* const enc = it->enc_;
  const int is_i16 = (it->mb_->type_ == 1);
  int nz = 0;

@ -1236,9 +1245,9 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
}

// Refine intra16/intra4 sub-modes based on distortion only (not rate).
static void RefineUsingDistortion(VP8EncIterator* const it,
static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
                                  int try_both_modes, int refine_uv_mode,
                                  VP8ModeScore* const rd) {
                                  VP8ModeScore* WEBP_RESTRICT const rd) {
  score_t best_score = MAX_COST;
  int nz = 0;
  int mode;

@ -1352,7 +1361,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
//------------------------------------------------------------------------------
// Entry point

int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
                VP8ModeScore* WEBP_RESTRICT const rd,
                VP8RDLevel rd_opt) {
  int is_skipped;
  const int method = it->enc_->method_;

@ -32,7 +32,7 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 1
#define ENC_MIN_VERSION 2
#define ENC_REV_VERSION 2
#define ENC_REV_VERSION 4

enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
       MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost

@ -470,7 +470,8 @@ int VP8EncAnalyze(VP8Encoder* const enc);
// Sets up segment's quantization values, base_quant_ and filter strengths.
void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
// Pick best modes and fills the levels. Returns true if skipped.
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
                VP8ModeScore* WEBP_RESTRICT const rd,
                VP8RDLevel rd_opt);

// in alpha.c

@ -490,19 +491,24 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta);

// misc utils for picture_*.c:

// Returns true if 'picture' is non-NULL and dimensions/colorspace are within
// their valid ranges. If returning false, the 'error_code' in 'picture' is
// updated.
int WebPValidatePicture(const WebPPicture* const picture);

// Remove reference to the ARGB/YUVA buffer (doesn't free anything).
void WebPPictureResetBuffers(WebPPicture* const picture);

// Allocates ARGB buffer of given dimension (previous one is always free'd).
// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
// out-of-memory).
int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
// Allocates ARGB buffer according to set width/height (previous one is
// always free'd). Preserves the YUV(A) buffer. Returns false in case of error
// (invalid param, out-of-memory).
int WebPPictureAllocARGB(WebPPicture* const picture);

// Allocates YUVA buffer of given dimension (previous one is always free'd).
// Uses picture->csp to determine whether an alpha buffer is needed.
// Allocates YUVA buffer according to set width/height (previous one is always
// free'd). Uses picture->csp to determine whether an alpha buffer is needed.
// Preserves the ARGB buffer.
// Returns false in case of error (invalid param, out-of-memory).
int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
int WebPPictureAllocYUVA(WebPPicture* const picture);

// Replace samples that are fully transparent by 'color' to help compressibility
// (no guarantee, though). Assumes pic->use_argb is true.

(File diff suppressed because it is too large.)

@ -89,9 +89,10 @@ int VP8LEncodeImage(const WebPConfig* const config,

// Encodes the main image stream using the supplied bit writer.
// If 'use_cache' is false, disables the use of color cache.
WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
                                   const WebPPicture* const picture,
                                   VP8LBitWriter* const bw, int use_cache);
// Returns false in case of error (stored in picture->error_code).
int VP8LEncodeStream(const WebPConfig* const config,
                     const WebPPicture* const picture, VP8LBitWriter* const bw,
                     int use_cache);

#if (WEBP_NEAR_LOSSLESS == 1)
// in near_lossless.c

@ -103,13 +104,18 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
//------------------------------------------------------------------------------
// Image transforms in predictor.c.

void VP8LResidualImage(int width, int height, int bits, int low_effort,
                       uint32_t* const argb, uint32_t* const argb_scratch,
                       uint32_t* const image, int near_lossless, int exact,
                       int used_subtract_green);
// pic and percent are for progress.
// Returns false in case of error (stored in pic->error_code).
int VP8LResidualImage(int width, int height, int bits, int low_effort,
                      uint32_t* const argb, uint32_t* const argb_scratch,
                      uint32_t* const image, int near_lossless, int exact,
                      int used_subtract_green, const WebPPicture* const pic,
                      int percent_range, int* const percent);

void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
                             uint32_t* const argb, uint32_t* image);
int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
                            uint32_t* const argb, uint32_t* image,
                            const WebPPicture* const pic, int percent_range,
                            int* const percent);

//------------------------------------------------------------------------------

@ -336,9 +336,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
  if (!WebPValidateConfig(config)) {
    return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }
  if (pic->width <= 0 || pic->height <= 0) {
    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
  }
  if (!WebPValidatePicture(pic)) return 0;
  if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) {
    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
  }

@ -70,6 +70,7 @@ void WebPMuxDelete(WebPMux* mux) {
    err = ChunkAssignData(&chunk, data, copy_data, tag);                     \
    if (err == WEBP_MUX_OK) {                                                \
      err = ChunkSetHead(&chunk, (LIST));                                    \
      if (err != WEBP_MUX_OK) ChunkRelease(&chunk);                          \
    }                                                                        \
    return err;                                                              \
  }

@ -29,7 +29,7 @@ extern "C" {

#define MUX_MAJ_VERSION 1
#define MUX_MIN_VERSION 2
#define MUX_REV_VERSION 2
#define MUX_REV_VERSION 4

// Chunk object.
typedef struct WebPChunk WebPChunk;

@ -155,17 +155,18 @@ WebPMuxError ChunkSetHead(WebPChunk* const chunk,

WebPMuxError ChunkAppend(WebPChunk* const chunk,
                         WebPChunk*** const chunk_list) {
  WebPMuxError err;
  assert(chunk_list != NULL && *chunk_list != NULL);

  if (**chunk_list == NULL) {
    ChunkSetHead(chunk, *chunk_list);
    err = ChunkSetHead(chunk, *chunk_list);
  } else {
    WebPChunk* last_chunk = **chunk_list;
    while (last_chunk->next_ != NULL) last_chunk = last_chunk->next_;
    ChunkSetHead(chunk, &last_chunk->next_);
    *chunk_list = &last_chunk->next_;
    err = ChunkSetHead(chunk, &last_chunk->next_);
    if (err == WEBP_MUX_OK) *chunk_list = &last_chunk->next_;
  }
  return WEBP_MUX_OK;
  return err;
}

//------------------------------------------------------------------------------

@ -441,7 +441,7 @@ WEBP_EXTERN int WebPPictureCrop(WebPPicture* picture,
// the original dimension will be lost). Picture 'dst' need not be initialized
// with WebPPictureInit() if it is different from 'src', since its content will
// be overwritten.
// Returns false in case of memory allocation error or invalid parameters.
// Returns false in case of invalid parameters.
WEBP_EXTERN int WebPPictureView(const WebPPicture* src,
                                int left, int top, int width, int height,
                                WebPPicture* dst);

@ -455,7 +455,7 @@ WEBP_EXTERN int WebPPictureIsView(const WebPPicture* picture);
// dimension will be calculated preserving the aspect ratio.
// No gamma correction is applied.
// Returns false in case of error (invalid parameter or insufficient memory).
WEBP_EXTERN int WebPPictureRescale(WebPPicture* pic, int width, int height);
WEBP_EXTERN int WebPPictureRescale(WebPPicture* picture, int width, int height);

// Colorspace conversion function to import RGB samples.
// Previous buffer will be free'd, if any.

@ -526,7 +526,7 @@ WEBP_EXTERN int WebPPictureHasTransparency(const WebPPicture* picture);
// Remove the transparency information (if present) by blending the color with
// the background color 'background_rgb' (specified as 24bit RGB triplet).
// After this call, all alpha values are reset to 0xff.
WEBP_EXTERN void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
WEBP_EXTERN void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb);
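The two calls documented above are commonly used back to back before encoding: rescale first, then flatten the alpha. A hedged usage sketch; passing 0 for one of the two dimensions lets the library derive it from the aspect ratio, per the full comment in encode.h of which only the tail is visible in this hunk.

#include "webp/encode.h"

// Sketch: shrink to a 256-pixel-wide thumbnail and drop transparency.
static int PrepareThumbnail(WebPPicture* const picture) {
  // Height is derived from the aspect ratio when passed as 0.
  if (!WebPPictureRescale(picture, 256, 0)) return 0;
  WebPBlendAlpha(picture, 0x00ffffffu);  // composite over white
  return 1;
}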

//------------------------------------------------------------------------------
// Main call
