godot/drivers/webpold/enc/vp8enci.h

526 lines
20 KiB
C

// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// WebP encoder: internal header.
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_ENC_VP8ENCI_H_
#define WEBP_ENC_VP8ENCI_H_
#include <string.h> // for memcpy()
#include "../encode.h"
#include "../dsp/dsp.h"
#include "../utils/bit_writer.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
//------------------------------------------------------------------------------
// Various defines and enums
// version numbers
#define ENC_MAJ_VERSION 0
#define ENC_MIN_VERSION 2
#define ENC_REV_VERSION 0
// size of histogram used by CollectHistogram.
#define MAX_COEFF_THRESH 64
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes
B_TM_PRED = 1,
B_VE_PRED = 2,
B_HE_PRED = 3,
B_RD_PRED = 4,
B_VR_PRED = 5,
B_LD_PRED = 6,
B_VL_PRED = 7,
B_HD_PRED = 8,
B_HU_PRED = 9,
NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
// Luma16 or UV modes
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
};
enum { NUM_MB_SEGMENTS = 4,
MAX_NUM_PARTITIONS = 8,
NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC
NUM_BANDS = 8,
NUM_CTX = 3,
NUM_PROBAS = 11,
MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67 // last (inclusive) level with variable cost
};
// YUV-cache parameters. Cache is 16-pixels wide.
// The original or reconstructed samples can be accessed using VP8Scan[]
// The predicted blocks can be accessed using offsets to yuv_p_ and
// the arrays VP8*ModeOffsets[];
// +----+ YUV Samples area. See VP8Scan[] for accessing the blocks.
// Y_OFF |YYYY| <- original samples (enc->yuv_in_)
// |YYYY|
// |YYYY|
// |YYYY|
// U_OFF |UUVV| V_OFF (=U_OFF + 8)
// |UUVV|
// +----+
// Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_')
// |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_')
// |YYYY|
// |YYYY|
// U_OFF |UUVV| V_OFF
// |UUVV|
// x2 (for yuv_out2_)
// +----+ Prediction area ('yuv_p_', size = PRED_SIZE)
// I16DC16 |YYYY| Intra16 predictions (16x16 block each)
// |YYYY|
// |YYYY|
// |YYYY|
// I16TM16 |YYYY|
// |YYYY|
// |YYYY|
// |YYYY|
// I16VE16 |YYYY|
// |YYYY|
// |YYYY|
// |YYYY|
// I16HE16 |YYYY|
// |YYYY|
// |YYYY|
// |YYYY|
// +----+ Chroma U/V predictions (16x8 block each)
// C8DC8 |UUVV|
// |UUVV|
// C8TM8 |UUVV|
// |UUVV|
// C8VE8 |UUVV|
// |UUVV|
// C8HE8 |UUVV|
// |UUVV|
// +----+ Intra 4x4 predictions (4x4 block each)
// |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
// |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
// |YY..| I4HD4 I4HU4 I4TMP
// +----+
#define BPS 16 // this is the common stride
#define Y_SIZE (BPS * 16)
#define UV_SIZE (BPS * 8)
#define YUV_SIZE (Y_SIZE + UV_SIZE)
#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
#define Y_OFF (0)
#define U_OFF (Y_SIZE)
#define V_OFF (U_OFF + 8)
#define ALIGN_CST 15
#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
extern const int VP8Scan[16 + 4 + 4]; // in quant.c
extern const int VP8UVModeOffsets[4]; // in analyze.c
extern const int VP8I16ModeOffsets[4];
extern const int VP8I4ModeOffsets[NUM_BMODES];
// Layout of prediction blocks
// intra 16x16
#define I16DC16 (0 * 16 * BPS)
#define I16TM16 (1 * 16 * BPS)
#define I16VE16 (2 * 16 * BPS)
#define I16HE16 (3 * 16 * BPS)
// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
#define C8DC8 (4 * 16 * BPS)
#define C8TM8 (4 * 16 * BPS + 8 * BPS)
#define C8VE8 (5 * 16 * BPS)
#define C8HE8 (5 * 16 * BPS + 8 * BPS)
// intra 4x4
#define I4DC4 (6 * 16 * BPS + 0)
#define I4TM4 (6 * 16 * BPS + 4)
#define I4VE4 (6 * 16 * BPS + 8)
#define I4HE4 (6 * 16 * BPS + 12)
#define I4RD4 (6 * 16 * BPS + 4 * BPS + 0)
#define I4VR4 (6 * 16 * BPS + 4 * BPS + 4)
#define I4LD4 (6 * 16 * BPS + 4 * BPS + 8)
#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
#define I4HD4 (6 * 16 * BPS + 8 * BPS + 0)
#define I4HU4 (6 * 16 * BPS + 8 * BPS + 4)
#define I4TMP (6 * 16 * BPS + 8 * BPS + 8)
typedef int64_t score_t; // type used for scores, rate, distortion
#define MAX_COST ((score_t)0x7fffffffffffffLL)
#define QFIX 17
#define BIAS(b) ((b) << (QFIX - 8))
// Fun fact: this is the _only_ line where we're actually being lossy and
// discarding bits.
static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
return (n * iQ + B) >> QFIX;
}
extern const uint8_t VP8Zigzag[16];
//------------------------------------------------------------------------------
// Headers
typedef uint32_t proba_t; // 16b + 16b
typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
typedef struct VP8Encoder VP8Encoder;
// segment features
typedef struct {
int num_segments_; // Actual number of segments. 1 segment only = unused.
int update_map_; // whether to update the segment map or not.
// must be 0 if there's only 1 segment.
int size_; // bit-cost for transmitting the segment map
} VP8SegmentHeader;
// Struct collecting all frame-persistent probabilities.
typedef struct {
uint8_t segments_[3]; // probabilities for segment tree
uint8_t skip_proba_; // final probability of being skipped.
ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes
StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k
int dirty_; // if true, need to call VP8CalculateLevelCosts()
int use_skip_proba_; // Note: we always use skip_proba for now.
int nb_skip_; // number of skipped blocks
} VP8Proba;
// Filter parameters. Not actually used in the code (we don't perform
// the in-loop filtering), but filled from user's config
typedef struct {
int simple_; // filtering type: 0=complex, 1=simple
int level_; // base filter level [0..63]
int sharpness_; // [0..7]
int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16
} VP8FilterHeader;
//------------------------------------------------------------------------------
// Informations about the macroblocks.
typedef struct {
// block type
unsigned int type_:2; // 0=i4x4, 1=i16x16
unsigned int uv_mode_:2;
unsigned int skip_:1;
unsigned int segment_:2;
uint8_t alpha_; // quantization-susceptibility
} VP8MBInfo;
typedef struct VP8Matrix {
uint16_t q_[16]; // quantizer steps
uint16_t iq_[16]; // reciprocals, fixed point.
uint16_t bias_[16]; // rounding bias
uint16_t zthresh_[16]; // value under which a coefficient is zeroed
uint16_t sharpen_[16]; // frequency boosters for slight sharpening
} VP8Matrix;
typedef struct {
VP8Matrix y1_, y2_, uv_; // quantization matrices
int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral.
// Lower values indicate a lower risk of blurriness.
int beta_; // filter-susceptibility, range [0,255].
int quant_; // final segment quantizer.
int fstrength_; // final in-loop filtering strength
// reactivities
int lambda_i16_, lambda_i4_, lambda_uv_;
int lambda_mode_, lambda_trellis_, tlambda_;
int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
} VP8SegmentInfo;
// Handy transcient struct to accumulate score and info during RD-optimization
// and mode evaluation.
typedef struct {
score_t D, SD, R, score; // Distortion, spectral distortion, rate, score.
int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma.
int16_t y_ac_levels[16][16];
int16_t uv_levels[4 + 4][16];
int mode_i16; // mode number for intra16 prediction
uint8_t modes_i4[16]; // mode numbers for intra4 predictions
int mode_uv; // mode number of chroma prediction
uint32_t nz; // non-zero blocks
} VP8ModeScore;
// Iterator structure to iterate through macroblocks, pointing to the
// right neighbouring data (samples, predictions, contexts, ...)
typedef struct {
int x_, y_; // current macroblock
int y_offset_, uv_offset_; // offset to the luma / chroma planes
int y_stride_, uv_stride_; // respective strides
uint8_t* yuv_in_; // borrowed from enc_ (for now)
uint8_t* yuv_out_; // ''
uint8_t* yuv_out2_; // ''
uint8_t* yuv_p_; // ''
VP8Encoder* enc_; // back-pointer
VP8MBInfo* mb_; // current macroblock
VP8BitWriter* bw_; // current bit-writer
uint8_t* preds_; // intra mode predictors (4x4 blocks)
uint32_t* nz_; // non-zero pattern
uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4
uint8_t* i4_top_; // pointer to the current top boundary sample
int i4_; // current intra4x4 mode being tested
int top_nz_[9]; // top-non-zero context.
int left_nz_[9]; // left-non-zero. left_nz[8] is independent.
uint64_t bit_count_[4][3]; // bit counters for coded levels.
uint64_t luma_bits_; // macroblock bit-cost for luma
uint64_t uv_bits_; // macroblock bit-cost for chroma
LFStats* lf_stats_; // filter stats (borrowed from enc_)
int do_trellis_; // if true, perform extra level optimisation
int done_; // true when scan is finished
int percent0_; // saved initial progress percent
} VP8EncIterator;
// in iterator.c
// must be called first.
void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
// restart a scan.
void VP8IteratorReset(VP8EncIterator* const it);
// import samples from source
void VP8IteratorImport(const VP8EncIterator* const it);
// export decimated samples
void VP8IteratorExport(const VP8EncIterator* const it);
// go to next macroblock. Returns !done_. If *block_to_save is non-null, will
// save the boundary values to top_/left_ arrays. block_to_save can be
// it->yuv_out_ or it->yuv_in_.
int VP8IteratorNext(VP8EncIterator* const it,
const uint8_t* const block_to_save);
// Report progression based on macroblock rows. Return 0 for user-abort request.
int VP8IteratorProgress(const VP8EncIterator* const it,
int final_delta_percent);
// Intra4x4 iterations
void VP8IteratorStartI4(VP8EncIterator* const it);
// returns true if not done.
int VP8IteratorRotateI4(VP8EncIterator* const it,
const uint8_t* const yuv_out);
// Non-zero context setup/teardown
void VP8IteratorNzToBytes(VP8EncIterator* const it);
void VP8IteratorBytesToNz(VP8EncIterator* const it);
// Helper functions to set mode properties
void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
void VP8SetSkip(const VP8EncIterator* const it, int skip);
void VP8SetSegment(const VP8EncIterator* const it, int segment);
//------------------------------------------------------------------------------
// Paginated token buffer
// WIP: #define USE_TOKEN_BUFFER
#ifdef USE_TOKEN_BUFFER
#define MAX_NUM_TOKEN 2048
typedef struct VP8Tokens VP8Tokens;
struct VP8Tokens {
uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit, bits 0..14: slot
int left_;
VP8Tokens* next_;
};
typedef struct {
VP8Tokens* rows_;
uint16_t* tokens_; // set to (*last_)->tokens_
VP8Tokens** last_;
int left_;
int error_; // true in case of malloc error
} VP8TBuffer;
void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer
int VP8TBufferNewPage(VP8TBuffer* const b); // allocate a new page
void VP8TBufferClear(VP8TBuffer* const b); // de-allocate memory
int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas);
static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b,
int bit, int proba_idx) {
if (b->left_ > 0 || VP8TBufferNewPage(b)) {
const int slot = --b->left_;
b->tokens_[slot] = (bit << 15) | proba_idx;
}
return bit;
}
#endif // USE_TOKEN_BUFFER
//------------------------------------------------------------------------------
// VP8Encoder
struct VP8Encoder {
const WebPConfig* config_; // user configuration and parameters
WebPPicture* pic_; // input / output picture
// headers
VP8FilterHeader filter_hdr_; // filtering information
VP8SegmentHeader segment_hdr_; // segment information
int profile_; // VP8's profile, deduced from Config.
// dimension, in macroblock units.
int mb_w_, mb_h_;
int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1)
// number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
int num_parts_;
// per-partition boolean decoders.
VP8BitWriter bw_; // part0
VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
int percent_; // for progress
// transparency blob
int has_alpha_;
uint8_t* alpha_data_; // non-NULL if transparency is present
uint32_t alpha_data_size_;
// enhancement layer
int use_layer_;
VP8BitWriter layer_bw_;
uint8_t* layer_data_;
size_t layer_data_size_;
// quantization info (one set of DC/AC dequant factor per segment)
VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
int base_quant_; // nominal quantizer value. Only used
// for relative coding of segments' quant.
int uv_alpha_; // U/V quantization susceptibility
// global offset of quantizers, shared by all segments
int dq_y1_dc_;
int dq_y2_dc_, dq_y2_ac_;
int dq_uv_dc_, dq_uv_ac_;
// probabilities and statistics
VP8Proba proba_;
uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks
uint64_t sse_count_; // pixel count for the sse_[] stats
int coded_size_;
int residual_bytes_[3][4];
int block_count_[3];
// quality/speed settings
int method_; // 0=fastest, 6=best/slowest.
int rd_opt_level_; // Deduced from method_.
int max_i4_header_bits_; // partition #0 safeness factor
// Memory
VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1)
uint32_t* nz_; // non-zero bit context: mb_w+1
uint8_t* yuv_in_; // input samples
uint8_t* yuv_out_; // output samples
uint8_t* yuv_out2_; // secondary scratch out-buffer. swapped with yuv_out_.
uint8_t* yuv_p_; // scratch buffer for prediction
uint8_t *y_top_; // top luma samples.
uint8_t *uv_top_; // top u/v samples.
// U and V are packed into 16 pixels (8 U + 8 V)
uint8_t *y_left_; // left luma samples (adressable from index -1 to 15).
uint8_t *u_left_; // left u samples (adressable from index -1 to 7)
uint8_t *v_left_; // left v samples (adressable from index -1 to 7)
LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off)
};
//------------------------------------------------------------------------------
// internal functions. Not public.
// in tree.c
extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
extern const uint8_t
VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
// Reset the token probabilities to their initial (default) values
void VP8DefaultProbas(VP8Encoder* const enc);
// Write the token probabilities
void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
// Writes the partition #0 modes (that is: all intra modes)
void VP8CodeIntraModes(VP8Encoder* const enc);
// in syntax.c
// Generates the final bitstream by coding the partition0 and headers,
// and appending an assembly of all the pre-coded token partitions.
// Return true if everything is ok.
int VP8EncWrite(VP8Encoder* const enc);
// Release memory allocated for bit-writing in VP8EncLoop & seq.
void VP8EncFreeBitWriters(VP8Encoder* const enc);
// in frame.c
extern const uint8_t VP8EncBands[16 + 1];
// Form all the four Intra16x16 predictions in the yuv_p_ cache
void VP8MakeLuma16Preds(const VP8EncIterator* const it);
// Form all the four Chroma8x8 predictions in the yuv_p_ cache
void VP8MakeChroma8Preds(const VP8EncIterator* const it);
// Form all the ten Intra4x4 predictions in the yuv_p_ cache
// for the 4x4 block it->i4_
void VP8MakeIntra4Preds(const VP8EncIterator* const it);
// Rate calculation
int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
// Main stat / coding passes
int VP8EncLoop(VP8Encoder* const enc);
int VP8StatLoop(VP8Encoder* const enc);
// in webpenc.c
// Assign an error code to a picture. Return false for convenience.
int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
int WebPReportProgress(const WebPPicture* const pic,
int percent, int* const percent_store);
// in analysis.c
// Main analysis loop. Decides the segmentations and complexity.
// Assigns a first guess for Intra16 and uvmode_ prediction modes.
int VP8EncAnalyze(VP8Encoder* const enc);
// in quant.c
// Sets up segment's quantization values, base_quant_ and filter strengths.
void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
// Pick best modes and fills the levels. Returns true if skipped.
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
// in alpha.c
void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression
int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
void VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
// in layer.c
void VP8EncInitLayer(VP8Encoder* const enc); // init everything
void VP8EncCodeLayerBlock(VP8EncIterator* it); // code one more macroblock
int VP8EncFinishLayer(VP8Encoder* const enc); // finalize coding
void VP8EncDeleteLayer(VP8Encoder* enc); // reclaim memory
// in filter.c
// SSIM utils
typedef struct {
double w, xm, ym, xxm, xym, yym;
} DistoStats;
void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst);
void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int W, int H, DistoStats* const stats);
double VP8SSIMGet(const DistoStats* const stats);
double VP8SSIMGetSquaredError(const DistoStats* const stats);
// autofilter
void VP8InitFilter(VP8EncIterator* const it);
void VP8StoreFilterStats(VP8EncIterator* const it);
void VP8AdjustFilterStrength(VP8EncIterator* const it);
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
#endif /* WEBP_ENC_VP8ENCI_H_ */