Update zstd to 1.4.4

(cherry picked from commit 55afd6e784)
Jonathan Mannancheril authored on 2019-11-09 22:31:00 -06:00; committed by Rémi Verschelde
parent c563839355
commit bc31b11fdd
27 changed files with 1852 additions and 831 deletions


@ -537,7 +537,7 @@ Files extracted from upstream source:
## zstd
- Upstream: https://github.com/facebook/zstd
- Version: 1.4.3
- Version: 1.4.4
- License: BSD-3-Clause
Files extracted from upstream source:


@ -164,7 +164,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
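
The switch from 31 - __builtin_clz(val) to __builtin_clz(val) ^ 31 is behavior-preserving: for a non-zero 32-bit value, __builtin_clz returns a number in [0, 31], and subtracting any such number from 31 (binary 11111) flips exactly the same bits as XOR-ing it with 31. A minimal standalone check of that identity (illustrative only, not part of the diff):

#include <assert.h>

/* For x in [0, 31], 31 - x == 31 ^ x, because 31 is 0b11111 and
 * subtracting from an all-ones pattern flips exactly the low 5 bits. */
int main(void)
{
    unsigned x;
    for (x = 0; x <= 31; x++) assert((31u - x) == (31u ^ x));
    return 0;
}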
@ -244,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
assert(bitC->ptr <= bitC->endPtr);
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
assert(bitC->ptr <= bitC->endPtr);
bitC->bitPos &= 7;
bitC->bitContainer >>= nbBytes*8;
}
@ -260,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
assert(bitC->ptr <= bitC->endPtr);
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;


@ -61,6 +61,13 @@
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
#endif
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
#if defined(__GNUC__)
# define UNUSED_ATTR __attribute__((unused))
#else
# define UNUSED_ATTR
#endif
/* force no inlining */
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
@ -127,9 +134,14 @@
} \
}
/* vectorization */
/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
#if !defined(__clang__) && defined(__GNUC__)
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
# else
# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
# endif
#else
# define DONT_VECTORIZE
#endif


@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be teste
*******************************************/
/* FSE buffer bounds */
#define FSE_NCOUNTBOUND 512
#define FSE_BLOCKBOUND(size) (size + (size>>7))
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */


@ -52,7 +52,9 @@
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
/* check and forward error code */
#ifndef CHECK_F
#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
#endif
/* **************************************************************


@ -47,6 +47,79 @@ extern "C" {
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
/* detects whether we are being compiled under msan */
#if defined (__has_feature)
# if __has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1
# endif
#endif
#if defined (MEMORY_SANITIZER)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stdint.h> /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif
/* detects whether we are being compiled under asan */
#if defined (__has_feature)
# if __has_feature(address_sanitizer)
# define ADDRESS_SANITIZER 1
# endif
#elif defined(__SANITIZE_ADDRESS__)
# define ADDRESS_SANITIZER 1
#endif
#if defined (ADDRESS_SANITIZER)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
/*-**************************************************************
* Basic Types


@ -127,9 +127,13 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
ctx->queueEmpty = 1;
(void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
(void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
(void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
{
int error = 0;
error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
if (error) { POOL_free(ctx); return NULL; }
}
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);


@ -14,6 +14,8 @@
* This file will hold wrapper for systems, which do not support pthreads
*/
#include "threading.h"
/* create fake symbol to avoid empty translation unit warning */
int g_ZSTD_threading_useless_symbol;
@ -28,7 +30,6 @@ int g_ZSTD_threading_useless_symbol;
/* === Dependencies === */
#include <process.h>
#include <errno.h>
#include "threading.h"
/* === Implementation === */
@ -73,3 +74,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
}
#endif /* ZSTD_MULTITHREAD */
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
#include <stdlib.h>
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
{
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
if (!*mutex)
return 1;
return pthread_mutex_init(*mutex, attr);
}
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
{
if (!*mutex)
return 0;
{
int const ret = pthread_mutex_destroy(*mutex);
free(*mutex);
return ret;
}
}
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
{
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
if (!*cond)
return 1;
return pthread_cond_init(*cond, attr);
}
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
{
if (!*cond)
return 0;
{
int const ret = pthread_cond_destroy(*cond);
free(*cond);
return ret;
}
}
#endif


@ -13,6 +13,8 @@
#ifndef THREADING_H_938743
#define THREADING_H_938743
#include "debug.h"
#if defined (__cplusplus)
extern "C" {
#endif
@ -79,6 +81,8 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
/* === POSIX Systems === */
# include <pthread.h>
#if DEBUGLEVEL < 1
#define ZSTD_pthread_mutex_t pthread_mutex_t
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
@ -96,6 +100,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
#else /* DEBUGLEVEL >= 1 */
/* Debug implementation of threading.
* In this implementation we use pointers for mutexes and condition variables.
* This way, if we forget to init/destroy them the program will crash or ASAN
* will report leaks.
*/
#define ZSTD_pthread_mutex_t pthread_mutex_t*
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a))
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a))
#define ZSTD_pthread_cond_t pthread_cond_t*
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b))
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a))
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a))
#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
#endif
#else /* ZSTD_MULTITHREAD not defined */
/* No multithreading support */
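
The debug wrappers above keep the call pattern identical to the release macros, so callers do not change between the two branches. A minimal sketch of that call pattern (illustrative only, not part of the commit; it assumes a POSIX build with ZSTD_MULTITHREAD defined):

#include "threading.h"

/* Same source works whether DEBUGLEVEL < 1 (plain pthread macros) or
 * DEBUGLEVEL >= 1 (pointer-based wrappers that malloc/free the object). */
static int demo_locked_increment(int* counter)
{
    ZSTD_pthread_mutex_t lock;
    if (ZSTD_pthread_mutex_init(&lock, NULL)) return 1;  /* debug build also allocates */
    ZSTD_pthread_mutex_lock(&lock);
    (*counter)++;
    ZSTD_pthread_mutex_unlock(&lock);
    return ZSTD_pthread_mutex_destroy(&lock);            /* debug build also frees */
}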


@ -197,79 +197,56 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 8
#define VECLEN 16
#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16
typedef enum {
ZSTD_no_overlap,
ZSTD_overlap_src_before_dst,
ZSTD_overlap_src_before_dst
/* ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
* @param ovtype controls the overlap detection
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
* The src buffer must be before the dst buffer.
*/
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
/* Handle short offset copies. */
do {
COPY8(op, ip)
} while (op < oend);
} else {
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
/* Separate out the first two COPY16() calls because the copy length is
* almost certain to be short, so the branches have different
* probabilities.
* On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
* On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
*/
COPY16(op, ip);
COPY16(op, ip);
if (op >= oend) return;
do {
COPY16(op, ip);
COPY16(op, ip);
}
while (op < oend);
}
}
/*! ZSTD_wildcopy_16min() :
* same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
assert(length >= 8);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
}
while (op < oend);
}
}
MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
{
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = (BYTE*)dstEnd;
do
COPY8(op, ip)
while (op < oend);
}
/*-*******************************************
* Private declarations
@ -323,7 +300,7 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
_BitScanReverse(&r, val);
return (unsigned)r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return 31 - __builtin_clz(val);
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */

File diff suppressed because it is too large


@ -19,6 +19,7 @@
* Dependencies
***************************************/
#include "zstd_internal.h"
#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
#endif
@ -192,6 +193,13 @@ typedef struct {
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
typedef struct {
int collectSequences;
ZSTD_Sequence* seqStart;
size_t seqIndex;
size_t maxSequences;
} SeqCollector;
struct ZSTD_CCtx_params_s {
ZSTD_format_e format;
ZSTD_compressionParameters cParams;
@ -203,6 +211,9 @@ struct ZSTD_CCtx_params_s {
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size */
int srcSizeHint; /* User's best guess of source size.
* Hint is not valid when srcSizeHint == 0.
* There is no guarantee that hint is close to actual source size */
ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode;
@ -228,9 +239,7 @@ struct ZSTD_CCtx_s {
ZSTD_CCtx_params appliedParams;
U32 dictID;
int workSpaceOversizedDuration;
void* workSpace;
size_t workSpaceSize;
ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
size_t blockSize;
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
unsigned long long consumedSrcSize;
@ -238,6 +247,8 @@ struct ZSTD_CCtx_s {
XXH64_state_t xxhState;
ZSTD_customMem customMem;
size_t staticSize;
SeqCollector seqCollector;
int isFirstBlock;
seqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */
@ -337,26 +348,57 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
return (srcSize >> minlog) + 2;
}
/*! ZSTD_safecopyLiterals() :
* memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
* large copies.
*/
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
assert(iend > ilimit_w);
if (ip <= ilimit_w) {
ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
op += ilimit_w - ip;
ip = ilimit_w;
}
while (ip < iend) *op++ = *ip++;
}
/*! ZSTD_storeSeq() :
* Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
* `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
* Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
* `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
* `mlBase` : matchLength - MINMATCH
* Allowed to overread literals up to litLimit.
*/
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
HINT_INLINE UNUSED_ATTR
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
}
#endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
assert(literals + litLength <= litLimit);
if (litEnd <= litLimit_w) {
/* Common case we can use wildcopy.
* First copy 16 bytes, because literals are likely short.
*/
assert(WILDCOPY_OVERLENGTH >= 16);
ZSTD_copy16(seqStorePtr->lit, literals);
if (litLength > 16) {
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
}
} else {
ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
}
seqStorePtr->lit += litLength;
/* literal Length */
@ -368,7 +410,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
seqStorePtr->sequences[0].offset = offsetCode + 1;
seqStorePtr->sequences[0].offset = offCode + 1;
/* match Length */
if (mlBase>0xFFFF) {
@ -910,7 +952,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
@ -925,7 +967,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
const ZSTD_CCtx_params* params,
unsigned long long pledgedSrcSize);
/* ZSTD_compress_advanced_internal() :
@ -934,7 +976,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_CCtx_params params);
const ZSTD_CCtx_params* params);
/* ZSTD_writeLastEmptyBlock() :


@ -70,7 +70,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
@ -99,10 +99,15 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
cLitSize = singleStream ?
HUF_compress1X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
255, 11, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
HUF_compress4X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
255, 11, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
hType = set_repeat;


@ -23,7 +23,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */


@ -222,7 +222,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
void* workspace, size_t workspaceSize)
void* entropyWorkspace, size_t entropyWorkspaceSize)
{
BYTE* op = (BYTE*)dst;
const BYTE* const oend = op + dstCapacity;
@ -238,7 +238,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
@ -252,7 +252,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize);
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
return NCountSize;
}
}


@ -35,7 +35,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
void* workspace, size_t workspaceSize);
void* entropyWorkspace, size_t entropyWorkspaceSize);
size_t ZSTD_encodeSequences(
void* dst, size_t dstCapacity,

thirdparty/zstd/compress/zstd_cwksp.h (new vendored file, 535 lines)

@ -0,0 +1,535 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_CWKSP_H
#define ZSTD_CWKSP_H
/*-*************************************
* Dependencies
***************************************/
#include "zstd_internal.h"
#if defined (__cplusplus)
extern "C" {
#endif
/*-*************************************
* Constants
***************************************/
/* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
/* when workspace is continuously too large
* during at least this number of times,
* context's memory usage is considered wasteful,
* because it's sized to handle a worst case scenario which rarely happens.
* In which case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
/* Since the workspace is effectively its own little malloc implementation /
* arena, when we run under ASAN, we should similarly insert redzones between
* each internal element of the workspace, so ASAN will catch overruns that
* reach outside an object but that stay inside the workspace.
*
* This defines the size of that redzone.
*/
#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
#endif
/*-*************************************
* Structures
***************************************/
typedef enum {
ZSTD_cwksp_alloc_objects,
ZSTD_cwksp_alloc_buffers,
ZSTD_cwksp_alloc_aligned
} ZSTD_cwksp_alloc_phase_e;
/**
* Zstd fits all its internal datastructures into a single continuous buffer,
* so that it only needs to perform a single OS allocation (or so that a buffer
* can be provided to it and it can perform no allocations at all). This buffer
* is called the workspace.
*
* Several optimizations complicate that process of allocating memory ranges
* from this workspace for each internal datastructure:
*
* - These different internal datastructures have different setup requirements:
*
* - The static objects need to be cleared once and can then be trivially
* reused for each compression.
*
* - Various buffers don't need to be initialized at all--they are always
* written into before they're read.
*
* - The matchstate tables have a unique requirement that they don't need
* their memory to be totally cleared, but they do need the memory to have
* some bound, i.e., a guarantee that all values in the memory they've been
* allocated is less than some maximum value (which is the starting value
* for the indices that they will then use for compression). When this
* guarantee is provided to them, they can use the memory without any setup
* work. When it can't, they have to clear the area.
*
* - These buffers also have different alignment requirements.
*
* - We would like to reuse the objects in the workspace for multiple
* compressions without having to perform any expensive reallocation or
* reinitialization work.
*
* - We would like to be able to efficiently reuse the workspace across
* multiple compressions **even when the compression parameters change** and
* we need to resize some of the objects (where possible).
*
* To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
* abstraction was created. It works as follows:
*
* Workspace Layout:
*
* [ ... workspace ... ]
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
*
* The various objects that live in the workspace are divided into the
* following categories, and are allocated separately:
*
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
* so that literally everything fits in a single buffer. Note: if present,
* this must be the first object in the workspace, since ZSTD_free{CCtx,
* CDict}() rely on a pointer comparison to see whether one or two frees are
* required.
*
* - Fixed size objects: these are fixed-size, fixed-count objects that are
* nonetheless "dynamically" allocated in the workspace so that we can
* control how they're initialized separately from the broader ZSTD_CCtx.
* Examples:
* - Entropy Workspace
* - 2 x ZSTD_compressedBlockState_t
* - CDict dictionary contents
*
* - Tables: these are any of several different datastructures (hash tables,
* chain tables, binary trees) that all respect a common format: they are
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
* be moved around at no cost for a new compression.
*
* Allocating Memory:
*
* The various types of objects must be allocated in order, so they can be
* correctly packed into the workspace buffer. That order is:
*
* 1. Objects
* 2. Buffers
* 3. Aligned
* 4. Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
typedef struct {
void* workspace;
void* workspaceEnd;
void* objectEnd;
void* tableEnd;
void* tableValidEnd;
void* allocStart;
int allocFailed;
int workspaceOversizedDuration;
ZSTD_cwksp_alloc_phase_e phase;
} ZSTD_cwksp;
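
To make the reservation discipline above concrete, here is a minimal caller-side sketch (illustrative only, not part of the commit) that drives the functions declared below in the required order, assuming an ordinary malloc'd buffer and arbitrary example sizes:

#include <stdlib.h>
#include "zstd_cwksp.h"

static int demo_cwksp(void)
{
    ZSTD_cwksp ws;
    size_t const total = (size_t)1 << 20;   /* 1 MiB workspace, arbitrary */
    void* const mem = malloc(total);        /* malloc alignment satisfies the init assert */
    if (mem == NULL) return 1;
    ZSTD_cwksp_init(&ws, mem, total);

    /* Reserve in the required order: 1. objects, 2. buffers, 3. aligned, 4. tables. */
    {   void* const obj   = ZSTD_cwksp_reserve_object(&ws, 64);
        BYTE* const buf   = ZSTD_cwksp_reserve_buffer(&ws, 1024);
        void* const algn  = ZSTD_cwksp_reserve_aligned(&ws, 256);
        void* const table = ZSTD_cwksp_reserve_table(&ws, 4096);
        (void)obj; (void)buf; (void)algn; (void)table;
    }
    if (ZSTD_cwksp_reserve_failed(&ws)) { free(mem); return 1; }

    ZSTD_cwksp_clear(&ws);   /* drops buffer/aligned/table allocations, keeps objects */
    free(mem);
    return 0;
}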
/*-*************************************
* Functions
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws;
assert(ws->workspace <= ws->objectEnd);
assert(ws->objectEnd <= ws->tableEnd);
assert(ws->objectEnd <= ws->tableValidEnd);
assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd);
}
/**
* Align must be a power of 2.
*/
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
size_t const mask = align - 1;
assert((align & mask) == 0);
return (size + mask) & ~mask;
}
/**
* Use this to determine how much space in the workspace we will consume to
* allocate this object. (Normally it should be exactly the size of the object,
* but under special conditions, like ASAN, where we pad each object, it might
* be larger.)
*
* Since tables aren't currently redzoned, you don't need to call through this
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#else
return size;
#endif
}
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
assert(phase >= ws->phase);
if (phase > ws->phase) {
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
phase >= ZSTD_cwksp_alloc_buffers) {
ws->tableValidEnd = ws->objectEnd;
}
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
/* If unaligned allocations down from a too-large top have left us
* unaligned, we need to realign our alloc ptr. Technically, this
* can consume space that is unaccounted for in the neededSpace
* calculation. However, I believe this can only happen when the
* workspace is too large, and specifically when it is too large
* by a larger margin than the space that will be consumed. */
/* TODO: cleaner, compiler warning friendly way to do this??? */
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
if (ws->allocStart < ws->tableValidEnd) {
ws->tableValidEnd = ws->allocStart;
}
}
ws->phase = phase;
}
}
/**
* Returns whether this object/buffer/etc was allocated in this workspace.
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
}
/**
* Internal function. Do not use directly.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
void* alloc;
void* bottom = ws->tableEnd;
ZSTD_cwksp_internal_advance_phase(ws, phase);
alloc = (BYTE *)ws->allocStart - bytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
if (alloc < bottom) {
DEBUGLOG(4, "cwksp: alloc failed!");
ws->allocFailed = 1;
return NULL;
}
if (alloc < ws->tableValidEnd) {
ws->tableValidEnd = alloc;
}
ws->allocStart = alloc;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
/**
* Reserves and returns unaligned memory.
*/
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
}
/**
* Reserves and returns memory sized on and aligned on sizeof(unsigned).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
assert((bytes & (sizeof(U32)-1)) == 0);
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
}
/**
* Aligned on sizeof(unsigned). These buffers have the special property that
* their values remain constrained, allowing us to re-use them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
void* alloc = ws->tableEnd;
void* end = (BYTE *)alloc + bytes;
void* top = ws->allocStart;
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
assert((bytes & (sizeof(U32)-1)) == 0);
ZSTD_cwksp_internal_advance_phase(ws, phase);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(end <= top);
if (end > top) {
DEBUGLOG(4, "cwksp: table alloc failed!");
ws->allocFailed = 1;
return NULL;
}
ws->tableEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
/**
* Aligned on sizeof(void*).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
void* alloc = ws->objectEnd;
void* end = (BYTE*)alloc + roundedBytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5,
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
assert((bytes & (sizeof(void*)-1)) == 0);
ZSTD_cwksp_assert_internal_consistency(ws);
/* we must be in the first phase, no advance is possible */
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
DEBUGLOG(4, "cwksp: object alloc failed!");
ws->allocFailed = 1;
return NULL;
}
ws->objectEnd = end;
ws->tableEnd = end;
ws->tableValidEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. */
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
__msan_poison(ws->objectEnd, size);
}
#endif
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
ws->tableValidEnd = ws->objectEnd;
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* Zero the part of the allocated tables not already marked clean.
*/
MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
}
ZSTD_cwksp_mark_tables_clean(ws);
}
/**
* Invalidates table allocations.
* All other allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing tables!");
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
#endif
ws->tableEnd = ws->objectEnd;
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* Invalidates all buffer, aligned, and table allocations.
* Object allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing!");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
* the workspace (or at least the non-static, non-table parts of it)
* every time we start a new compression. */
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
}
#endif
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
#endif
ws->tableEnd = ws->objectEnd;
ws->allocStart = ws->workspaceEnd;
ws->allocFailed = 0;
if (ws->phase > ZSTD_cwksp_alloc_buffers) {
ws->phase = ZSTD_cwksp_alloc_buffers;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* The provided workspace takes ownership of the buffer [start, start+size).
* Any existing values in the workspace are ignored (the previously managed
* buffer, if present, must be separately freed).
*/
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
ws->workspace = start;
ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
ws->phase = ZSTD_cwksp_alloc_objects;
ZSTD_cwksp_clear(ws);
ws->workspaceOversizedDuration = 0;
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
void* workspace = ZSTD_malloc(size, customMem);
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
RETURN_ERROR_IF(workspace == NULL, memory_allocation);
ZSTD_cwksp_init(ws, workspace, size);
return 0;
}
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace");
memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_free(ptr, customMem);
}
/**
* Moves the management of a workspace from one cwksp to another. The src cwksp
* is left in an invalid state (src must be re-init()'ed before its used again).
*/
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
*dst = *src;
memset(src, 0, sizeof(ZSTD_cwksp));
}
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
return ws->allocFailed;
}
/*-*************************************
* Functions Checking Free Space
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
}
MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
}
MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_check_available(
ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
}
MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
&& ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
}
MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
ZSTD_cwksp* ws, size_t additionalNeededSpace) {
if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
ws->workspaceOversizedDuration++;
} else {
ws->workspaceOversizedDuration = 0;
}
}
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_CWKSP_H */


@ -148,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
@ -157,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
@ -247,7 +247,7 @@ _match_found:
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
_match_stored:
/* match found */
@ -278,7 +278,7 @@ _match_stored:
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@ -297,7 +297,7 @@ _match_stored:
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
@ -411,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -422,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -447,7 +447,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
@ -479,7 +479,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;


@ -8,7 +8,7 @@
* You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_compress_internal.h"
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"
@ -43,8 +43,8 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls)
@ -74,8 +74,7 @@ size_t ZSTD_compressBlock_fast_generic(
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
ip1 = ip0 + 1;
{
U32 const maxRep = (U32)(ip0 - prefixStart);
{ U32 const maxRep = (U32)(ip0 - prefixStart);
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
@ -118,8 +117,7 @@ size_t ZSTD_compressBlock_fast_generic(
match0 = match1;
goto _offset;
}
{
size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
{ size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
assert(step >= 2);
ip0 += step;
ip1 += step;
@ -138,7 +136,7 @@ _offset: /* Requires: ip0, match0 */
_match: /* Requires: ip0, match0, offcode */
/* Count the forward length */
mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
/* match found */
ip0 += mLength;
anchor = ip0;
@ -150,16 +148,15 @@ _match: /* Requires: ip0, match0, offcode */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
while ( (ip0 <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */
&& (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
/* store sequence */
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength;
ip1 = ip0 + 1;
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */
}
@ -179,8 +176,7 @@ size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32 const mls = cParams->minMatch;
U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState == NULL);
switch(mls)
{
@ -265,7 +261,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
@ -285,7 +281,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
@ -300,7 +296,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
@ -325,7 +321,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
@ -348,8 +344,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32 const mls = cParams->minMatch;
U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState != NULL);
switch(mls)
{
@ -408,16 +403,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const U32 repIndex = current + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
size_t mLength;
hashTable[h] = current; /* update hash table */
assert(offset_1 <= current +1); /* check repIndex */
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
@ -427,18 +423,14 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
}
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
U32 offset;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
U32 const offset = current - matchIndex;
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* found a match : store it */
offset_2 = offset_1; offset_1 = offset; /* update offset history */
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ip += mLength;
anchor = ip;
} }
if (ip <= ilimit) {
/* Fill Table */
@ -448,13 +440,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
@ -476,8 +468,7 @@ size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32 const mls = cParams->minMatch;
U32 const mls = ms->cParams.minMatch;
switch(mls)
{
default: /* includes case 3 */


@ -810,7 +810,7 @@ ZSTD_compressBlock_lazy_generic(
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@ -828,7 +828,7 @@ _storeSequence:
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue;
@ -843,7 +843,7 @@ _storeSequence:
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
@ -1051,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@ -1066,7 +1066,7 @@ _storeSequence:
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */


@ -49,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
size_t const ldmHSize = ((size_t)1) << params.hashLog;
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
size_t const ldmBucketSize =
((size_t)1) << (params.hashLog - ldmBucketSizeLog);
size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
return params.enableLdm ? totalSize : 0;
}
@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[i] = rep[i-1];
rep[0] = sequence.offset;
/* Store the sequence */
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
sequence.offset + ZSTD_REP_MOVE,
sequence.matchLength - MINMATCH);
ip += sequence.matchLength;


@ -1098,7 +1098,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
anchor += advance;
ip = anchor;
} }


@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
/* init */
if (job->cdict) {
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
assert(job->firstJob); /* only allowed for first job */
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} else { /* srcStart points at reloaded section */
@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
ZSTD_dtlm_fast,
NULL, /*cdict*/
jobParams, pledgedSrcSize);
&jobParams, pledgedSrcSize);
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} }
@ -927,12 +927,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
unsigned jobID;
DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
/* Copy the mutex/cond out */
ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
mtctx->jobs[jobID].dstBuff = g_nullBuffer;
mtctx->jobs[jobID].cSize = 0;
/* Clear the job description, but keep the mutex/cond */
memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
mtctx->jobs[jobID].job_mutex = mutex;
mtctx->jobs[jobID].job_cond = cond;
}
memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
mtctx->inBuff.buffer = g_nullBuffer;
mtctx->inBuff.filled = 0;
mtctx->allJobsCompleted = 1;
@ -1028,9 +1034,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
/* Sets parameters relevant to the compression job,
* initializing others to default values. */
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
{
ZSTD_CCtx_params jobParams = params;
ZSTD_CCtx_params jobParams = *params;
/* Clear parameters related to multithreading */
jobParams.forceWindow = 0;
jobParams.nbWorkers = 0;
@ -1151,16 +1157,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
{
unsigned jobLog;
if (params.ldmParams.enableLdm) {
if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead. */
jobLog = MAX(21, params.cParams.chainLog + 4);
jobLog = MAX(21, params->cParams.chainLog + 4);
} else {
jobLog = MAX(20, params.cParams.windowLog + 2);
jobLog = MAX(20, params->cParams.windowLog + 2);
}
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
}
@ -1193,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
return ovlog;
}
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
{
int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
assert(0 <= overlapRLog && overlapRLog <= 8);
if (params.ldmParams.enableLdm) {
if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead.
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog;
}
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
}
static unsigned
ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
{
assert(nbWorkers>0);
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@ -1236,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal(
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params)
{
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
const char* const srcStart = (const char*)src;
@ -1256,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal(
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
}
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@ -1404,12 +1410,12 @@ size_t ZSTDMT_initCStream_internal(
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {
ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
assert(singleThreadParams.nbWorkers == 0);
return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
dict, dictSize, cdict,
singleThreadParams, pledgedSrcSize);
&singleThreadParams, pledgedSrcSize);
}
DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@ -1435,11 +1441,11 @@ size_t ZSTDMT_initCStream_internal(
mtctx->cdict = cdict;
}
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
mtctx->targetSectionSize = params.jobSize;
if (mtctx->targetSectionSize == 0) {
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
}
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);


@ -61,7 +61,9 @@
* Error Management
****************************************************************/
#define HUF_isError ERR_isError
#ifndef CHECK_F
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
#endif
/* **************************************************************


@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
static size_t ZSTD_startingInputLength(ZSTD_format_e format)
{
size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
ZSTD_FRAMEHEADERSIZE_PREFIX;
ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
/* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
return startingInputLength;
@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
unsigned long long totalDstSize = 0;
while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
U32 const magicNumber = MEM_readLE32(src);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@ -629,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
/* check */
RETURN_ERROR_IF(
remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
srcSize_wrong);
/* Frame Header */
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
srcSize_wrong);
@ -714,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
dictSize = ZSTD_DDict_dictSize(ddict);
}
while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
@ -1098,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
for (i=0; i<3; i++) {
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
dictionary_corrupted);
entropy->rep[i] = rep;
} }
@ -1267,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
{
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
ZSTD_clearDict(dctx);
if (dict && dictSize >= 8) {
if (dict && dictSize != 0) {
dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
dctx->ddict = dctx->ddictLocal;
@ -1300,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
/* ZSTD_initDStream_usingDict() :
* return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
* return : expected size, aka ZSTD_startingInputLength().
* this function cannot fail */
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
{
DEBUGLOG(4, "ZSTD_initDStream_usingDict");
FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
return ZSTD_FRAMEHEADERSIZE_PREFIX;
return ZSTD_startingInputLength(zds->format);
}
/* note : this variant can't fail */
@ -1324,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
return ZSTD_FRAMEHEADERSIZE_PREFIX;
return ZSTD_startingInputLength(dctx->format);
}
/* ZSTD_resetDStream() :
* return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
* return : expected size, aka ZSTD_startingInputLength().
* this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
return ZSTD_FRAMEHEADERSIZE_PREFIX;
return ZSTD_startingInputLength(dctx->format);
}
@ -1564,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->lhSize += remainingInput;
}
input->pos = input->size;
return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
}
assert(ip != NULL);
memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;


@ -573,38 +573,118 @@ typedef struct {
size_t pos;
} seqState_t;
/*! ZSTD_overlapCopy8() :
* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
* If the offset is < 8 then the offset is spread to at least 8 bytes.
*
* Precondition: *ip <= *op
 * Postcondition: *op - *ip >= 8
*/
static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
assert(*ip <= *op);
if (offset < 8) {
/* close range match, overlap */
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
int const sub2 = dec64table[offset];
(*op)[0] = (*ip)[0];
(*op)[1] = (*ip)[1];
(*op)[2] = (*ip)[2];
(*op)[3] = (*ip)[3];
*ip += dec32table[offset];
ZSTD_copy4(*op+4, *ip);
*ip -= sub2;
} else {
ZSTD_copy8(*op, *ip);
}
*ip += 8;
*op += 8;
assert(*op - *ip >= 8);
}
/* ZSTD_execSequenceLast7():
* exceptional case : decompress a match starting within last 7 bytes of output buffer.
* requires more careful checks, to ensure there is no overflow.
* performance does not matter though.
* note : this case is supposed to be never generated "naturally" by reference encoder,
* since in most cases it needs at least 8 bytes to look for a match.
* but it's allowed by the specification. */
/*! ZSTD_safecopy() :
* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
* This function is only called in the uncommon case where the sequence is near the end of the block. It
* should be fast for a single long sequence, but can be slow for several short sequences.
*
* @param ovtype controls the overlap detection
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
* The src buffer must be before the dst buffer.
*/
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
ptrdiff_t const diff = op - ip;
BYTE* const oend = op + length;
assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
(ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
if (length < 8) {
/* Handle short lengths. */
while (op < oend) *op++ = *ip++;
return;
}
if (ovtype == ZSTD_overlap_src_before_dst) {
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
assert(length >= 8);
ZSTD_overlapCopy8(&op, &ip, diff);
assert(op - ip >= 8);
assert(op <= oend);
}
if (oend <= oend_w) {
/* No risk of overwrite. */
ZSTD_wildcopy(op, ip, length, ovtype);
return;
}
if (op <= oend_w) {
/* Wildcopy until we get close to the end. */
assert(oend > oend_w);
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
ip += oend_w - op;
op = oend_w;
}
/* Handle the leftovers. */
while (op < oend) *op++ = *ip++;
}
/* ZSTD_execSequenceEnd():
* This version handles cases that are near the end of the output buffer. It requires
* more careful checks to make sure there is no overflow. By separating out these hard
* and unlikely cases, we can speed up the common cases.
*
* NOTE: This function needs to be fast for a single long sequence, but doesn't need
* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
*/
FORCE_NOINLINE
size_t ZSTD_execSequenceLast7(BYTE* op,
size_t ZSTD_execSequenceEnd(BYTE* op,
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset;
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
/* check */
RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
/* bounds checks */
assert(oLitEnd < oMatchEnd);
RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
/* copy literals */
while (op < oLitEnd) *op++ = *(*litPtr)++;
ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
op = oLitEnd;
*litPtr = iLitEnd;
/* copy Match */
if (sequence.offset > (size_t)(oLitEnd - base)) {
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix */
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
match = dictEnd - (base-match);
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
match = dictEnd - (prefixStart-match);
if (match + sequence.matchLength <= dictEnd) {
memmove(oLitEnd, match, sequence.matchLength);
return sequenceLength;
@ -614,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
memmove(oLitEnd, match, length1);
op = oLitEnd + length1;
sequence.matchLength -= length1;
match = base;
match = prefixStart;
} }
while (op < oMatchEnd) *op++ = *match++;
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
return sequenceLength;
}
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
@ -634,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset;
/* check */
RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
/* Errors and uncommon cases handled here. */
assert(oLitEnd < oMatchEnd);
if (iLitEnd > litLimit || oMatchEnd > oend_w)
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
/* copy Literals */
if (sequence.litLength > 8)
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr);
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
/* Copy Literals:
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
* We likely don't need the full 32-byte wildcopy.
*/
assert(WILDCOPY_OVERLENGTH >= 16);
ZSTD_copy16(op, (*litPtr));
if (sequence.litLength > 16) {
ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
}
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
/* copy Match */
/* Copy Match */
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix -> go into extDict */
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@ -662,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
op = oLitEnd + length1;
sequence.matchLength -= length1;
match = prefixStart;
if (op > oend_w || sequence.matchLength < MINMATCH) {
U32 i;
for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
return sequenceLength;
}
} }
/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
/* Match within prefix of 1 or more bytes */
assert(op <= oMatchEnd);
assert(oMatchEnd <= oend_w);
assert(match >= prefixStart);
assert(sequence.matchLength >= 1);
/* match within prefix */
if (sequence.offset < 8) {
/* close range match, overlap */
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
int const sub2 = dec64table[sequence.offset];
op[0] = match[0];
op[1] = match[1];
op[2] = match[2];
op[3] = match[3];
match += dec32table[sequence.offset];
ZSTD_copy4(op+4, match);
match -= sub2;
} else {
ZSTD_copy8(op, match);
}
op += 8; match += 8;
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
HINT_INLINE
size_t ZSTD_execSequenceLong(BYTE* op,
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
{
BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = sequence.match;
/* check */
RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
/* copy Literals */
if (sequence.litLength > 8)
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
/* copy Match */
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix */
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
if (match + sequence.matchLength <= dictEnd) {
memmove(oLitEnd, match, sequence.matchLength);
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
* without overlap checking.
*/
if (sequence.offset >= WILDCOPY_VECLEN) {
/* We bet on a full wildcopy for matches, since we expect matches to be
* longer than literals (in general). In silesia, ~10% of matches are longer
* than 16 bytes.
*/
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
return sequenceLength;
}
/* span extDict & currentPrefixSegment */
{ size_t const length1 = dictEnd - match;
memmove(oLitEnd, match, length1);
op = oLitEnd + length1;
sequence.matchLength -= length1;
match = prefixStart;
if (op > oend_w || sequence.matchLength < MINMATCH) {
U32 i;
for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
return sequenceLength;
}
} }
assert(op <= oend_w);
assert(sequence.matchLength >= MINMATCH);
assert(sequence.offset < WILDCOPY_VECLEN);
/* match within prefix */
if (sequence.offset < 8) {
/* close range match, overlap */
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
int const sub2 = dec64table[sequence.offset];
op[0] = match[0];
op[1] = match[1];
op[2] = match[2];
op[3] = match[3];
match += dec32table[sequence.offset];
ZSTD_copy4(op+4, match);
match -= sub2;
} else {
ZSTD_copy8(op, match);
}
op += 8; match += 8;
/* Copy 8 bytes and spread the offset to be >= 8. */
ZSTD_overlapCopy8(&op, &match, sequence.offset);
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
/* If the match length is > 8 bytes, then continue with the wildcopy. */
if (sequence.matchLength > 8) {
assert(op < oMatchEnd);
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
}
return sequenceLength;
}
@ -1098,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
/* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
sequences[seqNb & STORED_SEQS_MASK] = sequence;
@ -1109,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
/* finish queue */
seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) {
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
}

thirdparty/zstd/zstd.h

@ -15,6 +15,7 @@ extern "C" {
#define ZSTD_H_235446
/* ====== Dependency ======*/
#include <limits.h> /* INT_MAX */
#include <stddef.h> /* size_t */
@ -71,7 +72,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 3
#define ZSTD_VERSION_RELEASE 4
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
@ -196,9 +197,13 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
/*! ZSTD_compressCCtx() :
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx
* The function will compress at requested compression level,
* ignoring any other parameter */
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
* Important : in order to behave similarly to `ZSTD_compress()`,
* this function compresses at requested compression level,
* __ignoring any other parameter__ .
* If any advanced parameter was set using the advanced API,
* they will all be reset. Only `compressionLevel` remains.
*/
ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
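As an illustration of the behaviour described above, a minimal sketch (the helper name one_shot is arbitrary, error handling omitted) showing that a parameter set through the advanced API has no effect on ZSTD_compressCCtx(), which only honours the level passed explicitly:

    #include <zstd.h>

    size_t one_shot(void* dst, size_t dstCap, const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        /* Ignored by ZSTD_compressCCtx(): only the explicit level (19) below applies. */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
        size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCap, src, srcSize, 19);
        ZSTD_freeCCtx(cctx);
        return cSize;  /* check with ZSTD_isError() */
    }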
@ -233,7 +238,7 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
* using ZSTD_CCtx_set*() functions.
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
* "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
* They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()
* __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
*
* It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
*
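By contrast, the sticky behaviour does apply to ZSTD_compress2(); a rough sketch (the helper name compress_two is arbitrary) where both frames pick up the same settings:

    #include <zstd.h>

    /* Compress two buffers with the same sticky settings. */
    void compress_two(void* dst1, size_t cap1, const void* src1, size_t n1,
                      void* dst2, size_t cap2, const void* src2, size_t n2)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
        /* Both frames are produced at level 19 with a checksum: the settings stick. */
        size_t const c1 = ZSTD_compress2(cctx, dst1, cap1, src1, n1);
        size_t const c2 = ZSTD_compress2(cctx, dst2, cap2, src2, n2);
        (void)c1; (void)c2;  /* check with ZSTD_isError() in real code */
        ZSTD_freeCCtx(cctx);
    }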
@ -261,18 +266,26 @@ typedef enum {
/* compression parameters
* Note: When compressing with a ZSTD_CDict these parameters are superseded
* by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict()
* for more info (superseded-by-cdict). */
ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
* by the parameters used to construct the ZSTD_CDict.
* See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
* Note that exact compression parameters are dynamically determined,
* depending on both compression level and srcSize (when known).
* Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level.
* Note 2 : setting a level sets all default values of other compression parameters */
* Note 2 : setting a level resets all other compression parameters to default */
/* Advanced compression parameters :
* It's possible to pin down compression parameters to some specific values.
* In which case, these values are no longer dynamically selected by the compressor */
ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2.
* This will set a memory budget for streaming decompression,
* with larger values requiring more memory
* and typically compressing more.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* Special: value 0 means "use default windowLog".
* Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
* requires explicitly allowing such window size at decompression stage if using streaming. */
* requires explicitly allowing such size at streaming decompression stage. */
ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2.
* Resulting memory usage is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
@ -283,13 +296,13 @@ typedef enum {
* Resulting memory usage is (1 << (chainLog+2)).
* Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
* Larger tables result in better and slower compression.
* This parameter is useless when using "fast" strategy.
* This parameter is useless for "fast" strategy.
* It's still useful when using "dfast" strategy,
* in which case it defines a secondary probe table.
* Special: value 0 means "use default chainLog". */
ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2.
* More attempts result in better and slower compression.
* This parameter is useless when using "fast" and "dFast" strategies.
* This parameter is useless for "fast" and "dFast" strategies.
* Special: value 0 means "use default searchLog". */
ZSTD_c_minMatch=105, /* Minimum size of searched matches.
* Note that Zstandard can still find matches of smaller size,
@ -344,7 +357,7 @@ typedef enum {
ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
* Content size must be known at the beginning of compression.
* This is automatically the case when using ZSTD_compress2(),
* For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
* For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
@ -363,7 +376,7 @@ typedef enum {
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
* The minimum size is automatically and transparently enforced */
* The minimum size is automatically and transparently enforced. */
ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
* It helps preserve compression ratio, while each job is compressed in parallel.
@ -386,6 +399,7 @@ typedef enum {
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@ -396,6 +410,7 @@ typedef enum {
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
ZSTD_c_experimentalParam7=1004
} ZSTD_cParameter;
typedef struct {
@ -793,12 +808,17 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
typedef struct ZSTD_CDict_s ZSTD_CDict;
/*! ZSTD_createCDict() :
* When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
* When compressing multiple messages or blocks using the same dictionary,
* it's recommended to digest the dictionary only once, since it's a costly operation.
* ZSTD_createCDict() will create a state from digesting a dictionary.
* The resulting state can be used for future compression operations with very limited startup cost.
* ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
* `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
* Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
* Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
* @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
* Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
* Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
* in which case the only thing that it transports is the @compressionLevel.
* This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
* expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
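A possible usage sketch of the pattern described above (helper name and level 3 are arbitrary; in practice the CDict would be created once and kept alive across many calls, not rebuilt per call):

    #include <zstd.h>

    size_t compress_with_shared_dict(void* dst, size_t dstCap,
                                     const void* src, size_t srcSize,
                                     const void* dictBuffer, size_t dictSize)
    {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 3 /*level*/);
        ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
        /* dictBuffer may be released right after ZSTD_createCDict(): its content is copied. */
        size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCap, src, srcSize, cdict);
        ZSTD_freeCCtx(cctx);
        ZSTD_freeCDict(cdict);
        return cSize;  /* check with ZSTD_isError() */
    }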
@ -925,7 +945,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
* Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
* It's a CPU consuming operation, with non-negligible impact on latency.
* If there is a need to use the same prefix multiple times, consider loadDictionary instead.
* Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
* Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
* Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
const void* prefix, size_t prefixSize);
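A small sketch of referencing a prefix for a single frame (helper name arbitrary; the prefix is dropped after that frame and would need to be re-referenced for the next one):

    #include <zstd.h>

    size_t compress_with_prefix(ZSTD_CCtx* cctx, void* dst, size_t dstCap,
                                const void* src, size_t srcSize,
                                const void* prefix, size_t prefixSize)
    {
        /* Interpreted as raw content by default (ZSTD_dct_rawContent). */
        ZSTD_CCtx_refPrefix(cctx, prefix, prefixSize);
        return ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
    }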
@ -969,7 +989,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
* Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
* Prefix buffer must remain unmodified up to the end of frame,
* reached when ZSTD_decompressStream() returns 0.
* Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
* Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
* Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
* Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
* A full dictionary is more costly, as it requires building tables.
@ -1014,8 +1034,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* Some of them might be removed in the future (especially when redundant with existing stable functions)
* ***************************************************************************************/
#define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size required to query frame header size */
#define ZSTD_FRAMEHEADERSIZE_MIN 6
#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
#define ZSTD_SKIPPABLEHEADERSIZE 8
@ -1063,6 +1083,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_SRCSIZEHINT_MIN 0
#define ZSTD_SRCSIZEHINT_MAX INT_MAX
/* internal */
#define ZSTD_HASHLOG3_MAX 17
@ -1072,6 +1094,24 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
typedef struct {
unsigned int matchPos; /* Match pos in dst */
/* If seqDef.offset > 3, then this is seqDef.offset - 3
* If seqDef.offset < 3, then this is the corresponding repeat offset
* But if seqDef.offset < 3 and litLength == 0, this is the
* repeat offset before the corresponding repeat offset
* And if seqDef.offset == 3 and litLength == 0, this is the
* most recent repeat offset - 1
*/
unsigned int offset;
unsigned int litLength; /* Literal length */
unsigned int matchLength; /* Match length */
/* 0 when seq not rep and seqDef.offset otherwise
* when litLength == 0 this will be <= 4, otherwise <= 3 like normal
*/
unsigned int rep;
} ZSTD_Sequence;
typedef struct {
unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
@ -1101,21 +1141,12 @@ typedef enum {
typedef enum {
ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
ZSTD_dlm_byRef = 1, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
} ZSTD_dictLoadMethod_e;
typedef enum {
/* Opened question : should we have a format ZSTD_f_auto ?
* Today, it would mean exactly the same as ZSTD_f_zstd1.
* But, in the future, should several formats become supported,
* on the compression side, it would mean "default format".
* On the decompression side, it would mean "automatic format detection",
* so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
* Since meaning is a little different, another option could be to define different enums for compression and decompression.
* This question could be kept for later, when there are actually multiple formats to support,
* but there is also the question of pinning enum values, and pinning value `0` is especially important */
ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
ZSTD_f_zstd1_magicless = 1, /* Variant of zstd frame format, without initial 4-bytes magic number.
ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
* Useful to save 4 bytes per generated frame.
* Decoder cannot recognise automatically this format, requiring this instruction. */
} ZSTD_format_e;
@ -1126,7 +1157,7 @@ typedef enum {
* to evolve and should be considered only in the context of extremely
* advanced performance tuning.
*
* Zstd currently supports the use of a CDict in two ways:
* Zstd currently supports the use of a CDict in three ways:
*
* - The contents of the CDict can be copied into the working context. This
* means that the compression can search both the dictionary and input
@ -1142,6 +1173,12 @@ typedef enum {
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
* - The CDict's tables are not used at all, and instead we use the working
* context alone to reload the dictionary and use params based on the source
* size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
* This method is effective when the dictionary sizes are very small relative
* to the input size, and the input size is fairly large to begin with.
*
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with
@ -1150,6 +1187,7 @@ typedef enum {
ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
} ZSTD_dictAttachPref_e;
typedef enum {
@ -1158,7 +1196,7 @@ typedef enum {
* levels will be compressed. */
ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be
* emitted if Huffman compression is not profitable. */
ZSTD_lcm_uncompressed = 2, /**< Always emit uncompressed literals. */
ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
} ZSTD_literalCompressionMode_e;
@ -1210,20 +1248,38 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
* or an error code (if srcSize is too small) */
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/*! ZSTD_getSequences() :
* Extract sequences from the sequence store
* zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2
* @return : number of sequences extracted
*/
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize);
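A rough sketch of calling it (ZSTD_STATIC_LINKING_ONLY is assumed since ZSTD_getSequences() and ZSTD_Sequence are experimental; allocating one ZSTD_Sequence per input byte is used as a generous upper bound, not a documented guarantee):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Dump the (litLength, matchLength, offset) triples chosen for src. */
    static void dump_sequences(const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
        size_t const nbSeqs = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
        if (!ZSTD_isError(nbSeqs)) {
            size_t i;
            for (i = 0; i < nbSeqs; i++)
                printf("lit=%u match=%u off=%u\n",
                       seqs[i].litLength, seqs[i].matchLength, seqs[i].offset);
        }
        free(seqs);
        ZSTD_freeCCtx(cctx);
    }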
/***************************************
* Memory management
***************************************/
/*! ZSTD_estimate*() :
* These functions make it possible to estimate memory usage
* of a future {D,C}Ctx, before its creation.
* ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
* It will also consider src size to be arbitrarily "large", which is worst case.
* If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
* ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
* Note : CCtx size estimation is only correct for single-threaded compression. */
* These functions make it possible to estimate memory usage of a future
* {D,C}Ctx, before its creation.
*
* ZSTD_estimateCCtxSize() will provide a budget large enough for any
* compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
* this estimate does not include space for a window buffer, so this estimate
* is guaranteed to be enough for single-shot compressions, but not streaming
* compressions. It will however assume the input may be arbitrarily large,
* which is the worst case. If srcSize is known to always be small,
* ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
* ZSTD_getCParams() to create cParams from compressionLevel.
* ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
* ZSTD_CCtxParams_setParameter().
*
* Note: only single-threaded compression is supported. This function will
* return an error code if ZSTD_c_nbWorkers is >= 1. */
ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
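A tiny sketch of querying the budget (ZSTD_estimateCCtxSize() lives in the experimental section, so ZSTD_STATIC_LINKING_ONLY is assumed):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    void print_cctx_budget(void)
    {
        /* Budget covering any single-shot, single-threaded compression at level <= 19. */
        size_t const budget = ZSTD_estimateCCtxSize(19);
        printf("CCtx budget for level <= 19: %zu bytes\n", budget);
    }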
@ -1334,7 +1390,8 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS
* Create a digested dictionary for compression
* Dictionary content is just referenced, not duplicated.
* As a consequence, `dictBuffer` **must** outlive CDict,
* and its content must remain unmodified throughout the lifetime of CDict. */
* and its content must remain unmodified throughout the lifetime of CDict.
* note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
/*! ZSTD_getCParams() :
@ -1361,7 +1418,9 @@ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
/*! ZSTD_compress_advanced() :
* Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) */
* Note : this function is now DEPRECATED.
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
* This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@ -1369,7 +1428,9 @@ ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
ZSTD_parameters params);
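A sketch of the suggested replacement path (parameter values are arbitrary examples):

    #include <zstd.h>

    /* Pin a few parameters explicitly, then use ZSTD_compress2(). */
    size_t compress_pinned(ZSTD_CCtx* cctx, void* dst, size_t dstCap,
                           const void* src, size_t srcSize)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 23);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
        return ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
    }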
/*! ZSTD_compress_usingCDict_advanced() :
* Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
* Note : this function is now REDUNDANT.
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
* This prototype will be marked as deprecated and generate compilation warning in some future version */
ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@ -1441,6 +1502,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
/* User's best guess of source size.
* Hint is not valid when srcSizeHint == 0.
* There is no guarantee that hint is close to actual source size,
* but compression ratio may regress significantly if guess considerably underestimates */
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
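A minimal sketch of passing the hint (experimental, so ZSTD_STATIC_LINKING_ONLY is assumed; the helper is hypothetical):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Tell the compressor roughly how much data to expect when the exact size
     * is unknown, so it can pick better parameters. */
    void hint_source_size(ZSTD_CCtx* cctx, int approxBytes)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, approxBytes);
    }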
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@ -1613,8 +1680,13 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
* pledgedSrcSize must be correct. If it is not known at init time, use
* ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
* "0" also disables frame content size field. It may be enabled in the future.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);
ZSTDLIB_API size_t
ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
int compressionLevel,
unsigned long long pledgedSrcSize);
/**! ZSTD_initCStream_usingDict() :
* This function is deprecated, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@ -1623,42 +1695,66 @@ ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLe
*
* Creates of an internal CDict (incompatible with static CCtx), except if
* dict == NULL or dictSize < 8, in which case no dict is used.
* Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if
* Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
* it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
ZSTDLIB_API size_t
ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
int compressionLevel);
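The non-deprecated equivalent spelled out as a sketch (helper name arbitrary, error checks omitted; a ZSTD_CStream is the same object as a ZSTD_CCtx, so the ZSTD_CCtx_* setters apply directly):

    #include <zstd.h>

    void init_cstream_with_dict(ZSTD_CStream* zcs,
                                const void* dict, size_t dictSize, int level)
    {
        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
        ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, level);
        ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
    }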
/**! ZSTD_initCStream_advanced() :
* This function is deprecated, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is
* // Pseudocode: Set each zstd parameter and leave the rest as-is.
* for ((param, value) : params) {
* ZSTD_CCtx_setParameter(zcs, param, value);
* }
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
*
* pledgedSrcSize must be correct. If srcSize is not known at init time, use
* value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy.
* dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
* pledgedSrcSize must be correct.
* If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize);
ZSTDLIB_API size_t
ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params,
unsigned long long pledgedSrcSize);
/**! ZSTD_initCStream_usingCDict() :
* This function is deprecated, and equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
* note : cdict will just be referenced, and must outlive compression session
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
/**! ZSTD_initCStream_usingCDict_advanced() :
* This function is deprecated, and is approximately equivalent to:
* This function is DEPRECATED, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is
* // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
* for ((fParam, value) : fParams) {
* ZSTD_CCtx_setParameter(zcs, fParam, value);
* }
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
* same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
* pledgedSrcSize must be correct. If srcSize is not known at init time, use
* value ZSTD_CONTENTSIZE_UNKNOWN.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);
ZSTDLIB_API size_t
ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
const ZSTD_CDict* cdict,
ZSTD_frameParameters fParams,
unsigned long long pledgedSrcSize);
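A sketch of the replacement sequence, using the checksum and content-size flags as example frame parameters:

    #include <zstd.h>

    void init_cstream_with_cdict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict,
                                 unsigned long long pledgedSrcSize)
    {
        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
        ZSTD_CCtx_setParameter(zcs, ZSTD_c_checksumFlag, 1);     /* example frame parameter */
        ZSTD_CCtx_setParameter(zcs, ZSTD_c_contentSizeFlag, 1);  /* example frame parameter */
        ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
        ZSTD_CCtx_refCDict(zcs, cdict);  /* cdict must outlive the compression session */
    }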
/*! ZSTD_resetCStream() :
* This function is deprecated, and is equivalent to:
@ -1673,6 +1769,7 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const
* For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
* but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
* @return : 0, or an error code (which can be tested using ZSTD_isError())
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
@ -1718,8 +1815,10 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
* ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
*
* note: no dictionary will be used if dict == NULL or dictSize < 8
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
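The decompression-side equivalent as a sketch (helper name arbitrary; a ZSTD_DStream is the same object as a ZSTD_DCtx):

    #include <zstd.h>

    void init_dstream_with_dict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
    {
        ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
        ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
    }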
/**
* This function is deprecated, and is equivalent to:
*
@ -1727,14 +1826,17 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic
* ZSTD_DCtx_refDDict(zds, ddict);
*
* note : ddict is referenced, it must outlive decompression session
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
/**
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
*
* re-use decompression parameters from previous init; saves dictionary loading
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
*/
ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
@ -1908,7 +2010,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
/*!
Block functions produce and decode raw zstd blocks, without frame metadata.
Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
A few rules to respect :