2019-01-04 00:30:03 +00:00
|
|
|
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
|
|
|
|
|
|
|
|
/* zstd_ddict.c :
 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
|
|
|
|
|
|
|
|
/*-*******************************************************
*  Dependencies
*********************************************************/
|
2023-05-22 12:32:14 +00:00
|
|
|
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
|
2021-01-08 10:21:43 +00:00
|
|
|
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
2020-09-18 19:38:36 +00:00
|
|
|
#include "../common/cpu.h" /* bmi2 */
|
|
|
|
#include "../common/mem.h" /* low level memory routines */
|
2019-01-04 00:30:03 +00:00
|
|
|
#define FSE_STATIC_LINKING_ONLY
|
2020-09-18 19:38:36 +00:00
|
|
|
#include "../common/fse.h"
|
|
|
|
#include "../common/huf.h"
|
2019-01-04 00:30:03 +00:00
|
|
|
#include "zstd_decompress_internal.h"
|
|
|
|
#include "zstd_ddict.h"
|
|
|
|
|
|
|
|
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
2020-09-18 19:38:36 +00:00
|
|
|
# include "../legacy/zstd_legacy.h"
|
2019-01-04 00:30:03 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*-*******************************************************
*  Types
*********************************************************/
|
|
|
|
struct ZSTD_DDict_s {
|
|
|
|
void* dictBuffer;
|
|
|
|
const void* dictContent;
|
|
|
|
size_t dictSize;
|
|
|
|
ZSTD_entropyDTables_t entropy;
|
|
|
|
U32 dictID;
|
|
|
|
U32 entropyPresent;
|
|
|
|
ZSTD_customMem cMem;
|
|
|
|
}; /* typedef'd to ZSTD_DDict within "zstd.h" */
|
|
|
|
|
|
|
|
const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
|
|
|
|
{
|
|
|
|
assert(ddict != NULL);
|
|
|
|
return ddict->dictContent;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
|
|
|
|
{
|
|
|
|
assert(ddict != NULL);
|
|
|
|
return ddict->dictSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*! ZSTD_copyDDictParameters() :
 *  Configure `dctx` so that it decodes using the dictionary described by `ddict` :
 *  - dictID and window pointers are set from the dictionary content,
 *  - when `ddict` carries entropy tables, `dctx` table pointers reference them
 *    (tables are not copied) and the 3 repcodes are copied.
 *  Both pointers must be non-NULL (assert only). */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* window : prefix and virtual start both sit at the beginning of the
     * dictionary, while dictEnd and previousDstEnd sit right after its last byte */
    {   const void* const contentStart = ddict->dictContent;
        const BYTE* const contentEnd = (const BYTE*)contentStart + ddict->dictSize;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = contentStart;
        dctx->virtualStart = contentStart;
        dctx->dictEnd = contentEnd;
        dctx->previousDstEnd = dctx->dictEnd;
    }
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    /* content-only dictionary : no entropy state to inherit */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* full dictionary : reference its tables, copy its repcodes */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    dctx->entropy.rep[0] = ddict->entropy.rep[0];
    dctx->entropy.rep[1] = ddict->entropy.rep[1];
    dctx->entropy.rep[2] = ddict->entropy.rep[2];
}
|
|
|
|
|
|
|
|
|
|
|
|
static size_t
|
|
|
|
ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
|
|
|
|
ZSTD_dictContentType_e dictContentType)
|
|
|
|
{
|
|
|
|
ddict->dictID = 0;
|
|
|
|
ddict->entropyPresent = 0;
|
|
|
|
if (dictContentType == ZSTD_dct_rawContent) return 0;
|
|
|
|
|
|
|
|
if (ddict->dictSize < 8) {
|
|
|
|
if (dictContentType == ZSTD_dct_fullDict)
|
|
|
|
return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
|
|
|
|
return 0; /* pure content mode */
|
|
|
|
}
|
|
|
|
{ U32 const magic = MEM_readLE32(ddict->dictContent);
|
|
|
|
if (magic != ZSTD_MAGIC_DICTIONARY) {
|
|
|
|
if (dictContentType == ZSTD_dct_fullDict)
|
|
|
|
return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
|
|
|
|
return 0; /* pure content mode */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
|
|
|
|
|
|
|
|
/* load entropy tables */
|
2019-04-18 09:53:29 +00:00
|
|
|
RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
|
|
|
|
&ddict->entropy, ddict->dictContent, ddict->dictSize)),
|
2020-09-18 19:38:36 +00:00
|
|
|
dictionary_corrupted, "");
|
2019-01-04 00:30:03 +00:00
|
|
|
ddict->entropyPresent = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
|
|
|
|
const void* dict, size_t dictSize,
|
|
|
|
ZSTD_dictLoadMethod_e dictLoadMethod,
|
|
|
|
ZSTD_dictContentType_e dictContentType)
|
|
|
|
{
|
|
|
|
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
|
|
|
|
ddict->dictBuffer = NULL;
|
|
|
|
ddict->dictContent = dict;
|
|
|
|
if (!dict) dictSize = 0;
|
|
|
|
} else {
|
2021-01-08 10:21:43 +00:00
|
|
|
void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
|
2019-01-04 00:30:03 +00:00
|
|
|
ddict->dictBuffer = internalBuffer;
|
|
|
|
ddict->dictContent = internalBuffer;
|
|
|
|
if (!internalBuffer) return ERROR(memory_allocation);
|
2021-01-08 10:21:43 +00:00
|
|
|
ZSTD_memcpy(internalBuffer, dict, dictSize);
|
2019-01-04 00:30:03 +00:00
|
|
|
}
|
|
|
|
ddict->dictSize = dictSize;
|
2023-05-22 12:32:14 +00:00
|
|
|
ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
|
2019-01-04 00:30:03 +00:00
|
|
|
|
|
|
|
/* parse dictionary content */
|
2020-09-18 19:38:36 +00:00
|
|
|
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
|
2019-01-04 00:30:03 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
|
|
|
ZSTD_dictLoadMethod_e dictLoadMethod,
|
|
|
|
ZSTD_dictContentType_e dictContentType,
|
|
|
|
ZSTD_customMem customMem)
|
|
|
|
{
|
2021-01-08 10:21:43 +00:00
|
|
|
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
2019-01-04 00:30:03 +00:00
|
|
|
|
2021-01-08 10:21:43 +00:00
|
|
|
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
|
2019-01-04 00:30:03 +00:00
|
|
|
if (ddict == NULL) return NULL;
|
|
|
|
ddict->cMem = customMem;
|
|
|
|
{ size_t const initResult = ZSTD_initDDict_internal(ddict,
|
|
|
|
dict, dictSize,
|
|
|
|
dictLoadMethod, dictContentType);
|
|
|
|
if (ZSTD_isError(initResult)) {
|
|
|
|
ZSTD_freeDDict(ddict);
|
|
|
|
return NULL;
|
|
|
|
} }
|
|
|
|
return ddict;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*! ZSTD_createDDict() :
|
|
|
|
* Create a digested dictionary, to start decompression without startup delay.
|
|
|
|
* `dict` content is copied inside DDict.
|
|
|
|
* Consequently, `dict` can be released after `ZSTD_DDict` creation */
|
|
|
|
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
|
|
|
|
{
|
|
|
|
ZSTD_customMem const allocator = { NULL, NULL, NULL };
|
|
|
|
return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*! ZSTD_createDDict_byReference() :
|
|
|
|
* Create a digested dictionary, to start decompression without startup delay.
|
|
|
|
* Dictionary content is simply referenced, it will be accessed during decompression.
|
|
|
|
* Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
|
|
|
|
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
|
|
|
|
{
|
|
|
|
ZSTD_customMem const allocator = { NULL, NULL, NULL };
|
|
|
|
return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const ZSTD_DDict* ZSTD_initStaticDDict(
|
|
|
|
void* sBuffer, size_t sBufferSize,
|
|
|
|
const void* dict, size_t dictSize,
|
|
|
|
ZSTD_dictLoadMethod_e dictLoadMethod,
|
|
|
|
ZSTD_dictContentType_e dictContentType)
|
|
|
|
{
|
|
|
|
size_t const neededSpace = sizeof(ZSTD_DDict)
|
|
|
|
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
|
|
|
|
ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
|
|
|
|
assert(sBuffer != NULL);
|
|
|
|
assert(dict != NULL);
|
|
|
|
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
|
|
|
|
if (sBufferSize < neededSpace) return NULL;
|
|
|
|
if (dictLoadMethod == ZSTD_dlm_byCopy) {
|
2021-01-08 10:21:43 +00:00
|
|
|
ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
|
2019-01-04 00:30:03 +00:00
|
|
|
dict = ddict+1;
|
|
|
|
}
|
|
|
|
if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
|
|
|
|
dict, dictSize,
|
|
|
|
ZSTD_dlm_byRef, dictContentType) ))
|
|
|
|
return NULL;
|
|
|
|
return ddict;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*! ZSTD_freeDDict() :
 *  Release a DDict together with its internal dictionary copy (dictBuffer),
 *  using the allocator stored in the DDict.
 *  Accepts NULL, in which case nothing is done.
 * @return : 0, always. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* copy the allocator out : it lives inside the object being freed */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_customFree(ddict->dictBuffer, allocator);
        ZSTD_customFree(ddict, allocator);
    }
    return 0;   /* also supports free on NULL */
}
|
|
|
|
|
|
|
|
/*! ZSTD_estimateDDictSize() :
|
|
|
|
* Estimate amount of memory that will be needed to create a dictionary for decompression.
|
|
|
|
* Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
|
|
|
|
size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
|
|
|
|
{
|
|
|
|
return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
|
|
|
|
{
|
|
|
|
if (ddict==NULL) return 0; /* support sizeof on NULL */
|
|
|
|
return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*! ZSTD_getDictID_fromDDict() :
|
|
|
|
* Provides the dictID of the dictionary loaded into `ddict`.
|
|
|
|
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
|
|
|
|
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
|
|
|
|
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
|
|
|
|
{
|
|
|
|
if (ddict==NULL) return 0;
|
2023-05-22 12:32:14 +00:00
|
|
|
return ddict->dictID;
|
2019-01-04 00:30:03 +00:00
|
|
|
}
|