/* ******************************************************************
 * FSE : Finite State Entropy encoder
 * Copyright (c) Yann Collet, Facebook, Inc.
 *
 *  You can contact the author at :
 *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Includes
****************************************************************/
#include "../common/compiler.h"
#include "../common/mem.h"        /* U32, U16, etc. */
#include "../common/debug.h"      /* assert, DEBUGLOG */
#include "hist.h"                 /* HIST_count_wksp */
#include "../common/bitstream.h"
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#include "../common/error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */


/* **************************************************************
*  Error Management
****************************************************************/
#define FSE_isError ERR_isError


/* **************************************************************
*  Templates
****************************************************************/
/*
  designed to be included
  for type-specific functions (template emulation in C)
  Objective is to write these functions only once, for improved maintenance
*/

/* safety checks */
#ifndef FSE_FUNCTION_EXTENSION
#  error "FSE_FUNCTION_EXTENSION must be defined"
#endif
#ifndef FSE_FUNCTION_TYPE
#  error "FSE_FUNCTION_TYPE must be defined"
#endif

/* Function names */
#define FSE_CAT(X,Y) X##Y
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)


/* Function templates */

/* FSE_buildCTable_wksp() :
 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
 */
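/* A minimal usage sketch (illustrative only ; error handling elided).
 * Any 2-bytes-aligned buffer of FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog)
 * bytes is assumed to be a valid workSpace :
 *
 *   size_t const wkspSize = FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog);
 *   void* const wksp = ZSTD_malloc(wkspSize);
 *   size_t const err = FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, wksp, wkspSize);
 *   if (FSE_isError(err)) { ... }
 *   ZSTD_free(wksp);
 */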
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                      const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
                            void* workSpace, size_t wkspSize)
{
    U32 const tableSize = 1 << tableLog;
    U32 const tableMask = tableSize - 1;
    void* const ptr = ct;
    U16* const tableU16 = ( (U16*) ptr) + 2;
    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1);
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
    U32 const step = FSE_TABLESTEP(tableSize);
    U32 const maxSV1 = maxSymbolValue+1;

    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));  /* size = tableSize */

    U32 highThreshold = tableSize-1;

    assert(((size_t)workSpace & 1) == 0);  /* Must be 2 bytes-aligned */
    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
    /* CTable header */
    tableU16[-2] = (U16) tableLog;
    tableU16[-1] = (U16) maxSymbolValue;
    assert(tableLog < 16);   /* required for threshold strategy to work */

    /* For explanations on how to distribute symbol values over the table :
     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
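    /* Illustration : with tableLog==5 (the minimum), tableSize==32 and step==FSE_TABLESTEP(32)==23,
     * successive positions are 0,23,14,5,28,19,... ; the step is odd, hence coprime with the
     * power-of-2 table size, so the walk visits every cell exactly once before returning to 0. */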
    #ifdef __clang_analyzer__
    ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
    #endif

    /* symbol start positions */
    {   U32 u;
        cumul[0] = 0;
        for (u=1; u <= maxSV1; u++) {
            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
                cumul[u] = cumul[u-1] + 1;
                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
            } else {
                assert(normalizedCounter[u-1] >= 0);
                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
                assert(cumul[u] >= cumul[u-1]);  /* no overflow */
        }   }
        cumul[maxSV1] = (U16)(tableSize+1);
    }

    /* Spread symbols */
    if (highThreshold == tableSize - 1) {
        /* Case for no low prob count symbols. Lay down 8 bytes at a time
         * to reduce branch misses since we are operating on a small block
         */
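        /* Illustration : for a symbol s with count n==3, sv holds the byte value s replicated
         * 8 times ; one MEM_write64() stores 8 copies at once, but pos only advances by n==3,
         * so the surplus bytes are simply overwritten by the next symbol's run. */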
        BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
        {   U64 const add = 0x0101010101010101ull;
            size_t pos = 0;
            U64 sv = 0;
            U32 s;
            for (s=0; s<maxSV1; ++s, sv += add) {
                int i;
                int const n = normalizedCounter[s];
                MEM_write64(spread + pos, sv);
                for (i = 8; i < n; i += 8) {
                    MEM_write64(spread + pos + i, sv);
                }
                assert(n>=0);
                pos += (size_t)n;
            }
        }
        /* Spread symbols across the table. Lack of lowprob symbols means that
         * we don't need variable sized inner loop, so we can unroll the loop and
         * reduce branch misses.
         */
        {   size_t position = 0;
            size_t s;
            size_t const unroll = 2; /* Experimentally determined optimal unroll */
            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
            for (s = 0; s < (size_t)tableSize; s += unroll) {
                size_t u;
                for (u = 0; u < unroll; ++u) {
                    size_t const uPosition = (position + (u * step)) & tableMask;
                    tableSymbol[uPosition] = spread[s + u];
                }
                position = (position + (unroll * step)) & tableMask;
            }
            assert(position == 0);   /* Must have initialized all positions */
        }
    } else {
        U32 position = 0;
        U32 symbol;
        for (symbol=0; symbol<maxSV1; symbol++) {
            int nbOccurrences;
            int const freq = normalizedCounter[symbol];
            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
                position = (position + step) & tableMask;
                while (position > highThreshold)
                    position = (position + step) & tableMask;   /* Low proba area */
        }   }
        assert(position==0);  /* Must have initialized all positions */
    }

    /* Build table */
    {   U32 u; for (u=0; u<tableSize; u++) {
        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order ; gives next state value */
    }   }

    /* Build Symbol Transformation Table */
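    /* Illustration (see FSE_encodeSymbol() in fse.h) : the encoder computes
     * nbBitsOut = (U32)((state + deltaNbBits) >> 16). For a symbol of normalized frequency f,
     * maxBitsOut = tableLog - highbit(f-1) and minStatePlus = f << maxBitsOut, so states
     * below minStatePlus emit maxBitsOut-1 bits, and states at or above it emit maxBitsOut. */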
    {   unsigned total = 0;
        unsigned s;
        for (s=0; s<=maxSymbolValue; s++) {
            switch (normalizedCounter[s])
            {
            case  0:
                /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
                symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
                break;

            case -1:
            case  1:
                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
                assert(total <= INT_MAX);
                symbolTT[s].deltaFindState = (int)(total - 1);
                total ++;
                break;
            default :
                assert(normalizedCounter[s] > 1);
                {   U32 const maxBitsOut = tableLog - BIT_highbit32((U32)normalizedCounter[s]-1);
                    U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
                    symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
                    total += (unsigned)normalizedCounter[s];
    }   }   }   }

#if 0  /* debug : symbol costs */
    DEBUGLOG(5, "\n --- table statistics : ");
    {   U32 symbol;
        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
            DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
                    symbol, normalizedCounter[symbol],
                    FSE_getMaxNbBits(symbolTT, symbol),
                    (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
    }   }
#endif

    return 0;
}

#ifndef FSE_COMMONDEFS_ONLY

/*-**************************************************************
*  FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
                                   + 4 /* bitCount initialized at 4 */
                                   + 2 /* first two symbols may use one additional bit each */) / 8)
                                   + 1 /* round up to whole nb bytes */
                                   + 2 /* additional two bytes for bitstream flush */;
    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
}
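/* Worked example : FSE_NCountWriteBound(255, 9) == ((256*9 + 4 + 2)/8) + 1 + 2 == 288 + 3 == 291 bytes. */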
static size_t
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
                         unsigned writeIsSafe)
{
    BYTE* const ostart = (BYTE*) header;
    BYTE* out = ostart;
    BYTE* const oend = ostart + headerBufferSize;
    int nbBits;
    const int tableSize = 1 << tableLog;
    int remaining;
    int threshold;
    U32 bitStream = 0;
    int bitCount = 0;
    unsigned symbol = 0;
    unsigned const alphabetSize = maxSymbolValue + 1;
    int previousIs0 = 0;

    /* Table Size */
    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
    bitCount  += 4;

    /* Init */
    remaining = tableSize+1;   /* +1 for extra accuracy */
    threshold = tableSize;
    nbBits = tableLog+1;

    while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
        if (previousIs0) {
            unsigned start = symbol;
            while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
            if (symbol == alphabetSize) break;   /* incorrect distribution */
            while (symbol >= start+24) {
                start+=24;
                bitStream += 0xFFFFU << bitCount;
                if ((!writeIsSafe) && (out > oend-2))
                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
                out[0] = (BYTE) bitStream;
                out[1] = (BYTE)(bitStream>>8);
                out+=2;
                bitStream>>=16;
            }
            while (symbol >= start+3) {
                start+=3;
                bitStream += 3 << bitCount;
                bitCount += 2;
            }
            bitStream += (symbol-start) << bitCount;
            bitCount += 2;
            if (bitCount>16) {
                if ((!writeIsSafe) && (out > oend - 2))
                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
                out[0] = (BYTE)bitStream;
                out[1] = (BYTE)(bitStream>>8);
                out += 2;
                bitStream >>= 16;
                bitCount -= 16;
        }   }
        {   int count = normalizedCounter[symbol++];
            int const max = (2*threshold-1) - remaining;
            remaining -= count < 0 ? -count : count;
            count++;   /* +1 for extra accuracy */
            if (count>=threshold)
                count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
            bitStream += count << bitCount;
            bitCount  += nbBits;
            bitCount  -= (count<max);
            previousIs0  = (count==1);
            if (remaining<1) return ERROR(GENERIC);
            while (remaining<threshold) { nbBits--; threshold>>=1; }
        }
        if (bitCount>16) {
            if ((!writeIsSafe) && (out > oend - 2))
                return ERROR(dstSize_tooSmall);   /* Buffer overflow */
            out[0] = (BYTE)bitStream;
            out[1] = (BYTE)(bitStream>>8);
            out += 2;
            bitStream >>= 16;
            bitCount -= 16;
    }   }

    if (remaining != 1)
        return ERROR(GENERIC);  /* incorrect normalized distribution */
    assert(symbol <= alphabetSize);

    /* flush remaining bitStream */
    if ((!writeIsSafe) && (out > oend - 2))
        return ERROR(dstSize_tooSmall);   /* Buffer overflow */
    out[0] = (BYTE)bitStream;
    out[1] = (BYTE)(bitStream>>8);
    out += (bitCount+7) / 8;

    return (out-ostart);
}


size_t FSE_writeNCount (void* buffer, size_t bufferSize,
                  const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */

    if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);

    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
}

/*-**************************************************************
*  FSE Compression Code
****************************************************************/

FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
    size_t size;
    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
    return (FSE_CTable*)ZSTD_malloc(size);
}

void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
    U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
    assert(srcSize > 1); /* Not supported, RLE should be used instead */
    return minBits;
}
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
    U32 tableLog = maxTableLog;
    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
    assert(srcSize > 1); /* Not supported, RLE should be used instead */
    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
    return tableLog;
}

unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
}
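/* Worked example : FSE_optimalTableLog(12, 1000, 255).
 * maxBitsSrc = BIT_highbit32(999) - 2 = 7 ; minBits = min(BIT_highbit32(1000)+1, BIT_highbit32(255)+2) = 9.
 * tableLog starts at 12, is reduced to 7 by maxBitsSrc, then raised back to minBits, giving 9. */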
/* Secondary normalization method.
   To be used when primary method fails. */
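/* FSE_normalizeM2() first pins very rare symbols to lowProbCount (or 1 for slightly larger counts),
 * then distributes the remaining table cells over the still-unassigned symbols proportionally to
 * their counts, using a fixed-point step scaled on the remaining total. */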
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
{
    short const NOT_YET_ASSIGNED = -2;
    U32 s;
    U32 distributed = 0;
    U32 ToDistribute;

    /* Init */
    U32 const lowThreshold = (U32)(total >> tableLog);
    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));

    for (s=0; s<=maxSymbolValue; s++) {
        if (count[s] == 0) {
            norm[s]=0;
            continue;
        }
        if (count[s] <= lowThreshold) {
            norm[s] = lowProbCount;
            distributed++;
            total -= count[s];
            continue;
        }
        if (count[s] <= lowOne) {
            norm[s] = 1;
            distributed++;
            total -= count[s];
            continue;
        }

        norm[s]=NOT_YET_ASSIGNED;
    }
    ToDistribute = (1 << tableLog) - distributed;

    if (ToDistribute == 0)
        return 0;

    if ((total / ToDistribute) > lowOne) {
        /* risk of rounding to zero */
        lowOne = (U32)((total * 3) / (ToDistribute * 2));
        for (s=0; s<=maxSymbolValue; s++) {
            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
                norm[s] = 1;
                distributed++;
                total -= count[s];
                continue;
        }   }
        ToDistribute = (1 << tableLog) - distributed;
    }

    if (distributed == maxSymbolValue+1) {
        /* all values are pretty poor;
           probably incompressible data (should have already been detected);
           find max, then give all remaining points to max */
        U32 maxV = 0, maxC = 0;
        for (s=0; s<=maxSymbolValue; s++)
            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
        norm[maxV] += (short)ToDistribute;
        return 0;
    }

    if (total == 0) {
        /* all of the symbols were low enough for the lowOne or lowThreshold */
        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
        return 0;
    }

    {   U64 const vStepLog = 62 - tableLog;
        U64 const mid = (1ULL << (vStepLog-1)) - 1;
        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
        U64 tmpTotal = mid;
        for (s=0; s<=maxSymbolValue; s++) {
            if (norm[s]==NOT_YET_ASSIGNED) {
                U64 const end = tmpTotal + (count[s] * rStep);
                U32 const sStart = (U32)(tmpTotal >> vStepLog);
                U32 const sEnd = (U32)(end >> vStepLog);
                U32 const weight = sEnd - sStart;
                if (weight < 1)
                    return ERROR(GENERIC);
                norm[s] = (short)weight;
                tmpTotal = end;
    }   }   }

    return 0;
}
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
                           const unsigned* count, size_t total,
                           unsigned maxSymbolValue, unsigned useLowProbCount)
{
    /* Sanity checks */
    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */

    {   static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
        short const lowProbCount = useLowProbCount ? -1 : 1;
        U64 const scale = 62 - tableLog;
        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
        U64 const vStep = 1ULL<<(scale-20);
        int stillToDistribute = 1<<tableLog;
        unsigned s;
        unsigned largest=0;
        short largestP=0;
        U32 lowThreshold = (U32)(total >> tableLog);

        for (s=0; s<=maxSymbolValue; s++) {
            if (count[s] == total) return 0;   /* rle special case */
            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
            if (count[s] <= lowThreshold) {
                normalizedCounter[s] = lowProbCount;
                stillToDistribute--;
            } else {
                short proba = (short)((count[s]*step) >> scale);
                if (proba<8) {
                    U64 restToBeat = vStep * rtbTable[proba];
                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
                }
                if (proba > largestP) { largestP=proba; largest=s; }
                normalizedCounter[s] = proba;
                stillToDistribute -= proba;
        }   }
        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
            /* corner case, need another normalization method */
            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
            if (FSE_isError(errorCode)) return errorCode;
        }
        else normalizedCounter[largest] += (short)stillToDistribute;
    }

#if 0
    {   /* Print Table (debug) */
        U32 s;
        U32 nTotal = 0;
        for (s=0; s<=maxSymbolValue; s++)
            RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
        for (s=0; s<=maxSymbolValue; s++)
            nTotal += abs(normalizedCounter[s]);
        if (nTotal != (1U<<tableLog))
            RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
        getchar();
    }
#endif

    return tableLog;
}

/* fake FSE_CTable, for raw (uncompressed) input */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
    const unsigned tableSize = 1 << nbBits;
    const unsigned tableMask = tableSize - 1;
    const unsigned maxSymbolValue = tableMask;
    void* const ptr = ct;
    U16* const tableU16 = ( (U16*) ptr) + 2;
    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
    unsigned s;

    /* Sanity checks */
    if (nbBits < 1) return ERROR(GENERIC);   /* min size */

    /* header */
    tableU16[-2] = (U16) nbBits;
    tableU16[-1] = (U16) maxSymbolValue;

    /* Build table */
    for (s=0; s<tableSize; s++)
        tableU16[s] = (U16)(tableSize + s);

    /* Build Symbol Transformation Table */
    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
        for (s=0; s<=maxSymbolValue; s++) {
            symbolTT[s].deltaNbBits = deltaNbBits;
            symbolTT[s].deltaFindState = s-1;
    }   }

    return 0;
}
/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
    void* ptr = ct;
    U16* tableU16 = ( (U16*) ptr) + 2;
    void* FSCTptr = (U32*)ptr + 2;
    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;

    /* header */
    tableU16[-2] = (U16) 0;
    tableU16[-1] = (U16) symbolValue;

    /* Build table */
    tableU16[0] = 0;
    tableU16[1] = 0;   /* just in case */

    /* Build Symbol Transformation Table */
    symbolTT[symbolValue].deltaNbBits = 0;
    symbolTT[symbolValue].deltaFindState = 0;

    return 0;
}

static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
                           const void* src, size_t srcSize,
                           const FSE_CTable* ct, const unsigned fast)
{
    const BYTE* const istart = (const BYTE*) src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = iend;

    BIT_CStream_t bitC;
    FSE_CState_t CState1, CState2;

    /* init */
    if (srcSize <= 2) return 0;
    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }

#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))

    if (srcSize & 1) {
        FSE_initCState2(&CState1, ct, *--ip);
        FSE_initCState2(&CState2, ct, *--ip);
        FSE_encodeSymbol(&bitC, &CState1, *--ip);
        FSE_FLUSHBITS(&bitC);
    } else {
        FSE_initCState2(&CState2, ct, *--ip);
        FSE_initCState2(&CState1, ct, *--ip);
    }

    /* join to mod 4 */
    srcSize -= 2;
    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
        FSE_encodeSymbol(&bitC, &CState2, *--ip);
        FSE_encodeSymbol(&bitC, &CState1, *--ip);
        FSE_FLUSHBITS(&bitC);
    }

    /* 2 or 4 encoding per loop */
    while ( ip>istart ) {

        FSE_encodeSymbol(&bitC, &CState2, *--ip);

        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
            FSE_FLUSHBITS(&bitC);

        FSE_encodeSymbol(&bitC, &CState1, *--ip);

        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
            FSE_encodeSymbol(&bitC, &CState2, *--ip);
            FSE_encodeSymbol(&bitC, &CState1, *--ip);
        }

        FSE_FLUSHBITS(&bitC);
    }

    FSE_flushCState(&bitC, &CState2);
    FSE_flushCState(&bitC, &CState1);
    return BIT_closeCStream(&bitC);
}
size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
                           const void* src, size_t srcSize,
                           const FSE_CTable* ct)
{
    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));

    if (fast)
        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
    else
        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
}


size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/* FSE_compress_wksp() :
 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
 * `wkspSize` size must be `(1<<tableLog)`.
 */
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned count[FSE_MAX_SYMBOL_VALUE+1];
    S16   norm[FSE_MAX_SYMBOL_VALUE+1];
    FSE_CTable* CTable = (FSE_CTable*)workSpace;
    size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
    void* scratchBuffer = (void*)(CTable + CTableSize);
    size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));

    /* init conditions */
    if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
    if (srcSize <= 1) return 0;  /* Not compressible */
    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;

    /* Scan input and build symbol stats */
    {   CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
        if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
        if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
    }

    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );

    /* Write table description header */
    {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
        op += nc_err;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    /* check compressibility */
    if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;

    return op-ostart;
}

typedef struct {
    FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
    union {
      U32 hist_wksp[HIST_WKSP_SIZE_U32];
      BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
    } workspace;
} fseWkspMax_t;

size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
    fseWkspMax_t scratchBuffer;
    DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
    return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}

size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
}
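/* Minimal usage sketch (illustrative only) : dstCapacity should be at least FSE_compressBound(srcSize).
 *
 *   size_t const cSize = FSE_compress(dst, dstCapacity, src, srcSize);
 *   if (FSE_isError(cSize)) { handle error }
 *   if (cSize == 0) { not compressible : store src as-is }
 *   if (cSize == 1) { src is a single repeated symbol : store it as RLE }
 */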

#endif

#endif   /* FSE_COMMONDEFS_ONLY */