zstd: Update to upstream version 1.5.0

Release notes: https://github.com/facebook/zstd/releases/tag/v1.5.0
This commit is contained in:
Rémi Verschelde 2021-11-19 12:54:45 +01:00
parent 42f8bfaff0
commit 5bea1370f0
No known key found for this signature in database
GPG key ID: C3336907360768E1
57 changed files with 4124 additions and 1358 deletions

View file

@ -1,6 +1,6 @@
/* ******************************************************************
* FSE : Finite State Entropy encoder
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy

View file

@ -1,7 +1,7 @@
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy

View file

@ -1,7 +1,7 @@
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy

View file

@ -1,6 +1,6 @@
/* ******************************************************************
* Huffman encoder, part of New Generation Entropy library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -59,7 +59,15 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
*/
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
typedef struct {
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
unsigned count[HUF_TABLELOG_MAX+1];
S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;
static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
@ -67,33 +75,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
unsigned count[HUF_TABLELOG_MAX+1];
S16 norm[HUF_TABLELOG_MAX+1];
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
/* init conditions */
if (wtSize <= 1) return 0; /* Not compressible */
/* Scan input and build symbol stats */
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
{ unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
}
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
/* Write table description header */
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
op += hSize;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
@ -102,29 +107,33 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
}
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
typedef struct {
HUF_CompressWeightsWksp wksp;
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
void* workspace, size_t workspaceSize)
{
BYTE* op = (BYTE*)dst;
U32 n;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
/* check conditions */
/* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
/* convert to weight */
bitsToWeight[0] = 0;
wksp->bitsToWeight[0] = 0;
for (n=1; n<huffLog+1; n++)
bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
for (n=0; n<maxSymbolValue; n++)
huffWeight[n] = bitsToWeight[CTable[n].nbBits];
wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
/* attempt weights compression by FSE */
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
op[0] = (BYTE)hSize;
return hSize+1;
@ -134,12 +143,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
for (n=0; n<maxSymbolValue; n+=2)
op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
return ((maxSymbolValue+1)/2) + 1;
}
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
HUF_WriteCTableWksp wksp;
return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
@ -732,7 +751,10 @@ static size_t HUF_compressCTable_internal(
typedef struct {
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
HUF_buildCTable_wksp_tables buildCTable_wksp;
union {
HUF_buildCTable_wksp_tables buildCTable_wksp;
HUF_WriteCTableWksp writeCTable_wksp;
} wksps;
} HUF_compress_tables_t;
/* HUF_compress_internal() :
@ -795,7 +817,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
&table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
/* Zero unused symbols in CTable, so we can check it for validity */
@ -804,7 +826,8 @@ HUF_compress_internal (void* dst, size_t dstSize,
}
/* Write table description header */
{ CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
/* Check if using previous huffman table is beneficial */
if (repeat && *repeat != HUF_repeat_none) {
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -81,6 +81,53 @@ typedef struct {
ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
/***********************************************
* Entropy buffer statistics structs and funcs *
***********************************************/
/** ZSTD_hufCTablesMetadata_t :
* Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer.
* hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
symbolEncodingType_e hType;
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;
/** ZSTD_fseCTablesMetadata_t :
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
* fse tables in fseTablesBuffer.
* fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
symbolEncodingType_e llType;
symbolEncodingType_e ofType;
symbolEncodingType_e mlType;
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
size_t fseTablesSize;
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;
typedef struct {
ZSTD_hufCTablesMetadata_t hufMetadata;
ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the block.
* @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize);
/*********************************
* Compression internals structs *
*********************************/
typedef struct {
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
U32 len; /* Raw length of match */
@ -141,14 +188,21 @@ typedef struct {
} ZSTD_compressedBlockState_t;
typedef struct {
BYTE const* nextSrc; /* next block here to continue on current prefix */
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more valid data */
BYTE const* nextSrc; /* next block here to continue on current prefix */
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more valid data */
U32 nbOverflowCorrections; /* Number of times overflow correction has run since
* ZSTD_window_init(). Useful for debugging coredumps
* and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
*/
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
@ -160,9 +214,17 @@ struct ZSTD_matchState_t {
*/
U32 nextToUpdate; /* index from which to continue table update */
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
int dedicatedDictSearch; /* Indicates whether this matchState is using the
* dedicated dictionary search structure.
*/
@ -183,13 +245,22 @@ typedef struct {
U32 checksum;
} ldmEntry_t;
typedef struct {
BYTE const* split;
U32 hash;
U32 checksum;
ldmEntry_t* bucket;
} ldmMatchCandidate_t;
#define LDM_BATCH_SIZE 64
typedef struct {
ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
U32 loadedDictEnd;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
* Depends on ldmParams.minMatchLength */
size_t splitIndices[LDM_BATCH_SIZE];
ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;
typedef struct {
@ -246,6 +317,15 @@ struct ZSTD_CCtx_params_s {
ZSTD_sequenceFormat_e blockDelimiters;
int validateSequences;
/* Block splitting */
int splitBlocks;
/* Param for deciding whether to use row-based matchfinder */
ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
/* Always load a dictionary in ext-dict mode (not prefix mode)? */
int deterministicRefPrefix;
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
@ -269,7 +349,9 @@ struct ZSTD_CCtx_s {
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
ZSTD_CCtx_params requestedParams;
ZSTD_CCtx_params appliedParams;
ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
U32 dictID;
size_t dictContentSize;
ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
size_t blockSize;
@ -321,6 +403,11 @@ struct ZSTD_CCtx_s {
#ifdef ZSTD_MULTITHREAD
ZSTDMT_CCtx* mtctx;
#endif
/* Tracing */
#if ZSTD_TRACE
ZSTD_TraceCtx traceCtx;
#endif
};
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@ -355,7 +442,7 @@ typedef enum {
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@ -532,8 +619,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
/* literal Length */
if (litLength>0xFFFF) {
assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
seqStorePtr->longLengthID = 1;
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
seqStorePtr->longLengthType = ZSTD_llt_literalLength;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
seqStorePtr->sequences[0].litLength = (U16)litLength;
@ -543,8 +630,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
/* match Length */
if (mlBase>0xFFFF) {
assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
seqStorePtr->longLengthID = 2;
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
seqStorePtr->sequences[0].matchLength = (U16)mlBase;
@ -795,6 +882,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
window->dictLimit = end;
}
MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
return window.dictLimit == 1 &&
window.lowLimit == 1 &&
(window.nextSrc - window.base) == 1;
}
/**
* ZSTD_window_hasExtDict():
* Returns non-zero if the window has a non-empty extDict.
@ -818,15 +912,69 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
ZSTD_noDict;
}
/* Defining this macro to non-zero tells zstd to run the overflow correction
* code much more frequently. This is very inefficient, and should only be
* used for tests and fuzzers.
*/
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
# else
# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
# endif
#endif
/**
* ZSTD_window_canOverflowCorrect():
* Returns non-zero if the indices are large enough for overflow correction
* to work correctly without impacting compression ratio.
*/
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
U32 cycleLog,
U32 maxDist,
U32 loadedDictEnd,
void const* src)
{
U32 const cycleSize = 1u << cycleLog;
U32 const curr = (U32)((BYTE const*)src - window.base);
U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
/* Adjust the min index to backoff the overflow correction frequency,
* so we don't waste too much CPU in overflow correction. If this
* computation overflows we don't really care, we just need to make
* sure it is at least minIndexToOverflowCorrect.
*/
U32 const adjustment = window.nbOverflowCorrections + 1;
U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
minIndexToOverflowCorrect);
U32 const indexLargeEnough = curr > adjustedIndex;
/* Only overflow correct early if the dictionary is invalidated already,
* so we don't hurt compression ratio.
*/
U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
return indexLargeEnough && dictionaryInvalidated;
}
/**
* ZSTD_window_needOverflowCorrection():
* Returns non-zero if the indices are getting too large and need overflow
* protection.
*/
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
U32 cycleLog,
U32 maxDist,
U32 loadedDictEnd,
void const* src,
void const* srcEnd)
{
U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
return 1;
}
}
return curr > ZSTD_CURRENT_MAX;
}
@ -838,7 +986,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
*
* The least significant cycleLog bits of the indices must remain the same,
* which may be 0. Every index up to maxDist in the past must be valid.
* NOTE: (maxDist & cycleMask) must be zero.
*/
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
U32 maxDist, void const* src)
@ -862,17 +1009,25 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
*/
U32 const cycleMask = (1U << cycleLog) - 1;
U32 const cycleSize = 1u << cycleLog;
U32 const cycleMask = cycleSize - 1;
U32 const curr = (U32)((BYTE const*)src - window->base);
U32 const currentCycle0 = curr & cycleMask;
/* Exclude zero so that newCurrent - maxDist >= 1. */
U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
U32 const newCurrent = currentCycle1 + maxDist;
U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
U32 const correction = curr - newCurrent;
assert((maxDist & cycleMask) == 0);
/* maxDist must be a power of two so that:
* (newCurrent & cycleMask) == (curr & cycleMask)
* This is required to not corrupt the chains / binary tree.
*/
assert((maxDist & (maxDist - 1)) == 0);
assert((curr & cycleMask) == (newCurrent & cycleMask));
assert(curr > newCurrent);
/* Loose bound, should be around 1<<29 (see above) */
assert(correction > 1<<28);
if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
/* Loose bound, should be around 1<<29 (see above) */
assert(correction > 1<<28);
}
window->base += correction;
window->dictBase += correction;
@ -888,6 +1043,8 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
assert(window->lowLimit <= newCurrent);
assert(window->dictLimit <= newCurrent);
++window->nbOverflowCorrections;
DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
window->lowLimit);
return correction;
@ -997,6 +1154,7 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
window->nextSrc = window->base + 1; /* see issue #1241 */
window->nbOverflowCorrections = 0;
}
/**
@ -1007,7 +1165,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
* Returns non-zero if the segment is contiguous.
*/
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
void const* src, size_t srcSize)
void const* src, size_t srcSize,
int forceNonContiguous)
{
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
@ -1017,7 +1176,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
assert(window->base != NULL);
assert(window->dictBase != NULL);
/* Check if blocks follow each other */
if (src != window->nextSrc) {
if (src != window->nextSrc || forceNonContiguous) {
/* not contiguous */
size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
@ -1200,4 +1359,9 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe
* condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
/** ZSTD_CCtx_trace() :
* Trace the end of a compression call.
*/
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
#endif /* ZSTD_COMPRESS_H */

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -15,7 +15,7 @@
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
@ -42,7 +42,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
@ -117,7 +117,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
}
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
{
unsigned cost = 0;
unsigned s;
assert(total > 0);
for (s = 0; s <= max; ++s) {
unsigned norm = (unsigned)((256 * count[s]) / total);
if (count[s] != 0 && norm == 0)
@ -232,6 +234,11 @@ ZSTD_selectEncodingType(
return set_compressed;
}
typedef struct {
S16 norm[MaxSeq + 1];
U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
} ZSTD_BuildCTableWksp;
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
@ -258,7 +265,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
if (count[codeTable[nbSeq-1]] > 1) {
@ -266,11 +273,12 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
nbSeq_1--;
}
assert(nbSeq_1 > 1);
assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
(void)entropyWorkspaceSize;
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
return NCountSize;
}
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -15,288 +15,10 @@
#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
#include "hist.h" /* HIST_countFast_wksp */
#include "zstd_compress_internal.h"
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
/*-*************************************
* Superblock entropy buffer structs
***************************************/
/** ZSTD_hufCTablesMetadata_t :
* Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer.
* hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
typedef struct {
symbolEncodingType_e hType;
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;
/** ZSTD_fseCTablesMetadata_t :
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
* fse tables in fseTablesBuffer.
* fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
typedef struct {
symbolEncodingType_e llType;
symbolEncodingType_e ofType;
symbolEncodingType_e mlType;
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
size_t fseTablesSize;
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
} ZSTD_fseCTablesMetadata_t;
typedef struct {
ZSTD_hufCTablesMetadata_t hufMetadata;
ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildSuperBlockEntropy_literal() :
* Builds entropy for the super-block literals.
* Stores literals block type (raw, rle, compressed, repeat) and
* huffman description table to hufMetadata.
* @return : size of huffman description table or error code */
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_hufCTablesMetadata_t* hufMetadata,
const int disableLiteralsCompression,
void* workspace, size_t wkspSize)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
BYTE* const countWkspStart = wkspStart;
unsigned* const countWksp = (unsigned*)workspace;
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
BYTE* const nodeWksp = countWkspStart + countWkspSize;
const size_t nodeWkspSize = wkspEnd-nodeWksp;
unsigned maxSymbolValue = 255;
unsigned huffLog = HUF_TABLELOG_DEFAULT;
HUF_repeat repeat = prevHuf->repeatMode;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
/* Prepare nextEntropy assuming reusing the existing table */
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralsCompression) {
DEBUGLOG(5, "set_basic - disabled");
hufMetadata->hType = set_basic;
return 0;
}
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) {
DEBUGLOG(5, "set_basic - too small");
hufMetadata->hType = set_basic;
return 0;
}
}
/* Scan input and build symbol stats */
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
if (largest == srcSize) {
DEBUGLOG(5, "set_rle");
hufMetadata->hType = set_rle;
return 0;
}
if (largest <= (srcSize >> 7)+4) {
DEBUGLOG(5, "set_basic - no gain");
hufMetadata->hType = set_basic;
return 0;
}
}
/* Validate the previous Huffman table */
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
repeat = HUF_repeat_none;
}
/* Build Huffman Tree */
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
maxSymbolValue, huffLog,
nodeWksp, nodeWkspSize);
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
huffLog = (U32)maxBits;
{ /* Build and write the CTable */
size_t const newCSize = HUF_estimateCompressedSize(
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
size_t const hSize = HUF_writeCTable(
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
/* Check against repeating the previous CTable */
if (repeat != HUF_repeat_none) {
size_t const oldCSize = HUF_estimateCompressedSize(
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
DEBUGLOG(5, "set_repeat - smaller");
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
hufMetadata->hType = set_repeat;
return 0;
}
}
if (newCSize + hSize >= srcSize) {
DEBUGLOG(5, "set_basic - no gains");
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
hufMetadata->hType = set_basic;
return 0;
}
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
hufMetadata->hType = set_compressed;
nextHuf->repeatMode = HUF_repeat_check;
return hSize;
}
}
}
/** ZSTD_buildSuperBlockEntropy_sequences() :
* Builds entropy for the super-block sequences.
* Stores symbol compression modes and fse table to fseMetadata.
* @return : size of fse tables or error code */
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
const ZSTD_fseCTables_t* prevEntropy,
ZSTD_fseCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_fseCTablesMetadata_t* fseMetadata,
void* workspace, size_t wkspSize)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
BYTE* const countWkspStart = wkspStart;
unsigned* const countWksp = (unsigned*)workspace;
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
BYTE* const cTableWksp = countWkspStart + countWkspSize;
const size_t cTableWkspSize = wkspEnd-cTableWksp;
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
const BYTE* const llCodeTable = seqStorePtr->llCode;
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
BYTE* const ostart = fseMetadata->fseTablesBuffer;
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
BYTE* op = ostart;
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
ZSTD_memset(workspace, 0, wkspSize);
fseMetadata->lastCountSize = 0;
/* convert length/distances into codes */
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ U32 LLtype;
unsigned max = MaxLL;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
DEBUGLOG(5, "Building LL table");
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
countWksp, max, mostFrequent, nbSeq,
LLFSELog, prevEntropy->litlengthCTable,
LL_defaultNorm, LL_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
if (LLtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->llType = (symbolEncodingType_e) LLtype;
} }
/* build CTable for Offsets */
{ U32 Offtype;
unsigned max = MaxOff;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
countWksp, max, mostFrequent, nbSeq,
OffFSELog, prevEntropy->offcodeCTable,
OF_defaultNorm, OF_defaultNormLog,
defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
if (Offtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
} }
/* build CTable for MatchLengths */
{ U32 MLtype;
unsigned max = MaxML;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
countWksp, max, mostFrequent, nbSeq,
MLFSELog, prevEntropy->matchlengthCTable,
ML_defaultNorm, ML_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
if (MLtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
} }
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
return op-ostart;
}
/** ZSTD_buildSuperBlockEntropy() :
* Builds entropy for the super-block.
* @return : 0 on success or error code */
static size_t
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize)
{
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
entropyMetadata->hufMetadata.hufDesSize =
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
&prevEntropy->huf, &nextEntropy->huf,
&entropyMetadata->hufMetadata,
ZSTD_disableLiteralsCompression(cctxParams),
workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
entropyMetadata->fseMetadata.fseTablesSize =
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
&prevEntropy->fse, &nextEntropy->fse,
cctxParams,
&entropyMetadata->fseMetadata,
workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
return 0;
}
/** ZSTD_compressSubBlock_literal() :
* Compresses literals section for a sub-block.
* When we have to write the Huffman table we will sometimes choose a header
@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
* before we know the table size + compressed size, so we have a bound on the
* table size. If we guessed incorrectly, we fall back to uncompressed literals.
*
* We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded
* We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
* in writing the header, otherwise it is set to 0.
*
* hufMetadata->hType has literals block type info.
@ -643,8 +365,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
void* workspace, size_t wkspSize,
int writeEntropy)
{
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
size_t cSeqSizeEstimate = 0;
if (nbSeq == 0) return sequencesSectionHeaderSize;
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
nbSeq, fseTables->offcodeCTable, NULL,
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
@ -830,7 +553,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
unsigned lastBlock) {
ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
&zc->blockState.prevCBlock->entropy,
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -35,6 +35,10 @@ extern "C" {
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
#endif
/* Set our tables and aligneds to align by 64 bytes */
#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
/*-*************************************
* Structures
***************************************/
@ -117,10 +121,11 @@ typedef enum {
* - Tables: these are any of several different datastructures (hash tables,
* chain tables, binary trees) that all respect a common format: they are
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams.
* Their sizes depend on the cparams. These tables are 64-byte aligned.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used.
* alignment, but don't require any initialization before they're used. These
* buffers are each aligned to 64 bytes.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
@ -133,8 +138,7 @@ typedef enum {
*
* 1. Objects
* 2. Buffers
* 3. Aligned
* 4. Tables
* 3. Aligned/Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
@ -187,6 +191,8 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
* Since tables aren't currently redzoned, you don't need to call through this
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
if (size == 0)
@ -198,30 +204,110 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#endif
}
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
/**
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
* Used to determine the number of bytes required for a given "aligned".
*/
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
}
/**
* Returns the amount of additional space the cwksp must allocate
* for internal purposes (currently only alignment).
*/
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes
* to align the beginning of the aligned secion.
*
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
* aligneds being sized in multiples of 64 bytes.
*/
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
return slackSpace;
}
/**
* Return the number of additional bytes required to align a pointer to the given number of bytes.
* alignBytes must be a power of two.
*/
MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0);
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
return bytes;
}
/**
* Internal function. Do not use directly.
* Reserves the given number of bytes within the aligned/buffer segment of the wksp, which
* counts from the end of the wksp. (as opposed to the object/table segment)
*
* Returns a pointer to the beginning of that space.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) {
void* const alloc = (BYTE*)ws->allocStart - bytes;
void* const bottom = ws->tableEnd;
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
if (alloc < bottom) {
DEBUGLOG(4, "cwksp: alloc failed!");
ws->allocFailed = 1;
return NULL;
}
if (alloc < ws->tableValidEnd) {
ws->tableValidEnd = alloc;
}
ws->allocStart = alloc;
return alloc;
}
/**
* Moves the cwksp to the next phase, and does any necessary allocations.
* Returns a 0 on success, or zstd error
*/
MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
assert(phase >= ws->phase);
if (phase > ws->phase) {
/* Going from allocating objects to allocating buffers */
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
phase >= ZSTD_cwksp_alloc_buffers) {
ws->tableValidEnd = ws->objectEnd;
}
/* Going from allocating buffers to allocating aligneds/tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
/* If unaligned allocations down from a too-large top have left us
* unaligned, we need to realign our alloc ptr. Technically, this
* can consume space that is unaccounted for in the neededSpace
* calculation. However, I believe this can only happen when the
* workspace is too large, and specifically when it is too large
* by a larger margin than the space that will be consumed. */
/* TODO: cleaner, compiler warning friendly way to do this??? */
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
if (ws->allocStart < ws->tableValidEnd) {
ws->tableValidEnd = ws->allocStart;
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
size_t const bytesToAlign =
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
memory_allocation, "aligned phase - alignment initial allocation failed!");
}
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
void* const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* const end = (BYTE*)alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!");
ws->objectEnd = end;
ws->tableEnd = end;
ws->tableValidEnd = end;
}
}
ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws);
}
return 0;
}
/**
@ -237,38 +323,25 @@ MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
void* alloc;
void* bottom = ws->tableEnd;
ZSTD_cwksp_internal_advance_phase(ws, phase);
alloc = (BYTE *)ws->allocStart - bytes;
if (bytes == 0)
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
return NULL;
}
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
if (alloc < bottom) {
DEBUGLOG(4, "cwksp: alloc failed!");
ws->allocFailed = 1;
return NULL;
}
if (alloc < ws->tableValidEnd) {
ws->tableValidEnd = alloc;
}
ws->allocStart = alloc;
alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
if (alloc) {
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
}
}
#endif
@ -283,28 +356,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
}
/**
* Reserves and returns memory sized on and aligned on sizeof(unsigned).
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
assert((bytes & (sizeof(U32)-1)) == 0);
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
return ptr;
}
/**
* Aligned on sizeof(unsigned). These buffers have the special property that
* Aligned on 64 bytes. These buffers have the special property that
* their values remain constrained, allowing us to re-use them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
void* alloc = ws->tableEnd;
void* end = (BYTE *)alloc + bytes;
void* top = ws->allocStart;
void* alloc;
void* end;
void* top;
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
}
alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes;
top = ws->allocStart;
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
assert((bytes & (sizeof(U32)-1)) == 0);
ZSTD_cwksp_internal_advance_phase(ws, phase);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(end <= top);
if (end > top) {
@ -320,6 +401,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
}
#endif
assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
return alloc;
}
@ -503,7 +586,7 @@ MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
/**
* Moves the management of a workspace from one cwksp to another. The src cwksp
* is left in an invalid state (src must be re-init()'ed before its used again).
* is left in an invalid state (src must be re-init()'ed before it's used again).
*/
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
*dst = *src;
@ -527,6 +610,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Functions Checking Free Space
***************************************/
/* ZSTD_alignmentSpaceWithinBounds() :
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used.
*/
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
size_t const estimatedSpace, int resizedWorkspace) {
if (resizedWorkspace) {
/* Resized/newly allocated wksp should have exact bounds */
return ZSTD_cwksp_used(ws) == estimatedSpace;
} else {
/* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
* than estimatedSpace. See the comments in zstd_cwksp.h for details.
*/
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
}
}
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
& (repIndex > dictStartIndex))
& (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
& (repIndex2 > dictStartIndex))
& (offset_2 < current2 - dictStartIndex))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -242,7 +242,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
/* ensure there will be no no underflow
/* ensure there will be no underflow
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
@ -416,9 +416,9 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const repMatch = repBase + repIndex;
hashTable[h] = curr; /* update hash table */
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
assert(offset_1 <= curr +1); /* check repIndex */
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
& (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -26,6 +26,7 @@ extern "C" {
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
@ -43,6 +44,15 @@ size_t ZSTD_compressBlock_lazy(
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -56,6 +66,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState(
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -66,6 +85,15 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -76,9 +104,19 @@ size_t ZSTD_compressBlock_lazy_extDict(
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -11,13 +11,126 @@
#include "zstd_ldm.h"
#include "../common/debug.h"
#include "../common/xxhash.h"
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
#include "zstd_ldm_geartab.h"
#define LDM_BUCKET_SIZE_LOG 3
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10
typedef struct {
U64 rolling;
U64 stopMask;
} ldmRollingHashState_t;
/** ZSTD_ldm_gear_init():
*
* Initializes the rolling hash state such that it will honor the
* settings in params. */
static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
{
unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
unsigned hashRateLog = params->hashRateLog;
state->rolling = ~(U32)0;
/* The choice of the splitting criterion is subject to two conditions:
* 1. it has to trigger on average every 2^(hashRateLog) bytes;
* 2. ideally, it has to depend on a window of minMatchLength bytes.
*
* In the gear hash algorithm, bit n depends on the last n bytes;
* so in order to obtain a good quality splitting criterion it is
* preferable to use bits with high weight.
*
* To match condition 1 we use a mask with hashRateLog bits set
* and, because of the previous remark, we make sure these bits
* have the highest possible weight while still respecting
* condition 2.
*/
if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
} else {
/* In this degenerate case we simply honor the hash rate. */
state->stopMask = ((U64)1 << hashRateLog) - 1;
}
}
/** ZSTD_ldm_gear_reset()
* Feeds [data, data + minMatchLength) into the hash without registering any
* splits. This effectively resets the hash state. This is used when skipping
* over data, either at the beginning of a block, or skipping sections.
*/
static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
BYTE const* data, size_t minMatchLength)
{
U64 hash = state->rolling;
size_t n = 0;
#define GEAR_ITER_ONCE() do { \
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
n += 1; \
} while (0)
while (n + 3 < minMatchLength) {
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
}
while (n < minMatchLength) {
GEAR_ITER_ONCE();
}
#undef GEAR_ITER_ONCE
}
/** ZSTD_ldm_gear_feed():
*
* Registers in the splits array all the split points found in the first
* size bytes following the data pointer. This function terminates when
* either all the data has been processed or LDM_BATCH_SIZE splits are
* present in the splits array.
*
* Precondition: The splits array must not be full.
* Returns: The number of bytes processed. */
static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
BYTE const* data, size_t size,
size_t* splits, unsigned* numSplits)
{
size_t n;
U64 hash, mask;
hash = state->rolling;
mask = state->stopMask;
n = 0;
#define GEAR_ITER_ONCE() do { \
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
n += 1; \
if (UNLIKELY((hash & mask) == 0)) { \
splits[*numSplits] = n; \
*numSplits += 1; \
if (*numSplits == LDM_BATCH_SIZE) \
goto done; \
} \
} while (0)
while (n + 3 < size) {
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
}
while (n < size) {
GEAR_ITER_ONCE();
}
#undef GEAR_ITER_ONCE
done:
state->rolling = hash;
return n;
}
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
ZSTD_compressionParameters const* cParams)
@ -54,41 +167,6 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}
/** ZSTD_ldm_getSmallHash() :
* numBits should be <= 32
* If numBits==0, returns 0.
* @return : the most significant numBits of value. */
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
{
assert(numBits <= 32);
return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
}
/** ZSTD_ldm_getChecksum() :
* numBitsToDiscard should be <= 32
* @return : the next most significant 32 bits after numBitsToDiscard */
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
{
assert(numBitsToDiscard <= 32);
return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
}
/** ZSTD_ldm_getTag() ;
* Given the hash, returns the most significant numTagBits bits
* after (32 + hbits) bits.
*
* If there are not enough bits remaining, return the last
* numTagBits bits. */
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
{
assert(numTagBits < 32 && hbits <= 32);
if (32 - hbits < numTagBits) {
return hash & (((U32)1 << numTagBits) - 1);
} else {
return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
}
}
/** ZSTD_ldm_getBucket() :
* Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
@ -103,38 +181,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
size_t const hash, const ldmEntry_t entry,
ldmParams_t const ldmParams)
{
BYTE* const bucketOffsets = ldmState->bucketOffsets;
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
bucketOffsets[hash]++;
bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
}
BYTE* const pOffset = ldmState->bucketOffsets + hash;
unsigned const offset = *pOffset;
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
*pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
/** ZSTD_ldm_makeEntryAndInsertByTag() :
*
* Gets the small hash, checksum, and tag from the rollingHash.
*
* If the tag matches (1 << ldmParams.hashRateLog)-1, then
* creates an ldmEntry from the offset, and inserts it into the hash table.
*
* hBits is the length of the small hash, which is the most significant hBits
* of rollingHash. The checksum is the next 32 most significant bits, followed
* by ldmParams.hashRateLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
U64 const rollingHash,
U32 const hBits,
U32 const offset,
ldmParams_t const ldmParams)
{
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
if (tag == tagMask) {
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
ldmEntry_t entry;
entry.offset = offset;
entry.checksum = checksum;
ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
}
}
/** ZSTD_ldm_countBackwardsMatch() :
@ -212,43 +264,42 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
return 0;
}
/** ZSTD_ldm_fillLdmHashTable() :
*
* Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
* lastHash is the rolling hash that corresponds to lastHashed.
*
* Returns the rolling hash corresponding to position iend-1. */
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
U64 lastHash, const BYTE* lastHashed,
const BYTE* iend, const BYTE* base,
U32 hBits, ldmParams_t const ldmParams)
{
U64 rollingHash = lastHash;
const BYTE* cur = lastHashed + 1;
while (cur < iend) {
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
cur[ldmParams.minMatchLength-1],
state->hashPower);
ZSTD_ldm_makeEntryAndInsertByTag(state,
rollingHash, hBits,
(U32)(cur - base), ldmParams);
++cur;
}
return rollingHash;
}
void ZSTD_ldm_fillHashTable(
ldmState_t* state, const BYTE* ip,
ldmState_t* ldmState, const BYTE* ip,
const BYTE* iend, ldmParams_t const* params)
{
U32 const minMatchLength = params->minMatchLength;
U32 const hBits = params->hashLog - params->bucketSizeLog;
BYTE const* const base = ldmState->window.base;
BYTE const* const istart = ip;
ldmRollingHashState_t hashState;
size_t* const splits = ldmState->splitIndices;
unsigned numSplits;
DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
if ((size_t)(iend - ip) >= params->minMatchLength) {
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
ZSTD_ldm_fillLdmHashTable(
state, startingHash, ip, iend - params->minMatchLength, state->window.base,
params->hashLog - params->bucketSizeLog,
*params);
ZSTD_ldm_gear_init(&hashState, params);
while (ip < iend) {
size_t hashed;
unsigned n;
numSplits = 0;
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
for (n = 0; n < numSplits; n++) {
if (ip + splits[n] >= istart + minMatchLength) {
BYTE const* const split = ip + splits[n] - minMatchLength;
U64 const xxhash = XXH64(split, minMatchLength, 0);
U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
ldmEntry_t entry;
entry.offset = (U32)(split - base);
entry.checksum = (U32)(xxhash >> 32);
ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
}
}
ip += hashed;
}
}
@ -274,11 +325,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
/* LDM parameters */
int const extDict = ZSTD_window_hasExtDict(ldmState->window);
U32 const minMatchLength = params->minMatchLength;
U64 const hashPower = ldmState->hashPower;
U32 const entsPerBucket = 1U << params->bucketSizeLog;
U32 const hBits = params->hashLog - params->bucketSizeLog;
U32 const ldmBucketSize = 1U << params->bucketSizeLog;
U32 const hashRateLog = params->hashRateLog;
U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
/* Prefix and extDict parameters */
U32 const dictLimit = ldmState->window.dictLimit;
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@ -290,45 +338,69 @@ static size_t ZSTD_ldm_generateSequences_internal(
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;
BYTE const* const iend = istart + srcSize;
BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
BYTE const* const ilimit = iend - HASH_READ_SIZE;
/* Input positions */
BYTE const* anchor = istart;
BYTE const* ip = istart;
/* Rolling hash */
BYTE const* lastHashed = NULL;
U64 rollingHash = 0;
/* Rolling hash state */
ldmRollingHashState_t hashState;
/* Arrays for staged-processing */
size_t* const splits = ldmState->splitIndices;
ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
unsigned numSplits;
while (ip <= ilimit) {
size_t mLength;
U32 const curr = (U32)(ip - base);
size_t forwardMatchLength = 0, backwardMatchLength = 0;
ldmEntry_t* bestEntry = NULL;
if (ip != istart) {
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
lastHashed[minMatchLength],
hashPower);
} else {
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
}
lastHashed = ip;
if (srcSize < minMatchLength)
return iend - anchor;
/* Do not insert and do not look for a match */
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
ip++;
continue;
/* Initialize the rolling hash state with the first minMatchLength bytes */
ZSTD_ldm_gear_init(&hashState, params);
ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
ip += minMatchLength;
while (ip < ilimit) {
size_t hashed;
unsigned n;
numSplits = 0;
hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
splits, &numSplits);
for (n = 0; n < numSplits; n++) {
BYTE const* const split = ip + splits[n] - minMatchLength;
U64 const xxhash = XXH64(split, minMatchLength, 0);
U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
candidates[n].split = split;
candidates[n].hash = hash;
candidates[n].checksum = (U32)(xxhash >> 32);
candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
PREFETCH_L1(candidates[n].bucket);
}
/* Get the best entry and compute the match lengths */
{
ldmEntry_t* const bucket =
ZSTD_ldm_getBucket(ldmState,
ZSTD_ldm_getSmallHash(rollingHash, hBits),
*params);
ldmEntry_t* cur;
size_t bestMatchLength = 0;
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
for (n = 0; n < numSplits; n++) {
size_t forwardMatchLength = 0, backwardMatchLength = 0,
bestMatchLength = 0, mLength;
U32 offset;
BYTE const* const split = candidates[n].split;
U32 const checksum = candidates[n].checksum;
U32 const hash = candidates[n].hash;
ldmEntry_t* const bucket = candidates[n].bucket;
ldmEntry_t const* cur;
ldmEntry_t const* bestEntry = NULL;
ldmEntry_t newEntry;
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
newEntry.offset = (U32)(split - base);
newEntry.checksum = checksum;
/* If a split point would generate a sequence overlapping with
* the previous one, we merely register it in the hash table and
* move on */
if (split < anchor) {
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
continue;
}
for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
size_t curForwardMatchLength, curBackwardMatchLength,
curTotalMatchLength;
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
@ -342,31 +414,23 @@ static size_t ZSTD_ldm_generateSequences_internal(
cur->offset < dictLimit ? dictEnd : iend;
BYTE const* const lowMatchPtr =
cur->offset < dictLimit ? dictStart : lowPrefixPtr;
curForwardMatchLength = ZSTD_count_2segments(
ip, pMatch, iend,
matchEnd, lowPrefixPtr);
curForwardMatchLength =
ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
if (curForwardMatchLength < minMatchLength) {
continue;
}
curBackwardMatchLength =
ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor,
pMatch, lowMatchPtr,
dictStart, dictEnd);
curTotalMatchLength = curForwardMatchLength +
curBackwardMatchLength;
curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
} else { /* !extDict */
BYTE const* const pMatch = base + cur->offset;
curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
curForwardMatchLength = ZSTD_count(split, pMatch, iend);
if (curForwardMatchLength < minMatchLength) {
continue;
}
curBackwardMatchLength =
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
lowPrefixPtr);
curTotalMatchLength = curForwardMatchLength +
curBackwardMatchLength;
ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
}
curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
if (curTotalMatchLength > bestMatchLength) {
bestMatchLength = curTotalMatchLength;
@ -375,57 +439,54 @@ static size_t ZSTD_ldm_generateSequences_internal(
bestEntry = cur;
}
}
}
/* No match found -- continue searching */
if (bestEntry == NULL) {
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
hBits, curr,
*params);
ip++;
continue;
}
/* No match found -- insert an entry into the hash table
* and process the next candidate match */
if (bestEntry == NULL) {
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
continue;
}
/* Match found */
mLength = forwardMatchLength + backwardMatchLength;
ip -= backwardMatchLength;
/* Match found */
offset = (U32)(split - base) - bestEntry->offset;
mLength = forwardMatchLength + backwardMatchLength;
{
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
{
/* Store the sequence:
* ip = curr - backwardMatchLength
* The match is at (bestEntry->offset - backwardMatchLength)
/* Out of sequence storage */
if (rawSeqStore->size == rawSeqStore->capacity)
return ERROR(dstSize_tooSmall);
seq->litLength = (U32)(split - backwardMatchLength - anchor);
seq->matchLength = (U32)mLength;
seq->offset = offset;
rawSeqStore->size++;
}
/* Insert the current entry into the hash table --- it must be
* done after the previous block to avoid clobbering bestEntry */
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
anchor = split + forwardMatchLength;
/* If we find a match that ends after the data that we've hashed
* then we have a repeating, overlapping, pattern. E.g. all zeros.
* If one repetition of the pattern matches our `stopMask` then all
* repetitions will. We don't need to insert them all into out table,
* only the first one. So skip over overlapping matches.
* This is a major speed boost (20x) for compressing a single byte
* repeated, when that byte ends up in the table.
*/
U32 const matchIndex = bestEntry->offset;
U32 const offset = curr - matchIndex;
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
/* Out of sequence storage */
if (rawSeqStore->size == rawSeqStore->capacity)
return ERROR(dstSize_tooSmall);
seq->litLength = (U32)(ip - anchor);
seq->matchLength = (U32)mLength;
seq->offset = offset;
rawSeqStore->size++;
if (anchor > ip + hashed) {
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
/* Continue the outter loop at anchor (ip + hashed == anchor). */
ip = anchor - hashed;
break;
}
}
/* Insert the current entry into the hash table */
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
(U32)(lastHashed - base),
*params);
assert(ip + backwardMatchLength == lastHashed);
/* Fill the hash table from lastHashed+1 to ip+mLength*/
/* Heuristic: don't need to fill the entire table at end of block */
if (ip + mLength <= ilimit) {
rollingHash = ZSTD_ldm_fillLdmHashTable(
ldmState, rollingHash, lastHashed,
ip + mLength, base, hBits, *params);
lastHashed = ip + mLength - 1;
}
ip += mLength;
anchor = ip;
ip += hashed;
}
return iend - anchor;
}
@ -474,7 +535,7 @@ size_t ZSTD_ldm_generateSequences(
assert(chunkStart < iend);
/* 1. Perform overflow correction if necessary. */
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@ -596,12 +657,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
void const* src, size_t srcSize)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
unsigned const minMatch = cParams->minMatch;
ZSTD_blockCompressor const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;
BYTE const* const iend = istart + srcSize;
@ -620,7 +682,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
assert(rawSeqStore->pos <= rawSeqStore->size);
assert(rawSeqStore->size <= rawSeqStore->capacity);
/* Loop through each sequence and apply the block compressor to the lits */
/* Loop through each sequence and apply the block compressor to the literals */
while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
/* maybeSplitSequence updates rawSeqStore->pos */
rawSeq const sequence = maybeSplitSequence(rawSeqStore,

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -66,6 +66,7 @@ size_t ZSTD_ldm_generateSequences(
*/
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
void const* src, size_t srcSize);
/**
@ -73,7 +74,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
*
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
* Avoids emitting matches less than `minMatch` bytes.
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
* Must be called for data that is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
U32 const minMatch);

View file

@ -0,0 +1,103 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_LDM_GEARTAB_H
#define ZSTD_LDM_GEARTAB_H
static U64 ZSTD_ldm_gearTab[256] = {
0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140,
0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e,
0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b,
0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec,
0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab,
0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c,
0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a,
0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756,
0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
0x2b4da14f2613d8f4
};
#endif /* ZSTD_LDM_GEARTAB_H */

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -472,8 +472,6 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
assert(params.ldmParams.hashRateLog < 32);
serialState->ldmState.hashPower =
ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
} else {
ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
}
@ -486,10 +484,10 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
unsigned const bucketLog =
params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
size_t const bucketSize = (size_t)1 << bucketLog;
unsigned const prevBucketLog =
serialState->params.ldmParams.hashLog -
serialState->params.ldmParams.bucketSizeLog;
size_t const numBuckets = (size_t)1 << bucketLog;
/* Size the seq pool tables */
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
/* Reset the window */
@ -501,20 +499,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
}
if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
}
if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
return 1;
/* Zero the tables */
ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
/* Update window state and fill hash table with dict */
serialState->ldmState.loadedDictEnd = 0;
if (dictSize > 0) {
if (dictContentType == ZSTD_dct_rawContent) {
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
} else {
@ -571,7 +569,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
seqStore.size == 0 && seqStore.capacity > 0);
assert(src.size <= serialState->params.jobSize);
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
error = ZSTD_ldm_generateSequences(
&serialState->ldmState, &seqStore,
&serialState->params.ldmParams, src.start, src.size);
@ -683,6 +681,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
/* Don't run LDM for the chunks, since we handle it externally */
jobParams.ldmParams.enableLdm = 0;
/* Correct nbWorkers to 0. */
jobParams.nbWorkers = 0;
/* init */
@ -695,6 +695,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
}
if (!job->firstJob) {
size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
if (ZSTD_isError(err)) JOB_ERROR(err);
}
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
ZSTD_dtlm_fast,
@ -750,6 +754,13 @@ static void ZSTDMT_compressionJob(void* jobDescription)
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
lastCBlockSize = cSize;
} }
if (!job->firstJob) {
/* Double check that we don't have an ext-dict, because then our
* repcode invalidation doesn't work.
*/
assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
ZSTD_CCtx_trace(cctx, 0);
_endJob:
ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@ -1239,9 +1250,8 @@ size_t ZSTDMT_initCStream_internal(
if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
assert(jobSizeMB >= 1);
U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
mtctx->rsync.hash = 0;
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -32,11 +32,11 @@
/* === Constants === */
#ifndef ZSTDMT_NBWORKERS_MAX
# define ZSTDMT_NBWORKERS_MAX 200
#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
#endif
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */
# define ZSTDMT_JOBSIZE_MIN (512 KB)
#endif
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))