Add zstd compression support.

zstd has much better compression speed and ratio, and better decompression speed than currently available methods.
Also set zstd as the default compression method for Compression as well as FileAccessCompressed functions.
This commit is contained in:
Ferenc Arn 2017-06-08 20:43:56 -05:00
parent 01ed55987c
commit f177c15347
38 changed files with 15735 additions and 10 deletions

857
thirdparty/zstd/compress/fse_compress.c vendored Normal file
View file

@ -0,0 +1,857 @@
/* ******************************************************************
FSE : Finite State Entropy encoder
Copyright (C) 2013-2015, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
/* **************************************************************
* Compiler specifics
****************************************************************/
#ifdef _MSC_VER /* Visual Studio */
# define FORCE_INLINE static __forceinline
# include <intrin.h> /* For Visual 2005 */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
#else
# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# ifdef __GNUC__
# define FORCE_INLINE static inline __attribute__((always_inline))
# else
# define FORCE_INLINE static inline
# endif
# else
# define FORCE_INLINE static
# endif /* __STDC_VERSION__ */
#endif
/* **************************************************************
* Includes
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
/* **************************************************************
* Error Management
****************************************************************/
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
/* **************************************************************
* Templates
****************************************************************/
/*
designed to be included
for type-specific functions (template emulation in C)
Objective is to write these functions only once, for improved maintenance
*/
/* safety checks */
#ifndef FSE_FUNCTION_EXTENSION
# error "FSE_FUNCTION_EXTENSION must be defined"
#endif
#ifndef FSE_FUNCTION_TYPE
# error "FSE_FUNCTION_TYPE must be defined"
#endif
/* Function names */
#define FSE_CAT(X,Y) X##Y
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
/* Function templates */
/* FSE_buildCTable_wksp() :
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
*/
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
U32 const tableSize = 1 << tableLog;
U32 const tableMask = tableSize - 1;
void* const ptr = ct;
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
U32 const step = FSE_TABLESTEP(tableSize);
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
U32 highThreshold = tableSize-1;
/* CTable header */
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
tableU16[-2] = (U16) tableLog;
tableU16[-1] = (U16) maxSymbolValue;
/* For explanations on how to distribute symbol values over the table :
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
/* symbol start positions */
{ U32 u;
cumul[0] = 0;
for (u=1; u<=maxSymbolValue+1; u++) {
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
} else {
cumul[u] = cumul[u-1] + normalizedCounter[u-1];
} }
cumul[maxSymbolValue+1] = tableSize+1;
}
/* Spread symbols */
{ U32 position = 0;
U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
int nbOccurences;
for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
position = (position + step) & tableMask;
while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */
} }
if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */
}
/* Build table */
{ U32 u; for (u=0; u<tableSize; u++) {
FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */
} }
/* Build Symbol Transformation Table */
{ unsigned total = 0;
unsigned s;
for (s=0; s<=maxSymbolValue; s++) {
switch (normalizedCounter[s])
{
case 0: break;
case -1:
case 1:
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
symbolTT[s].deltaFindState = total - 1;
total ++;
break;
default :
{
U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = total - normalizedCounter[s];
total += normalizedCounter[s];
} } } }
return 0;
}
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
}
#ifndef FSE_COMMONDEFS_ONLY
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
unsigned writeIsSafe)
{
BYTE* const ostart = (BYTE*) header;
BYTE* out = ostart;
BYTE* const oend = ostart + headerBufferSize;
int nbBits;
const int tableSize = 1 << tableLog;
int remaining;
int threshold;
U32 bitStream;
int bitCount;
unsigned charnum = 0;
int previous0 = 0;
bitStream = 0;
bitCount = 0;
/* Table Size */
bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
bitCount += 4;
/* Init */
remaining = tableSize+1; /* +1 for extra accuracy */
threshold = tableSize;
nbBits = tableLog+1;
while (remaining>1) { /* stops at 1 */
if (previous0) {
unsigned start = charnum;
while (!normalizedCounter[charnum]) charnum++;
while (charnum >= start+24) {
start+=24;
bitStream += 0xFFFFU << bitCount;
if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
out[0] = (BYTE) bitStream;
out[1] = (BYTE)(bitStream>>8);
out+=2;
bitStream>>=16;
}
while (charnum >= start+3) {
start+=3;
bitStream += 3 << bitCount;
bitCount += 2;
}
bitStream += (charnum-start) << bitCount;
bitCount += 2;
if (bitCount>16) {
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out += 2;
bitStream >>= 16;
bitCount -= 16;
} }
{ int count = normalizedCounter[charnum++];
int const max = (2*threshold-1)-remaining;
remaining -= count < 0 ? -count : count;
count++; /* +1 for extra accuracy */
if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
bitStream += count << bitCount;
bitCount += nbBits;
bitCount -= (count<max);
previous0 = (count==1);
if (remaining<1) return ERROR(GENERIC);
while (remaining<threshold) nbBits--, threshold>>=1;
}
if (bitCount>16) {
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out += 2;
bitStream >>= 16;
bitCount -= 16;
} }
/* flush remaining bitStream */
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8;
if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
return (out-ostart);
}
size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
}
/*-**************************************************************
* Counting histogram
****************************************************************/
/*! FSE_count_simple
This function counts byte values within `src`, and store the histogram into table `count`.
It doesn't use any additional memory.
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
For this reason, prefer using a table `count` with 256 elements.
@return : count of most numerous element
*/
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* const end = ip + srcSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned max=0;
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
while (ip<end) count[*ip++]++;
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
{ U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
return (size_t)max;
}
/* FSE_count_parallel_wksp() :
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
static size_t FSE_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
unsigned checkMax, unsigned* const workSpace)
{
const BYTE* ip = (const BYTE*)source;
const BYTE* const iend = ip+sourceSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned max=0;
U32* const Counting1 = workSpace;
U32* const Counting2 = Counting1 + 256;
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
memset(Counting1, 0, 4*256*sizeof(unsigned));
/* safety checks */
if (!sourceSize) {
memset(count, 0, maxSymbolValue + 1);
*maxSymbolValuePtr = 0;
return 0;
}
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
/* by stripes of 16 bytes */
{ U32 cached = MEM_read32(ip); ip += 4;
while (ip < iend-15) {
U32 c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
}
ip-=4;
}
/* finish last symbols */
while (ip<iend) Counting1[*ip++]++;
if (checkMax) { /* verify stats will fit into destination table */
U32 s; for (s=255; s>maxSymbolValue; s--) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
{ U32 s; for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
} }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
return (size_t)max;
}
/* FSE_countFast_wksp() :
* Same as FSE_countFast(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, unsigned* workSpace)
{
if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
}
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[1024];
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
}
/* FSE_count_wksp() :
* Same as FSE_count(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, unsigned* workSpace)
{
if (*maxSymbolValuePtr < 255)
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
*maxSymbolValuePtr = 255;
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
}
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
unsigned tmpCounters[1024];
return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
}
/*-**************************************************************
* FSE Compression Code
****************************************************************/
/*! FSE_sizeof_CTable() :
FSE_CTable is a variable size structure which contains :
`U16 tableLog;`
`U16 maxSymbolValue;`
`U16 nextStateNumber[1 << tableLog];` // This size is variable
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
Allocation is manual (C standard does not support variable-size structures).
*/
size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
{
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
}
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
size_t size;
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
return (FSE_CTable*)malloc(size);
}
void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
return minBits;
}
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
return tableLog;
}
unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
}
/* Secondary normalization method.
To be used when primary method fails. */
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
{
short const NOT_YET_ASSIGNED = -2;
U32 s;
U32 distributed = 0;
U32 ToDistribute;
/* Init */
U32 const lowThreshold = (U32)(total >> tableLog);
U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
for (s=0; s<=maxSymbolValue; s++) {
if (count[s] == 0) {
norm[s]=0;
continue;
}
if (count[s] <= lowThreshold) {
norm[s] = -1;
distributed++;
total -= count[s];
continue;
}
if (count[s] <= lowOne) {
norm[s] = 1;
distributed++;
total -= count[s];
continue;
}
norm[s]=NOT_YET_ASSIGNED;
}
ToDistribute = (1 << tableLog) - distributed;
if ((total / ToDistribute) > lowOne) {
/* risk of rounding to zero */
lowOne = (U32)((total * 3) / (ToDistribute * 2));
for (s=0; s<=maxSymbolValue; s++) {
if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
norm[s] = 1;
distributed++;
total -= count[s];
continue;
} }
ToDistribute = (1 << tableLog) - distributed;
}
if (distributed == maxSymbolValue+1) {
/* all values are pretty poor;
probably incompressible data (should have already been detected);
find max, then give all remaining points to max */
U32 maxV = 0, maxC = 0;
for (s=0; s<=maxSymbolValue; s++)
if (count[s] > maxC) maxV=s, maxC=count[s];
norm[maxV] += (short)ToDistribute;
return 0;
}
if (total == 0) {
/* all of the symbols were low enough for the lowOne or lowThreshold */
for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
if (norm[s] > 0) ToDistribute--, norm[s]++;
return 0;
}
{ U64 const vStepLog = 62 - tableLog;
U64 const mid = (1ULL << (vStepLog-1)) - 1;
U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
U64 tmpTotal = mid;
for (s=0; s<=maxSymbolValue; s++) {
if (norm[s]==NOT_YET_ASSIGNED) {
U64 const end = tmpTotal + (count[s] * rStep);
U32 const sStart = (U32)(tmpTotal >> vStepLog);
U32 const sEnd = (U32)(end >> vStepLog);
U32 const weight = sEnd - sStart;
if (weight < 1)
return ERROR(GENERIC);
norm[s] = (short)weight;
tmpTotal = end;
} } }
return 0;
}
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t total,
unsigned maxSymbolValue)
{
/* Sanity checks */
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
{ U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
U64 const scale = 62 - tableLog;
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
U64 const vStep = 1ULL<<(scale-20);
int stillToDistribute = 1<<tableLog;
unsigned s;
unsigned largest=0;
short largestP=0;
U32 lowThreshold = (U32)(total >> tableLog);
for (s=0; s<=maxSymbolValue; s++) {
if (count[s] == total) return 0; /* rle special case */
if (count[s] == 0) { normalizedCounter[s]=0; continue; }
if (count[s] <= lowThreshold) {
normalizedCounter[s] = -1;
stillToDistribute--;
} else {
short proba = (short)((count[s]*step) >> scale);
if (proba<8) {
U64 restToBeat = vStep * rtbTable[proba];
proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
}
if (proba > largestP) largestP=proba, largest=s;
normalizedCounter[s] = proba;
stillToDistribute -= proba;
} }
if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
/* corner case, need another normalization method */
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
if (FSE_isError(errorCode)) return errorCode;
}
else normalizedCounter[largest] += (short)stillToDistribute;
}
#if 0
{ /* Print Table (debug) */
U32 s;
U32 nTotal = 0;
for (s=0; s<=maxSymbolValue; s++)
printf("%3i: %4i \n", s, normalizedCounter[s]);
for (s=0; s<=maxSymbolValue; s++)
nTotal += abs(normalizedCounter[s]);
if (nTotal != (1U<<tableLog))
printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
getchar();
}
#endif
return tableLog;
}
/* fake FSE_CTable, for raw (uncompressed) input */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSymbolValue = tableMask;
void* const ptr = ct;
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* header */
tableU16[-2] = (U16) nbBits;
tableU16[-1] = (U16) maxSymbolValue;
/* Build table */
for (s=0; s<tableSize; s++)
tableU16[s] = (U16)(tableSize + s);
/* Build Symbol Transformation Table */
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
for (s=0; s<=maxSymbolValue; s++) {
symbolTT[s].deltaNbBits = deltaNbBits;
symbolTT[s].deltaFindState = s-1;
} }
return 0;
}
/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
void* ptr = ct;
U16* tableU16 = ( (U16*) ptr) + 2;
void* FSCTptr = (U32*)ptr + 2;
FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
/* header */
tableU16[-2] = (U16) 0;
tableU16[-1] = (U16) symbolValue;
/* Build table */
tableU16[0] = 0;
tableU16[1] = 0; /* just in case */
/* Build Symbol Transformation Table */
symbolTT[symbolValue].deltaNbBits = 0;
symbolTT[symbolValue].deltaFindState = 0;
return 0;
}
static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
const void* src, size_t srcSize,
const FSE_CTable* ct, const unsigned fast)
{
const BYTE* const istart = (const BYTE*) src;
const BYTE* const iend = istart + srcSize;
const BYTE* ip=iend;
BIT_CStream_t bitC;
FSE_CState_t CState1, CState2;
/* init */
if (srcSize <= 2) return 0;
{ size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
if (srcSize & 1) {
FSE_initCState2(&CState1, ct, *--ip);
FSE_initCState2(&CState2, ct, *--ip);
FSE_encodeSymbol(&bitC, &CState1, *--ip);
FSE_FLUSHBITS(&bitC);
} else {
FSE_initCState2(&CState2, ct, *--ip);
FSE_initCState2(&CState1, ct, *--ip);
}
/* join to mod 4 */
srcSize -= 2;
if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */
FSE_encodeSymbol(&bitC, &CState2, *--ip);
FSE_encodeSymbol(&bitC, &CState1, *--ip);
FSE_FLUSHBITS(&bitC);
}
/* 2 or 4 encoding per loop */
while ( ip>istart ) {
FSE_encodeSymbol(&bitC, &CState2, *--ip);
if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
FSE_FLUSHBITS(&bitC);
FSE_encodeSymbol(&bitC, &CState1, *--ip);
if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */
FSE_encodeSymbol(&bitC, &CState2, *--ip);
FSE_encodeSymbol(&bitC, &CState1, *--ip);
}
FSE_FLUSHBITS(&bitC);
}
FSE_flushCState(&bitC, &CState2);
FSE_flushCState(&bitC, &CState1);
return BIT_closeCStream(&bitC);
}
size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
const void* src, size_t srcSize,
const FSE_CTable* ct)
{
unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
if (fast)
return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
else
return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
}
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` size must be `(1<<tableLog)`.
*/
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const oend = ostart + dstSize;
U32 count[FSE_MAX_SYMBOL_VALUE+1];
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
FSE_CTable* CTable = (FSE_CTable*)workSpace;
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
void* scratchBuffer = (void*)(CTable + CTableSize);
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
/* init conditions */
if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
if (srcSize <= 1) return 0; /* Not compressible */
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
}
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
/* Write table description header */
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
op += nc_err;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
/* check compressibility */
if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
return op-ostart;
}
typedef struct {
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
} fseWkspMax_t;
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
fseWkspMax_t scratchBuffer;
FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
}
#endif /* FSE_COMMONDEFS_ONLY */

684
thirdparty/zstd/compress/huf_compress.c vendored Normal file
View file

@ -0,0 +1,684 @@
/* ******************************************************************
Huffman encoder, part of New Generation Entropy library
Copyright (C) 2013-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
/* **************************************************************
* Compiler specifics
****************************************************************/
#ifdef _MSC_VER /* Visual Studio */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
/* **************************************************************
* Includes
****************************************************************/
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
/* **************************************************************
* Error Management
****************************************************************/
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
/* **************************************************************
* Utils
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}
/* *******************************************************
* HUF : Huffman block compression
*********************************************************/
/* HUF_compressWeights() :
* Same as FSE_compress(), but dedicated to huff0's weights compression.
* The use case needs much less stack memory.
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
*/
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const oend = ostart + dstSize;
U32 maxSymbolValue = HUF_TABLELOG_MAX;
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
U32 count[HUF_TABLELOG_MAX+1];
S16 norm[HUF_TABLELOG_MAX+1];
/* init conditions */
if (wtSize <= 1) return 0; /* Not compressible */
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
}
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
/* Write table description header */
{ CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
op += hSize;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
return op-ostart;
}
struct HUF_CElt_s {
U16 val;
BYTE nbBits;
}; /* typedef'd to HUF_CElt within "huf.h" */
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
{
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
BYTE* op = (BYTE*)dst;
U32 n;
/* check conditions */
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
/* convert to weight */
bitsToWeight[0] = 0;
for (n=1; n<huffLog+1; n++)
bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
for (n=0; n<maxSymbolValue; n++)
huffWeight[n] = bitsToWeight[CTable[n].nbBits];
/* attempt weights compression by FSE */
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
op[0] = (BYTE)hSize;
return hSize+1;
} }
/* write raw values as 4-bits (max : 15) */
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
for (n=0; n<maxSymbolValue; n+=2)
op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
return ((maxSymbolValue+1)/2) + 1;
}
size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
{
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
U32 tableLog = 0;
U32 nbSymbols = 0;
/* get symbol weights */
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
/* check result */
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall);
/* Prepare base value per rank */
{ U32 n, nextRankStart = 0;
for (n=1; n<=tableLog; n++) {
U32 current = nextRankStart;
nextRankStart += (rankVal[n] << (n-1));
rankVal[n] = current;
} }
/* fill nbBits */
{ U32 n; for (n=0; n<nbSymbols; n++) {
const U32 w = huffWeight[n];
CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
} }
/* fill val */
{ U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
/* determine stating value per rank */
valPerRank[tableLog+1] = 0; /* for w==0 */
{ U16 min = 0;
U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */
valPerRank[n] = min; /* get starting value within each rank */
min += nbPerRank[n];
min >>= 1;
} }
/* assign value within rank, symbol order */
{ U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
}
return readSize;
}
typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
const U32 largestBits = huffNode[lastNonNull].nbBits;
if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
/* there are several too large elements (at least >= 2) */
{ int totalCost = 0;
const U32 baseCost = 1 << (largestBits - maxNbBits);
U32 n = lastNonNull;
while (huffNode[n].nbBits > maxNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
huffNode[n].nbBits = (BYTE)maxNbBits;
n --;
} /* n stops at huffNode[n].nbBits <= maxNbBits */
while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
/* renorm totalCost */
totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
/* repay normalized cost */
{ U32 const noSymbol = 0xF0F0F0F0;
U32 rankLast[HUF_TABLELOG_MAX+2];
int pos;
/* Get pos of last (smallest) symbol per rank */
memset(rankLast, 0xF0, sizeof(rankLast));
{ U32 currentNbBits = maxNbBits;
for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue;
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
rankLast[maxNbBits-currentNbBits] = pos;
} }
while (totalCost > 0) {
U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
U32 highPos = rankLast[nBitsToDecrease];
U32 lowPos = rankLast[nBitsToDecrease-1];
if (highPos == noSymbol) continue;
if (lowPos == noSymbol) break;
{ U32 const highTotal = huffNode[highPos].count;
U32 const lowTotal = 2 * huffNode[lowPos].count;
if (highTotal <= lowTotal) break;
} }
/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
nBitsToDecrease ++;
totalCost -= 1 << (nBitsToDecrease-1);
if (rankLast[nBitsToDecrease-1] == noSymbol)
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
huffNode[rankLast[nBitsToDecrease]].nbBits ++;
if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
rankLast[nBitsToDecrease] = noSymbol;
else {
rankLast[nBitsToDecrease]--;
if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
} } /* while (totalCost > 0) */
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
while (huffNode[n].nbBits == maxNbBits) n--;
huffNode[n+1].nbBits--;
rankLast[1] = n+1;
totalCost++;
continue;
}
huffNode[ rankLast[1] + 1 ].nbBits--;
rankLast[1]++;
totalCost ++;
} } } /* there are several too large elements (at least >= 2) */
return maxNbBits;
}
typedef struct {
U32 base;
U32 current;
} rankPos;
static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
{
rankPos rank[32];
U32 n;
memset(rank, 0, sizeof(rank));
for (n=0; n<=maxSymbolValue; n++) {
U32 r = BIT_highbit32(count[n] + 1);
rank[r].base ++;
}
for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
for (n=0; n<32; n++) rank[n].current = rank[n].base;
for (n=0; n<=maxSymbolValue; n++) {
U32 const c = count[n];
U32 const r = BIT_highbit32(c+1) + 1;
U32 pos = rank[r].current++;
while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
huffNode[pos].count = c;
huffNode[pos].byte = (BYTE)n;
}
}
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
*/
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
nodeElt* const huffNode0 = (nodeElt*)workSpace;
nodeElt* const huffNode = huffNode0+1;
U32 n, nonNullRank;
int lowS, lowN;
U16 nodeNb = STARTNODE;
U32 nodeRoot;
/* safety checks */
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue);
/* init for parents */
nonNullRank = maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--;
lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
nodeNb++; lowS-=2;
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
/* create parents */
while (nodeNb <= nodeRoot) {
U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
huffNode[n1].parent = huffNode[n2].parent = nodeNb;
nodeNb++;
}
/* distribute weights (unlimited tree height) */
huffNode[nodeRoot].nbBits = 0;
for (n=nodeRoot-1; n>=STARTNODE; n--)
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
for (n=0; n<=nonNullRank; n++)
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
/* enforce maxTableLog */
maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
/* fill result into tree (val, nbBits) */
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
for (n=0; n<=nonNullRank; n++)
nbPerRank[huffNode[n].nbBits]++;
/* determine stating value per rank */
{ U16 min = 0;
for (n=maxNbBits; n>0; n--) {
valPerRank[n] = min; /* get starting value within each rank */
min += nbPerRank[n];
min >>= 1;
} }
for (n=0; n<=maxSymbolValue; n++)
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
for (n=0; n<=maxSymbolValue; n++)
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
}
return maxNbBits;
}
/** HUF_buildCTable() :
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
{
huffNodeTable nodeTable;
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
}
static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
size_t nbBits = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
nbBits += CTable[s].nbBits * count[s];
}
return nbBits >> 3;
}
static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
int bad = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
}
return !bad;
}
static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
#define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
#define HUF_FLUSHBITS_1(stream) \
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
#define HUF_FLUSHBITS_2(stream) \
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
size_t n;
const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
BIT_CStream_t bitC;
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
{ size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
if (HUF_isError(initErr)) return 0; }
n = srcSize & ~3; /* join to mod 4 */
switch (srcSize & 3)
{
case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
HUF_FLUSHBITS_2(&bitC);
case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
HUF_FLUSHBITS_1(&bitC);
case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
HUF_FLUSHBITS(&bitC);
case 0 :
default: ;
}
for (; n>0; n-=4) { /* note : n&3==0 at this stage */
HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
HUF_FLUSHBITS_1(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
HUF_FLUSHBITS_2(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
HUF_FLUSHBITS_1(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
HUF_FLUSHBITS(&bitC);
}
return BIT_closeCStream(&bitC);
}
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src;
const BYTE* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */
if (srcSize < 12) return 0; /* no saving possible : too small input */
op += 6; /* jumpTable */
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
if (cSize==0) return 0;
MEM_writeLE16(ostart, (U16)cSize);
op += cSize;
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
if (cSize==0) return 0;
MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize;
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
if (cSize==0) return 0;
MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize;
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
if (cSize==0) return 0;
op += cSize;
}
return op-ostart;
}
static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize,
unsigned singleStream, const HUF_CElt* CTable)
{
size_t const cSize = singleStream ?
HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */
op += cSize;
/* check compressibility */
if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
return op-ostart;
}
/* `workSpace` must a table of at least 1024 unsigned */
static size_t HUF_compress_internal (
void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
unsigned singleStream,
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
{
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
U32* count;
size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
HUF_CElt* CTable;
size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
/* checks & inits */
if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
if (!dstSize) return 0; /* cannot fit within dst budget */
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
count = (U32*)workSpace;
workSpace = (BYTE*)workSpace + countSize;
wkspSize -= countSize;
CTable = (HUF_CElt*)workSpace;
workSpace = (BYTE*)workSpace + CTableSize;
wkspSize -= CTableSize;
/* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
}
/* Scan input and build symbol stats */
{ CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
}
/* Check validity of previous table */
if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
*repeat = HUF_repeat_none;
}
/* Heuristic : use existing table for small inputs */
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
}
/* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
huffLog = (U32)maxBits;
/* Zero the unused symbols so we can check it for validity */
memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
}
/* Write table description header */
{ CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
/* Check if using the previous table will be beneficial */
if (repeat && *repeat != HUF_repeat_none) {
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
}
}
/* Use the new table */
if (hSize + 12ul >= srcSize) { return 0; }
op += hSize;
if (repeat) { *repeat = HUF_repeat_none; }
if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
}
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
}
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
}
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
}
size_t HUF_compress1X (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
unsigned workSpace[1024];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
}
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
}
size_t HUF_compress2 (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
unsigned workSpace[1024];
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
}

3598
thirdparty/zstd/compress/zstd_compress.c vendored Normal file

File diff suppressed because it is too large Load diff

921
thirdparty/zstd/compress/zstd_opt.h vendored Normal file
View file

@ -0,0 +1,921 @@
/**
* Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
/* Note : this file is intended to be included within zstd_compress.c */
#ifndef ZSTD_OPT_H_91842398743
#define ZSTD_OPT_H_91842398743
#define ZSTD_LITFREQ_ADD 2
#define ZSTD_FREQ_DIV 4
#define ZSTD_MAX_PRICE (1<<30)
/*-*************************************
* Price functions for optimal parser
***************************************/
FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
{
ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1);
ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1);
ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1);
ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1);
ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
}
MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
{
unsigned u;
ssPtr->cachedLiterals = NULL;
ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
ssPtr->staticPrices = 0;
if (ssPtr->litLengthSum == 0) {
if (srcSize <= 1024) ssPtr->staticPrices = 1;
for (u=0; u<=MaxLit; u++)
ssPtr->litFreq[u] = 0;
for (u=0; u<srcSize; u++)
ssPtr->litFreq[src[u]]++;
ssPtr->litSum = 0;
ssPtr->litLengthSum = MaxLL+1;
ssPtr->matchLengthSum = MaxML+1;
ssPtr->offCodeSum = (MaxOff+1);
ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
for (u=0; u<=MaxLit; u++) {
ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
ssPtr->litSum += ssPtr->litFreq[u];
}
for (u=0; u<=MaxLL; u++)
ssPtr->litLengthFreq[u] = 1;
for (u=0; u<=MaxML; u++)
ssPtr->matchLengthFreq[u] = 1;
for (u=0; u<=MaxOff; u++)
ssPtr->offCodeFreq[u] = 1;
} else {
ssPtr->matchLengthSum = 0;
ssPtr->litLengthSum = 0;
ssPtr->offCodeSum = 0;
ssPtr->matchSum = 0;
ssPtr->litSum = 0;
for (u=0; u<=MaxLit; u++) {
ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
ssPtr->litSum += ssPtr->litFreq[u];
}
for (u=0; u<=MaxLL; u++) {
ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
}
for (u=0; u<=MaxML; u++) {
ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
}
ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
for (u=0; u<=MaxOff; u++) {
ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
}
}
ZSTD_setLog2Prices(ssPtr);
}
FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals)
{
U32 price, u;
if (ssPtr->staticPrices)
return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
if (litLength == 0)
return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
/* literals */
if (ssPtr->cachedLiterals == literals) {
U32 const additional = litLength - ssPtr->cachedLitLength;
const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
for (u=0; u < additional; u++)
price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1);
ssPtr->cachedPrice = price;
ssPtr->cachedLitLength = litLength;
} else {
price = litLength * ssPtr->log2litSum;
for (u=0; u < litLength; u++)
price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1);
if (litLength >= 12) {
ssPtr->cachedLiterals = literals;
ssPtr->cachedPrice = price;
ssPtr->cachedLitLength = litLength;
}
}
/* literal Length */
{ const BYTE LL_deltaCode = 19;
const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1);
}
return price;
}
FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
{
/* offset */
U32 price;
BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
if (seqStorePtr->staticPrices)
return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
if (!ultra && offCode >= 20) price += (offCode-19)*2;
/* match Length */
{ const BYTE ML_deltaCode = 36;
const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1);
}
return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
}
MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
{
U32 u;
/* literals */
seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
for (u=0; u < litLength; u++)
seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
/* literal Length */
{ const BYTE LL_deltaCode = 19;
const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
seqStorePtr->litLengthFreq[llCode]++;
seqStorePtr->litLengthSum++;
}
/* match offset */
{ BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
seqStorePtr->offCodeSum++;
seqStorePtr->offCodeFreq[offCode]++;
}
/* match Length */
{ const BYTE ML_deltaCode = 36;
const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
seqStorePtr->matchLengthFreq[mlCode]++;
seqStorePtr->matchLengthSum++;
}
ZSTD_setLog2Prices(seqStorePtr);
}
#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \
{ \
while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
opt[pos].mlen = mlen_; \
opt[pos].off = offset_; \
opt[pos].litlen = litlen_; \
opt[pos].price = price_; \
}
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
FORCE_INLINE
U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
{
U32* const hashTable3 = zc->hashTable3;
U32 const hashLog3 = zc->hashLog3;
const BYTE* const base = zc->base;
U32 idx = zc->nextToUpdate3;
const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
while(idx < target) {
hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
idx++;
}
return hashTable3[hash3];
}
/*-*************************************
* Binary Tree search
***************************************/
static U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
U32 nbCompares, const U32 mls,
U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
{
const BYTE* const base = zc->base;
const U32 current = (U32)(ip-base);
const U32 hashLog = zc->params.cParams.hashLog;
const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
U32* const hashTable = zc->hashTable;
U32 matchIndex = hashTable[h];
U32* const bt = zc->chainTable;
const U32 btLog = zc->params.cParams.chainLog - 1;
const U32 btMask= (1U << btLog) - 1;
size_t commonLengthSmaller=0, commonLengthLarger=0;
const BYTE* const dictBase = zc->dictBase;
const U32 dictLimit = zc->dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const U32 btLow = btMask >= current ? 0 : current - btMask;
const U32 windowLow = zc->lowLimit;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8;
U32 dummy32; /* to be nullified at the end */
U32 mnum = 0;
const U32 minMatch = (mls == 3) ? 3 : 4;
size_t bestLength = minMatchLen-1;
if (minMatch == 3) { /* HC3 match finder */
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
const BYTE* match;
size_t currentMl=0;
if ((!extDict) || matchIndex3 >= dictLimit) {
match = base + matchIndex3;
if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
} else {
match = dictBase + matchIndex3;
if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
}
/* save best solution */
if (currentMl > bestLength) {
bestLength = currentMl;
matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3;
matches[mnum].len = (U32)currentMl;
mnum++;
if (currentMl > ZSTD_OPT_NUM) goto update;
if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
}
}
}
hashTable[h] = current; /* Update Hash Table */
while (nbCompares-- && (matchIndex > windowLow)) {
U32* nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
const BYTE* match;
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
match = base + matchIndex;
if (match[matchLength] == ip[matchLength]) {
matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
}
} else {
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
if (matchLength > bestLength) {
if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex;
matches[mnum].len = (U32)matchLength;
mnum++;
if (matchLength > ZSTD_OPT_NUM) break;
if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */
break; /* drop, to guarantee consistency (miss a little bit of compression) */
}
if (match[matchLength] < ip[matchLength]) {
/* match is smaller than current */
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
} else {
/* match is larger than current */
*largerPtr = matchIndex;
commonLengthLarger = matchLength;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
largerPtr = nextPtr;
matchIndex = nextPtr[0];
} }
*smallerPtr = *largerPtr = 0;
update:
zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
return mnum;
}
/** Tree updater, providing best match */
static U32 ZSTD_BtGetAllMatches (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
{
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
}
static U32 ZSTD_BtGetAllMatches_selectMLS (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iHighLimit,
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
{
switch(matchLengthSearch)
{
case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
default :
case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
case 7 :
case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
}
}
/** Tree updater, providing best match */
static U32 ZSTD_BtGetAllMatches_extDict (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
{
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
}
static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iHighLimit,
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
{
switch(matchLengthSearch)
{
case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
default :
case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
case 7 :
case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
}
}
/*-*******************************
* Optimal parser
*********************************/
FORCE_INLINE
void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize, const int ultra)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ctx->base;
const BYTE* const prefixStart = base + ctx->dictLimit;
const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
const U32 sufficient_len = ctx->params.cParams.targetLength;
const U32 mls = ctx->params.cParams.searchLength;
const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
ZSTD_optimal_t* opt = seqStorePtr->priceTable;
ZSTD_match_t* matches = seqStorePtr->matchTable;
const BYTE* inr;
U32 offset, rep[ZSTD_REP_NUM];
/* init */
ctx->nextToUpdate3 = ctx->nextToUpdate;
ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
ip += (ip==prefixStart);
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
/* Match Loop */
while (ip < ilimit) {
U32 cur, match_num, last_pos, litlen, price;
U32 u, mlen, best_mlen, best_off, litLength;
memset(opt, 0, sizeof(ZSTD_optimal_t));
last_pos = 0;
litlen = (U32)(ip - anchor);
/* check repCode */
{ U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
for (i=(ip == anchor); i<last_i; i++) {
const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
&& (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
goto _storeSequence;
}
best_off = i - (ip == anchor);
do {
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
if (mlen > last_pos || price < opt[mlen].price)
SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
mlen--;
} while (mlen >= minMatch);
} } }
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
if (!last_pos && !match_num) { ip++; continue; }
if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
best_mlen = matches[match_num-1].len;
best_off = matches[match_num-1].off;
cur = 0;
last_pos = 1;
goto _storeSequence;
}
/* set prices using matches at position = 0 */
best_mlen = (last_pos) ? last_pos : minMatch;
for (u = 0; u < match_num; u++) {
mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
best_mlen = matches[u].len;
while (mlen <= best_mlen) {
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
if (mlen > last_pos || price < opt[mlen].price)
SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
mlen++;
} }
if (last_pos < minMatch) { ip++; continue; }
/* initialize opt[0] */
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
opt[0].mlen = 1;
opt[0].litlen = litlen;
/* check further positions */
for (cur = 1; cur <= last_pos; cur++) {
inr = ip + cur;
if (opt[cur-1].mlen == 1) {
litlen = opt[cur-1].litlen + 1;
if (cur > litlen) {
price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
} else
price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
} else {
litlen = 1;
price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
}
if (cur > last_pos || price <= opt[cur].price)
SET_PRICE(cur, 1, 0, litlen, price);
if (cur == last_pos) break;
if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
continue;
mlen = opt[cur].mlen;
if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
opt[cur].rep[2] = opt[cur-mlen].rep[1];
opt[cur].rep[1] = opt[cur-mlen].rep[0];
opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
} else {
opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
}
best_mlen = minMatch;
{ U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
&& (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
best_mlen = mlen; best_off = i; last_pos = cur + 1;
goto _storeSequence;
}
best_off = i - (opt[cur].mlen != 1);
if (mlen > best_mlen) best_mlen = mlen;
do {
if (opt[cur].mlen == 1) {
litlen = opt[cur].litlen;
if (cur > litlen) {
price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
} else
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
} else {
litlen = 0;
price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
}
if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
SET_PRICE(cur + mlen, mlen, i, litlen, price);
mlen--;
} while (mlen >= minMatch);
} } }
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
best_mlen = matches[match_num-1].len;
best_off = matches[match_num-1].off;
last_pos = cur + 1;
goto _storeSequence;
}
/* set prices using matches at position = cur */
for (u = 0; u < match_num; u++) {
mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
best_mlen = matches[u].len;
while (mlen <= best_mlen) {
if (opt[cur].mlen == 1) {
litlen = opt[cur].litlen;
if (cur > litlen)
price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
else
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
} else {
litlen = 0;
price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
}
if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
mlen++;
} } }
best_mlen = opt[last_pos].mlen;
best_off = opt[last_pos].off;
cur = last_pos - best_mlen;
/* store sequence */
_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
opt[0].mlen = 1;
while (1) {
mlen = opt[cur].mlen;
offset = opt[cur].off;
opt[cur].mlen = best_mlen;
opt[cur].off = best_off;
best_mlen = mlen;
best_off = offset;
if (mlen > cur) break;
cur -= mlen;
}
for (u = 0; u <= last_pos;) {
u += opt[u].mlen;
}
for (cur=0; cur < last_pos; ) {
mlen = opt[cur].mlen;
if (mlen == 1) { ip++; cur++; continue; }
offset = opt[cur].off;
cur += mlen;
litLength = (U32)(ip - anchor);
if (offset > ZSTD_REP_MOVE_OPT) {
rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = offset - ZSTD_REP_MOVE_OPT;
offset--;
} else {
if (offset != 0) {
best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
if (offset != 1) rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = best_off;
}
if (litLength==0) offset--;
}
ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
anchor = ip = ip + mlen;
} } /* for (cur=0; cur < last_pos; ) */
/* Save reps for next block */
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
/* Last Literals */
{ size_t const lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
}
FORCE_INLINE
void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize, const int ultra)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ctx->base;
const U32 lowestIndex = ctx->lowLimit;
const U32 dictLimit = ctx->dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictBase = ctx->dictBase;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
const U32 sufficient_len = ctx->params.cParams.targetLength;
const U32 mls = ctx->params.cParams.searchLength;
const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
ZSTD_optimal_t* opt = seqStorePtr->priceTable;
ZSTD_match_t* matches = seqStorePtr->matchTable;
const BYTE* inr;
/* init */
U32 offset, rep[ZSTD_REP_NUM];
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
ctx->nextToUpdate3 = ctx->nextToUpdate;
ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
ip += (ip==prefixStart);
/* Match Loop */
while (ip < ilimit) {
U32 cur, match_num, last_pos, litlen, price;
U32 u, mlen, best_mlen, best_off, litLength;
U32 current = (U32)(ip-base);
memset(opt, 0, sizeof(ZSTD_optimal_t));
last_pos = 0;
opt[0].litlen = (U32)(ip - anchor);
/* check repCode */
{ U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
for (i = (ip==anchor); i<last_i; i++) {
const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
const U32 repIndex = (U32)(current - repCur);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if ( (repCur > 0 && repCur <= (S32)current)
&& (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
&& (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
goto _storeSequence;
}
best_off = i - (ip==anchor);
litlen = opt[0].litlen;
do {
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
if (mlen > last_pos || price < opt[mlen].price)
SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
mlen--;
} while (mlen >= minMatch);
} } }
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
if (!last_pos && !match_num) { ip++; continue; }
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
opt[0].mlen = 1;
if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
best_mlen = matches[match_num-1].len;
best_off = matches[match_num-1].off;
cur = 0;
last_pos = 1;
goto _storeSequence;
}
best_mlen = (last_pos) ? last_pos : minMatch;
/* set prices using matches at position = 0 */
for (u = 0; u < match_num; u++) {
mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
best_mlen = matches[u].len;
litlen = opt[0].litlen;
while (mlen <= best_mlen) {
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
if (mlen > last_pos || price < opt[mlen].price)
SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
mlen++;
} }
if (last_pos < minMatch) {
ip++; continue;
}
/* check further positions */
for (cur = 1; cur <= last_pos; cur++) {
inr = ip + cur;
if (opt[cur-1].mlen == 1) {
litlen = opt[cur-1].litlen + 1;
if (cur > litlen) {
price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
} else
price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
} else {
litlen = 1;
price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
}
if (cur > last_pos || price <= opt[cur].price)
SET_PRICE(cur, 1, 0, litlen, price);
if (cur == last_pos) break;
if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
continue;
mlen = opt[cur].mlen;
if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
opt[cur].rep[2] = opt[cur-mlen].rep[1];
opt[cur].rep[1] = opt[cur-mlen].rep[0];
opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
} else {
opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
}
best_mlen = minMatch;
{ U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
for (i = (mlen != 1); i<last_i; i++) {
const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
const U32 repIndex = (U32)(current+cur - repCur);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if ( (repCur > 0 && repCur <= (S32)(current+cur))
&& (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
&& (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
best_mlen = mlen; best_off = i; last_pos = cur + 1;
goto _storeSequence;
}
best_off = i - (opt[cur].mlen != 1);
if (mlen > best_mlen) best_mlen = mlen;
do {
if (opt[cur].mlen == 1) {
litlen = opt[cur].litlen;
if (cur > litlen) {
price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
} else
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
} else {
litlen = 0;
price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
}
if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
SET_PRICE(cur + mlen, mlen, i, litlen, price);
mlen--;
} while (mlen >= minMatch);
} } }
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
best_mlen = matches[match_num-1].len;
best_off = matches[match_num-1].off;
last_pos = cur + 1;
goto _storeSequence;
}
/* set prices using matches at position = cur */
for (u = 0; u < match_num; u++) {
mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
best_mlen = matches[u].len;
while (mlen <= best_mlen) {
if (opt[cur].mlen == 1) {
litlen = opt[cur].litlen;
if (cur > litlen)
price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
else
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
} else {
litlen = 0;
price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
}
if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
mlen++;
} } } /* for (cur = 1; cur <= last_pos; cur++) */
best_mlen = opt[last_pos].mlen;
best_off = opt[last_pos].off;
cur = last_pos - best_mlen;
/* store sequence */
_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
opt[0].mlen = 1;
while (1) {
mlen = opt[cur].mlen;
offset = opt[cur].off;
opt[cur].mlen = best_mlen;
opt[cur].off = best_off;
best_mlen = mlen;
best_off = offset;
if (mlen > cur) break;
cur -= mlen;
}
for (u = 0; u <= last_pos; ) {
u += opt[u].mlen;
}
for (cur=0; cur < last_pos; ) {
mlen = opt[cur].mlen;
if (mlen == 1) { ip++; cur++; continue; }
offset = opt[cur].off;
cur += mlen;
litLength = (U32)(ip - anchor);
if (offset > ZSTD_REP_MOVE_OPT) {
rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = offset - ZSTD_REP_MOVE_OPT;
offset--;
} else {
if (offset != 0) {
best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
if (offset != 1) rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = best_off;
}
if (litLength==0) offset--;
}
ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
anchor = ip = ip + mlen;
} } /* for (cur=0; cur < last_pos; ) */
/* Save reps for next block */
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
/* Last Literals */
{ size_t lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
}
#endif /* ZSTD_OPT_H_91842398743 */

View file

@ -0,0 +1,751 @@
/**
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
/* ====== Tuning parameters ====== */
#define ZSTDMT_NBTHREADS_MAX 128
/* ====== Compiler specifics ====== */
#if defined(_MSC_VER)
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
#endif
/* ====== Dependencies ====== */
#include <stdlib.h> /* malloc */
#include <string.h> /* memcpy */
#include "pool.h" /* threadpool */
#include "threading.h" /* mutex */
#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
#include "zstdmt_compress.h"
/* ====== Debug ====== */
#if 0
# include <stdio.h>
# include <unistd.h>
# include <sys/times.h>
static unsigned g_debugLevel = 5;
# define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
# define DEBUG_PRINTHEX(l,p,n) { \
unsigned debug_u; \
for (debug_u=0; debug_u<(n); debug_u++) \
DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
DEBUGLOGRAW(l, " \n"); \
}
static unsigned long long GetCurrentClockTimeMicroseconds(void)
{
static clock_t _ticksPerSecond = 0;
if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
{ struct tms junk; clock_t newTicks = (clock_t) times(&junk);
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
}
#define MUTEX_WAIT_TIME_DLEVEL 5
#define PTHREAD_MUTEX_LOCK(mutex) \
if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
pthread_mutex_lock(mutex); \
{ unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
unsigned long long const elapsedTime = (afterTime-beforeTime); \
if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
elapsedTime, #mutex); \
} } \
} else pthread_mutex_lock(mutex);
#else
# define DEBUGLOG(l, ...) {} /* disabled */
# define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
# define DEBUG_PRINTHEX(l,p,n) {}
#endif
/* ===== Buffer Pool ===== */
typedef struct buffer_s {
void* start;
size_t size;
} buffer_t;
static const buffer_t g_nullBuffer = { NULL, 0 };
typedef struct ZSTDMT_bufferPool_s {
unsigned totalBuffers;
unsigned nbBuffers;
buffer_t bTable[1]; /* variable size */
} ZSTDMT_bufferPool;
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
{
unsigned const maxNbBuffers = 2*nbThreads + 2;
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
if (bufPool==NULL) return NULL;
bufPool->totalBuffers = maxNbBuffers;
bufPool->nbBuffers = 0;
return bufPool;
}
static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
{
unsigned u;
if (!bufPool) return; /* compatibility with free on NULL */
for (u=0; u<bufPool->totalBuffers; u++)
free(bufPool->bTable[u].start);
free(bufPool);
}
/* assumption : invocation from main thread only ! */
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
{
if (pool->nbBuffers) { /* try to use an existing buffer */
buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
size_t const availBufferSize = buf.size;
if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */
return buf;
free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */
}
/* create new buffer */
{ buffer_t buffer;
void* const start = malloc(bSize);
if (start==NULL) bSize = 0;
buffer.start = start; /* note : start can be NULL if malloc fails ! */
buffer.size = bSize;
return buffer;
}
}
/* store buffer for later re-use, up to pool capacity */
static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
{
if (buf.start == NULL) return; /* release on NULL */
if (pool->nbBuffers < pool->totalBuffers) {
pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */
return;
}
/* Reached bufferPool capacity (should not happen) */
free(buf.start);
}
/* ===== CCtx Pool ===== */
typedef struct {
unsigned totalCCtx;
unsigned availCCtx;
ZSTD_CCtx* cctx[1]; /* variable size */
} ZSTDMT_CCtxPool;
/* assumption : CCtxPool invocation only from main thread */
/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
{
unsigned u;
for (u=0; u<pool->totalCCtx; u++)
ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
free(pool);
}
/* ZSTDMT_createCCtxPool() :
* implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
{
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
if (!cctxPool) return NULL;
cctxPool->totalCCtx = nbThreads;
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
cctxPool->cctx[0] = ZSTD_createCCtx();
if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
return cctxPool;
}
static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
{
if (pool->availCCtx) {
pool->availCCtx--;
return pool->cctx[pool->availCCtx];
}
return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */
}
static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
{
if (cctx==NULL) return; /* compatibility with release on NULL */
if (pool->availCCtx < pool->totalCCtx)
pool->cctx[pool->availCCtx++] = cctx;
else
/* pool overflow : should not happen, since totalCCtx==nbThreads */
ZSTD_freeCCtx(cctx);
}
/* ===== Thread worker ===== */
typedef struct {
buffer_t buffer;
size_t filled;
} inBuff_t;
typedef struct {
ZSTD_CCtx* cctx;
buffer_t src;
const void* srcStart;
size_t srcSize;
size_t dictSize;
buffer_t dstBuff;
size_t cSize;
size_t dstFlushed;
unsigned firstChunk;
unsigned lastChunk;
unsigned jobCompleted;
unsigned jobScanned;
pthread_mutex_t* jobCompleted_mutex;
pthread_cond_t* jobCompleted_cond;
ZSTD_parameters params;
ZSTD_CDict* cdict;
unsigned long long fullFrameSize;
} ZSTDMT_jobDescription;
/* ZSTDMT_compressChunk() : POOL_function type */
void ZSTDMT_compressChunk(void* jobDescription)
{
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
const void* const src = (const char*)job->srcStart + job->dictSize;
buffer_t const dstBuff = job->dstBuff;
DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
if (job->cdict) { /* should only happen for first segment */
size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
if (job->cdict) DEBUGLOG(3, "using CDict ");
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
} else { /* srcStart points at reloaded section */
if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */
{ size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
} }
if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
ZSTD_invalidateRepCodes(job->cctx);
}
DEBUGLOG(4, "Compressing : ");
DEBUG_PRINTHEX(4, job->srcStart, 12);
job->cSize = (job->lastChunk) ?
ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
(unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
_endJob:
PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
job->jobCompleted = 1;
job->jobScanned = 0;
pthread_cond_signal(job->jobCompleted_cond);
pthread_mutex_unlock(job->jobCompleted_mutex);
}
/* ------------------------------------------ */
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
struct ZSTDMT_CCtx_s {
POOL_ctx* factory;
ZSTDMT_bufferPool* buffPool;
ZSTDMT_CCtxPool* cctxPool;
pthread_mutex_t jobCompleted_mutex;
pthread_cond_t jobCompleted_cond;
size_t targetSectionSize;
size_t marginSize;
size_t inBuffSize;
size_t dictSize;
size_t targetDictSize;
inBuff_t inBuff;
ZSTD_parameters params;
XXH64_state_t xxhState;
unsigned nbThreads;
unsigned jobIDMask;
unsigned doneJobID;
unsigned nextJobID;
unsigned frameEnded;
unsigned allJobsCompleted;
unsigned overlapRLog;
unsigned long long frameContentSize;
size_t sectionSize;
ZSTD_CDict* cdict;
ZSTD_CStream* cstream;
ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */
};
ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
{
ZSTDMT_CCtx* cctx;
U32 const minNbJobs = nbThreads + 2;
U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
U32 const nbJobs = 1 << nbJobsLog2;
DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n",
nbThreads, minNbJobs, nbJobsLog2, nbJobs);
if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
if (!cctx) return NULL;
cctx->nbThreads = nbThreads;
cctx->jobIDMask = nbJobs - 1;
cctx->allJobsCompleted = 1;
cctx->sectionSize = 0;
cctx->overlapRLog = 3;
cctx->factory = POOL_create(nbThreads, 1);
cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */
ZSTDMT_freeCCtx(cctx);
return NULL;
}
if (nbThreads==1) {
cctx->cstream = ZSTD_createCStream();
if (!cctx->cstream) {
ZSTDMT_freeCCtx(cctx); return NULL;
} }
pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
pthread_cond_init(&cctx->jobCompleted_cond, NULL);
DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
return cctx;
}
/* ZSTDMT_releaseAllJobResources() :
* Ensure all workers are killed first. */
static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
{
unsigned jobID;
for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
mtctx->jobs[jobID].dstBuff = g_nullBuffer;
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
mtctx->jobs[jobID].src = g_nullBuffer;
ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
mtctx->jobs[jobID].cctx = NULL;
}
memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
mtctx->inBuff.buffer = g_nullBuffer;
mtctx->allJobsCompleted = 1;
}
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
{
if (mtctx==NULL) return 0; /* compatible with free on NULL */
POOL_free(mtctx->factory);
if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */
ZSTDMT_freeCCtxPool(mtctx->cctxPool);
ZSTD_freeCDict(mtctx->cdict);
ZSTD_freeCStream(mtctx->cstream);
pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
pthread_cond_destroy(&mtctx->jobCompleted_cond);
free(mtctx);
return 0;
}
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
{
switch(parameter)
{
case ZSTDMT_p_sectionSize :
mtctx->sectionSize = value;
return 0;
case ZSTDMT_p_overlapSectionLog :
DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
return 0;
default :
return ERROR(compressionParameter_unsupported);
}
}
/* ------------------------------------------ */
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel)
{
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
size_t const overlapSize = (size_t)1 << (params.cParams.windowLog - overlapLog);
size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + 1;
unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
size_t remainingSrcSize = srcSize;
const char* const srcStart = (const char*)src;
unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
size_t frameStartPos = 0, dstBufferPos = 0;
DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize);
DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
params.fParams.contentSizeFlag = 1;
if (nbChunks==1) { /* fallback to single-thread mode */
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
}
{ unsigned u;
for (u=0; u<nbChunks; u++) {
size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity);
ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
size_t dictSize = u ? overlapSize : 0;
if ((cctx==NULL) || (dstBuffer.start==NULL)) {
mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
mtctx->jobs[u].jobCompleted = 1;
nbChunks = u+1;
break; /* let's wait for previous jobs to complete, but don't start new ones */
}
mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
mtctx->jobs[u].dictSize = dictSize;
mtctx->jobs[u].srcSize = chunkSize;
mtctx->jobs[u].fullFrameSize = srcSize;
mtctx->jobs[u].params = params;
mtctx->jobs[u].dstBuff = dstBuffer;
mtctx->jobs[u].cctx = cctx;
mtctx->jobs[u].firstChunk = (u==0);
mtctx->jobs[u].lastChunk = (u==nbChunks-1);
mtctx->jobs[u].jobCompleted = 0;
mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize);
DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
frameStartPos += chunkSize;
dstBufferPos += dstBufferCapacity;
remainingSrcSize -= chunkSize;
} }
/* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */
{ unsigned chunkID;
size_t error = 0, dstPos = 0;
for (chunkID=0; chunkID<nbChunks; chunkID++) {
DEBUGLOG(3, "waiting for chunk %u ", chunkID);
PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
while (mtctx->jobs[chunkID].jobCompleted==0) {
DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
}
pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
DEBUGLOG(3, "ready to write chunk %u ", chunkID);
ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
mtctx->jobs[chunkID].cctx = NULL;
mtctx->jobs[chunkID].srcStart = NULL;
{ size_t const cSize = mtctx->jobs[chunkID].cSize;
if (ZSTD_isError(cSize)) error = cSize;
if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
if (chunkID) { /* note : chunk 0 is already written directly into dst */
if (!error)
memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap if chunk decompressed within dst */
if (chunkID >= compressWithinDst) /* otherwise, it decompresses within dst */
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
}
dstPos += cSize ;
}
}
if (!error) DEBUGLOG(3, "compressed size : %u ", (U32)dstPos);
return error ? error : dstPos;
}
}
/* ====================================== */
/* ======= Streaming API ======= */
/* ====================================== */
static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
while (zcs->doneJobID < zcs->nextJobID) {
unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
while (zcs->jobs[jobID].jobCompleted==0) {
DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
}
pthread_mutex_unlock(&zcs->jobCompleted_mutex);
zcs->doneJobID++;
}
}
static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
const void* dict, size_t dictSize, unsigned updateDict,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
ZSTD_customMem const cmem = { NULL, NULL, NULL };
DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */
ZSTDMT_waitForAllJobsCompleted(zcs);
ZSTDMT_releaseAllJobResources(zcs);
zcs->allJobsCompleted = 1;
}
zcs->params = params;
if (updateDict) {
ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
if (dict && dictSize) {
zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params.cParams, cmem);
if (zcs->cdict == NULL) return ERROR(memory_allocation);
} }
zcs->frameContentSize = pledgedSrcSize;
zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
zcs->marginSize = zcs->targetSectionSize >> 2;
zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
zcs->inBuff.filled = 0;
zcs->dictSize = 0;
zcs->doneJobID = 0;
zcs->nextJobID = 0;
zcs->frameEnded = 0;
zcs->allJobsCompleted = 0;
if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
return 0;
}
size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
}
/* ZSTDMT_resetCStream() :
* pledgedSrcSize is optional and can be zero == unknown */
size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
{
if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
}
size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
}
static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
{
size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
if ((cctx==NULL) || (dstBuffer.start==NULL)) {
zcs->jobs[jobID].jobCompleted = 1;
zcs->nextJobID++;
ZSTDMT_waitForAllJobsCompleted(zcs);
ZSTDMT_releaseAllJobResources(zcs);
return ERROR(memory_allocation);
}
DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
zcs->jobs[jobID].src = zcs->inBuff.buffer;
zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
zcs->jobs[jobID].srcSize = srcSize;
zcs->jobs[jobID].dictSize = zcs->dictSize; /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
zcs->jobs[jobID].params = zcs->params;
if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */
zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
zcs->jobs[jobID].dstBuff = dstBuffer;
zcs->jobs[jobID].cctx = cctx;
zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
zcs->jobs[jobID].lastChunk = endFrame;
zcs->jobs[jobID].jobCompleted = 0;
zcs->jobs[jobID].dstFlushed = 0;
zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
/* get a new buffer for next input */
if (!endFrame) {
size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
zcs->jobs[jobID].jobCompleted = 1;
zcs->nextJobID++;
ZSTDMT_waitForAllJobsCompleted(zcs);
ZSTDMT_releaseAllJobResources(zcs);
return ERROR(memory_allocation);
}
DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
DEBUGLOG(5, "new inBuff pre-filled");
zcs->dictSize = newDictSize;
} else {
zcs->inBuff.buffer = g_nullBuffer;
zcs->inBuff.filled = 0;
zcs->dictSize = 0;
zcs->frameEnded = 1;
if (zcs->nextJobID == 0)
zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */
}
DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */
zcs->nextJobID++;
return 0;
}
/* ZSTDMT_flushNextJob() :
* output : will be updated with amount of data flushed .
* blockToFlush : if >0, the function will block and wait if there is no data available to flush .
* @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
{
unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */
PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
while (zcs->jobs[wJobID].jobCompleted==0) {
DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */
pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */
}
pthread_mutex_unlock(&zcs->jobCompleted_mutex);
/* compression job completed : output can be flushed */
{ ZSTDMT_jobDescription job = zcs->jobs[wJobID];
if (!job.jobScanned) {
if (ZSTD_isError(job.cSize)) {
DEBUGLOG(5, "compression error detected ");
ZSTDMT_waitForAllJobsCompleted(zcs);
ZSTDMT_releaseAllJobResources(zcs);
return job.cSize;
}
ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
zcs->jobs[wJobID].cctx = NULL;
DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
if (zcs->params.fParams.checksumFlag) {
XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
DEBUGLOG(4, "writing checksum : %08X \n", checksum);
MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
job.cSize += 4;
zcs->jobs[wJobID].cSize += 4;
} }
ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
zcs->jobs[wJobID].srcStart = NULL;
zcs->jobs[wJobID].src = g_nullBuffer;
zcs->jobs[wJobID].jobScanned = 1;
}
{ size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
output->pos += toWrite;
job.dstFlushed += toWrite;
}
if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */
ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
zcs->jobs[wJobID].dstBuff = g_nullBuffer;
zcs->jobs[wJobID].jobCompleted = 0;
zcs->doneJobID++;
} else {
zcs->jobs[wJobID].dstFlushed = job.dstFlushed;
}
/* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */
zcs->allJobsCompleted = zcs->frameEnded; /* frame completed and entirely flushed */
return 0; /* everything flushed */
} }
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */
if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);
/* fill input buffer */
{ size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
input->pos += toLoad;
zcs->inBuff.filled += toLoad;
}
if ( (zcs->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
&& (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */
CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
}
/* check for data to flush */
CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to create new job due to saturation */
/* recommended next input size : fill current input buffer */
return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
}
static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
{
size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
&& (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
}
/* check if there is any data available to flush */
DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
return ZSTDMT_flushNextJob(zcs, output, 1);
}
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
{
if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
return ZSTDMT_flushStream_internal(zcs, output, 0);
}
size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
{
if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
return ZSTDMT_flushStream_internal(zcs, output, 1);
}

View file

@ -0,0 +1,78 @@
/**
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
#ifndef ZSTDMT_COMPRESS_H
#define ZSTDMT_COMPRESS_H
#if defined (__cplusplus)
extern "C" {
#endif
/* Note : All prototypes defined in this file shall be considered experimental.
* There is no guarantee of API continuity (yet) on any of these prototypes */
/* === Dependencies === */
#include <stddef.h> /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* === Simple one-pass functions === */
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx);
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
/* === Streaming functions === */
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
/* === Advanced functions and parameters === */
#ifndef ZSTDMT_SECTION_SIZE_MIN
# define ZSTDMT_SECTION_SIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */
#endif
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize, /**< dict can be released after init, a local copy is preserved within zcs */
ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
/* ZSDTMT_parameter :
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
typedef enum {
ZSTDMT_p_sectionSize, /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
ZSTDMT_p_overlapSectionLog /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
} ZSDTMT_parameter;
/* ZSTDMT_setMTCtxParameter() :
* allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
* The function must be called typically after ZSTD_createCCtx().
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTDMT_COMPRESS_H */