zstd: Update to upstream version 1.4.8

This commit is contained in:
Rémi Verschelde 2021-01-08 11:21:43 +01:00
parent 0abbefd94a
commit 3645c9c80c
No known key found for this signature in database
GPG key ID: C3336907360768E1
48 changed files with 4405 additions and 2089 deletions

View file

@ -17,7 +17,6 @@
#if defined (__cplusplus)
extern "C" {
#endif
/*
* This API consists of small unitary functions, which must be inlined for best performance.
* Since link-time-optimization is not available for all compilers,
@ -36,10 +35,12 @@ extern "C" {
/*=========================================
* Target specific
=========================================*/
#if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
#elif defined(__ICCARM__)
# include <intrinsics.h>
#ifndef ZSTD_NO_INTRINSICS
# if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
# elif defined(__ICCARM__)
# include <intrinsics.h>
# endif
#endif
#define STREAM_ACCUMULATOR_MIN_32 25
@ -141,8 +142,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
# if STATIC_BMI2 == 1
return _lzcnt_u32(val) ^ 31;
# else
unsigned long r = 0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
@ -198,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
{
MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
assert(nbBits < BIT_MASK_SIZE);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
@ -271,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
*/
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
{
if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
bitD->start = (const char*)srcBuffer;
bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@ -317,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
return srcSize;
}
MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
{
return bitContainer >> start;
}
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
{
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
@ -330,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
}
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
return _bzhi_u64(bitContainer, nbBits);
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}
/*! BIT_lookBits() :
@ -342,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
* On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56.
* @return : value extracted */
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
/* arbitrate between double-shift and shift+mask */
#if 1
@ -365,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}
MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
bitD->bitsConsumed += nbBits;
}
@ -374,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);

View file

@ -38,6 +38,17 @@
#endif
/**
On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
This explictly marks such functions as __cdecl so that the code will still compile
if a CC other than __cdecl has been made the default.
*/
#if defined(_MSC_VER)
# define WIN_CDECL __cdecl
#else
# define WIN_CDECL
#endif
/**
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
* parameters. They must be inlined for the compiler to eliminate the constant
@ -114,12 +125,12 @@
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
# elif defined(__aarch64__)
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# elif defined(__aarch64__)
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
# else
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
@ -172,4 +183,106 @@
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
#endif
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
#ifndef STATIC_BMI2
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
# define STATIC_BMI2 1
# endif
# endif
#endif
#ifndef STATIC_BMI2
#define STATIC_BMI2 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_feature
# define __has_feature(x) 0
#endif
/* detects whether we are being compiled under msan */
#ifndef ZSTD_MEMORY_SANITIZER
# if __has_feature(memory_sanitizer)
# define ZSTD_MEMORY_SANITIZER 1
# else
# define ZSTD_MEMORY_SANITIZER 0
# endif
#endif
#if ZSTD_MEMORY_SANITIZER
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stddef.h> /* size_t */
#define ZSTD_DEPS_NEED_STDINT
#include "zstd_deps.h" /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif
/* detects whether we are being compiled under asan */
#ifndef ZSTD_ADDRESS_SANITIZER
# if __has_feature(address_sanitizer)
# define ZSTD_ADDRESS_SANITIZER 1
# elif defined(__SANITIZE_ADDRESS__)
# define ZSTD_ADDRESS_SANITIZER 1
# else
# define ZSTD_ADDRESS_SANITIZER 0
# endif
#endif
#if ZSTD_ADDRESS_SANITIZER
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stddef.h> /* size_t */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
#endif /* ZSTD_COMPILER_H */

View file

@ -16,8 +16,6 @@
* https://github.com/facebook/folly/blob/master/folly/CpuId.h
*/
#include <string.h>
#include "mem.h"
#ifdef _MSC_VER

View file

@ -51,15 +51,6 @@ extern "C" {
#endif
/* DEBUGFILE can be defined externally,
* typically through compiler command line.
* note : currently useless.
* Value must be stderr or stdout */
#ifndef DEBUGFILE
# define DEBUGFILE stderr
#endif
/* recommended values for DEBUGLEVEL :
* 0 : release mode, no debug, all run-time checks disabled
* 1 : enables assert() only, no display
@ -76,7 +67,8 @@ extern "C" {
*/
#if (DEBUGLEVEL>=1)
# include <assert.h>
# define ZSTD_DEPS_NEED_ASSERT
# include "zstd_deps.h"
#else
# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
# define assert(condition) ((void)0) /* disable assert (default) */
@ -84,7 +76,8 @@ extern "C" {
#endif
#if (DEBUGLEVEL>=2)
# include <stdio.h>
# define ZSTD_DEPS_NEED_IO
# include "zstd_deps.h"
extern int g_debuglevel; /* the variable is only declared,
it actually lives in debug.c,
and is shared by the whole process.
@ -92,14 +85,14 @@ extern int g_debuglevel; /* the variable is only declared,
It's useful when enabling very verbose levels
on selective conditions (such as position in src) */
# define RAWLOG(l, ...) { \
if (l<=g_debuglevel) { \
fprintf(stderr, __VA_ARGS__); \
# define RAWLOG(l, ...) { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__VA_ARGS__); \
} }
# define DEBUGLOG(l, ...) { \
if (l<=g_debuglevel) { \
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
fprintf(stderr, " \n"); \
# define DEBUGLOG(l, ...) { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
ZSTD_DEBUG_PRINT(" \n"); \
} }
#else
# define RAWLOG(l, ...) {} /* disabled */

View file

@ -38,8 +38,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
static U32 FSE_ctz(U32 val)
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanForward(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_ctz(val);
# elif defined(__ICCARM__) /* IAR Intrinsic */
return __CTZ(val);
# else /* Software version */
U32 count = 0;
while ((val & 1) == 0) {
val >>= 1;
++count;
}
return count;
# endif
}
}
FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
const BYTE* const istart = (const BYTE*) headerBuffer;
const BYTE* const iend = istart + hbSize;
@ -50,23 +73,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
U32 bitStream;
int bitCount;
unsigned charnum = 0;
unsigned const maxSV1 = *maxSVPtr + 1;
int previous0 = 0;
if (hbSize < 4) {
/* This function only works when hbSize >= 4 */
char buffer[4];
memset(buffer, 0, sizeof(buffer));
memcpy(buffer, headerBuffer, hbSize);
if (hbSize < 8) {
/* This function only works when hbSize >= 8 */
char buffer[8] = {0};
ZSTD_memcpy(buffer, headerBuffer, hbSize);
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
buffer, sizeof(buffer));
if (FSE_isError(countSize)) return countSize;
if (countSize > hbSize) return ERROR(corruption_detected);
return countSize;
} }
assert(hbSize >= 4);
assert(hbSize >= 8);
/* init */
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
bitStream = MEM_readLE32(ip);
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@ -77,36 +100,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
threshold = 1<<nbBits;
nbBits++;
while ((remaining>1) & (charnum<=*maxSVPtr)) {
for (;;) {
if (previous0) {
unsigned n0 = charnum;
while ((bitStream & 0xFFFF) == 0xFFFF) {
n0 += 24;
if (ip < iend-5) {
ip += 2;
bitStream = MEM_readLE32(ip) >> bitCount;
/* Count the number of repeats. Each time the
* 2-bit repeat code is 0b11 there is another
* repeat.
* Avoid UB by setting the high bit to 1.
*/
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
while (repeats >= 12) {
charnum += 3 * 12;
if (LIKELY(ip <= iend-7)) {
ip += 3;
} else {
bitStream >>= 16;
bitCount += 16;
} }
while ((bitStream & 3) == 3) {
n0 += 3;
bitStream >>= 2;
bitCount += 2;
bitCount -= (int)(8 * (iend - 7 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
}
n0 += bitStream & 3;
charnum += 3 * repeats;
bitStream >>= 2 * repeats;
bitCount += 2 * repeats;
/* Add the final repeat which isn't 0b11. */
assert((bitStream & 3) < 3);
charnum += bitStream & 3;
bitCount += 2;
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
while (charnum < n0) normalizedCounter[charnum++] = 0;
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
/* This is an error, but break and return an error
* at the end, because returning out of a loop makes
* it harder for the compiler to optimize.
*/
if (charnum >= maxSV1) break;
/* We don't need to set the normalized count to 0
* because we already memset the whole buffer to 0.
*/
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert((bitCount >> 3) <= 3); /* For first condition to work */
ip += bitCount>>3;
bitCount &= 7;
bitStream = MEM_readLE32(ip) >> bitCount;
} else {
bitStream >>= 2;
} }
{ int const max = (2*threshold-1) - remaining;
bitCount -= (int)(8 * (iend - 4 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
}
{
int const max = (2*threshold-1) - remaining;
int count;
if ((bitStream & (threshold-1)) < (U32)max) {
@ -119,24 +164,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
}
count--; /* extra accuracy */
remaining -= count < 0 ? -count : count; /* -1 means +1 */
/* When it matters (small blocks), this is a
* predictable branch, because we don't use -1.
*/
if (count >= 0) {
remaining -= count;
} else {
assert(count == -1);
remaining += count;
}
normalizedCounter[charnum++] = (short)count;
previous0 = !count;
while (remaining < threshold) {
nbBits--;
threshold >>= 1;
}
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert(threshold > 1);
if (remaining < threshold) {
/* This branch can be folded into the
* threshold update condition because we
* know that threshold > 1.
*/
if (remaining <= 1) break;
nbBits = BIT_highbit32(remaining) + 1;
threshold = 1 << (nbBits - 1);
}
if (charnum >= maxSV1) break;
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
ip += bitCount>>3;
bitCount &= 7;
} else {
bitCount -= (int)(8 * (iend - 4 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
bitStream = MEM_readLE32(ip) >> bitCount;
} }
if (remaining != 1) return ERROR(corruption_detected);
/* Only possible when there are too many zeros. */
if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
if (bitCount > 32) return ERROR(corruption_detected);
*maxSVPtr = charnum-1;
@ -144,6 +208,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
return ip-istart;
}
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t FSE_readNCount_body_default(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif
size_t FSE_readNCount_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize, int bmi2)
{
#if DYNAMIC_BMI2
if (bmi2) {
return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif
(void)bmi2;
return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
size_t FSE_readNCount(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
}
/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
@ -155,6 +256,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize)
{
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
}
FORCE_INLINE_TEMPLATE size_t
HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize,
int bmi2)
{
U32 weightTotal;
const BYTE* ip = (const BYTE*) src;
@ -163,7 +275,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
if (!srcSize) return ERROR(srcSize_wrong);
iSize = ip[0];
/* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
/* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
if (iSize >= 128) { /* special header */
oSize = iSize - 127;
@ -177,14 +289,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
huffWeight[n+1] = ip[n/2] & 15;
} } }
else { /* header compressed with FSE (normal case) */
FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
/* max (hwSize-1) values decoded, as last one is implied */
oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
if (FSE_isError(oSize)) return oSize;
}
/* collect weight stats */
memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
@ -214,3 +326,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
*nbSymbolsPtr = (U32)(oSize+1);
return iSize+1;
}
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
{
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
}
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
{
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
}
#endif
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize,
int bmi2)
{
#if DYNAMIC_BMI2
if (bmi2) {
return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}
#endif
(void)bmi2;
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}

View file

@ -48,6 +48,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
case PREFIX(maxCode):
default: return notErrorCode;
}

View file

@ -21,7 +21,7 @@ extern "C" {
/* ****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
#include "zstd_errors.h" /* enum list */

View file

@ -23,7 +23,7 @@ extern "C" {
/*-*****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include "zstd_deps.h" /* size_t, ptrdiff_t */
/*-*****************************************
@ -137,10 +137,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
/*! FSE_normalizeCount():
normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
useLowProbCount is a boolean parameter which trades off compressed size for
faster header decoding. When it is set to 1, the compressed data will be slightly
smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
is a good default, since header deserialization makes a big speed difference.
Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
@return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
/*! FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@ -228,6 +234,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize);
/*! FSE_readNCount_bmi2():
* Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
*/
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize, int bmi2);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
@ -288,12 +301,12 @@ If there is an error, the function will return an error code, which can be teste
*******************************************/
/* FSE buffer bounds */
#define FSE_NCOUNTBOUND 512
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@ -309,9 +322,9 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
* FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
*/
#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@ -322,18 +335,30 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/* FSE_buildCTable_wksp() :
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` must be >= `(1<<tableLog)`.
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
*/
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
typedef enum {
FSE_repeat_none, /**< Cannot use the previous table */
@ -644,6 +669,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
#ifndef FSE_DEFAULT_MEMORY_USAGE
# define FSE_DEFAULT_MEMORY_USAGE 13
#endif
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
#endif
/*!FSE_MAX_SYMBOL_VALUE :
* Maximum symbol value authorized.
@ -677,7 +705,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
#endif
#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
#endif /* FSE_STATIC_LINKING_ONLY */

View file

@ -16,13 +16,14 @@
/* **************************************************************
* Includes
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include "debug.h" /* assert */
#include "bitstream.h"
#include "compiler.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h"
/* **************************************************************
@ -59,25 +60,27 @@
FSE_DTable* FSE_createDTable (unsigned tableLog)
{
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
}
void FSE_freeDTable (FSE_DTable* dt)
{
free(dt);
ZSTD_free(dt);
}
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
U16* symbolNext = (U16*)workSpace;
BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
U32 const maxSV1 = maxSymbolValue + 1;
U32 const tableSize = 1 << tableLog;
U32 highThreshold = tableSize-1;
/* Sanity Checks */
if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
@ -95,11 +98,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
symbolNext[s] = normalizedCounter[s];
} } }
memcpy(dt, &DTableH, sizeof(DTableH));
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
}
/* Spread symbols */
{ U32 const tableMask = tableSize-1;
if (highThreshold == tableSize - 1) {
size_t const tableMask = tableSize-1;
size_t const step = FSE_TABLESTEP(tableSize);
/* First lay down the symbols in order.
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
* misses since small blocks generally have small table logs, so nearly
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
* our buffer to handle the over-write.
*/
{
U64 const add = 0x0101010101010101ull;
size_t pos = 0;
U64 sv = 0;
U32 s;
for (s=0; s<maxSV1; ++s, sv += add) {
int i;
int const n = normalizedCounter[s];
MEM_write64(spread + pos, sv);
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
pos += n;
}
}
/* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the the
* variable size inner loop, which caused lots of branch misses.
* Now we can run through all the positions without any branch misses.
* We unroll the loop twice, since that is what emperically worked best.
*/
{
size_t position = 0;
size_t s;
size_t const unroll = 2;
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
for (s = 0; s < (size_t)tableSize; s += unroll) {
size_t u;
for (u = 0; u < unroll; ++u) {
size_t const uPosition = (position + (u * step)) & tableMask;
tableDecode[uPosition].symbol = spread[s + u];
}
position = (position + (unroll * step)) & tableMask;
}
assert(position == 0);
}
} else {
U32 const tableMask = tableSize-1;
U32 const step = FSE_TABLESTEP(tableSize);
U32 s, position = 0;
for (s=0; s<maxSV1; s++) {
@ -124,6 +173,11 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
return 0;
}
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
}
#ifndef FSE_COMMONDEFS_ONLY
@ -251,36 +305,89 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
}
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
}
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize,
unsigned maxLog, void* workSpace, size_t wkspSize,
int bmi2)
{
const BYTE* const istart = (const BYTE*)cSrc;
const BYTE* ip = istart;
short counting[FSE_MAX_SYMBOL_VALUE+1];
unsigned tableLog;
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
FSE_DTable* const dtable = (FSE_DTable*)workSpace;
/* normal FSE decoding mode */
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
/* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
ip += NCountLength;
cSrcSize -= NCountLength;
CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
wkspSize -= FSE_DTABLE_SIZE(tableLog);
return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
{
const void* ptr = dtable;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
}
}
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
}
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
#endif
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
if (bmi2) {
return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
#endif
(void)bmi2;
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
{
DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
}
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
{
/* Static analyzer seems unable to understand this table will be properly initialized later */
U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
}
#endif
#endif /* FSE_COMMONDEFS_ONLY */

View file

@ -20,7 +20,7 @@ extern "C" {
#define HUF_H_298734234
/* *** Dependencies *** */
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
/* *** library symbols visibility *** */
@ -111,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
/* *** Dependencies *** */
#include "mem.h" /* U32 */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
/* *** Constants *** */
@ -133,12 +135,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* static allocation of HUF's Compression Table */
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
struct HUF_CElt_s {
U16 val;
BYTE nbBits;
}; /* typedef'd to HUF_CElt */
typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
void* name##hv = &(name##hb); \
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
/* static allocation of HUF's DTable */
typedef U32 HUF_DTable;
@ -184,7 +190,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
* or to save and regenerate 'CTable' using external methods.
*/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
@ -226,6 +231,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize);
/*! HUF_readStats_wksp() :
* Same as HUF_readStats() but takes an external workspace which must be
* 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/
#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
int bmi2);
/** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
@ -332,6 +350,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
#endif
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif
#endif /* HUF_STATIC_LINKING_ONLY */

View file

@ -18,8 +18,10 @@ extern "C" {
/*-****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include <string.h> /* memcpy */
#include <stddef.h> /* size_t, ptrdiff_t */
#include "compiler.h" /* __has_builtin */
#include "debug.h" /* DEBUG_STATIC_ASSERT */
#include "zstd_deps.h" /* ZSTD_memcpy */
/*-****************************************
@ -39,93 +41,15 @@ extern "C" {
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
#endif
/* code only tested on 32 and 64 bits systems */
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
/* detects whether we are being compiled under msan */
#if defined (__has_feature)
# if __has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1
# endif
#endif
#if defined (MEMORY_SANITIZER)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stdint.h> /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif
/* detects whether we are being compiled under asan */
#if defined (__has_feature)
# if __has_feature(address_sanitizer)
# define ADDRESS_SANITIZER 1
# endif
#elif defined(__SANITIZE_ADDRESS__)
# define ADDRESS_SANITIZER 1
#endif
#if defined (ADDRESS_SANITIZER)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
/*-**************************************************************
* Basic Types
*****************************************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
@ -157,7 +81,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
/*-**************************************************************
* Memory I/O
* Memory I/O API
*****************************************************************/
/*=== Static platform detection ===*/
MEM_STATIC unsigned MEM_32bits(void);
MEM_STATIC unsigned MEM_64bits(void);
MEM_STATIC unsigned MEM_isLittleEndian(void);
/*=== Native unaligned read/write ===*/
MEM_STATIC U16 MEM_read16(const void* memPtr);
MEM_STATIC U32 MEM_read32(const void* memPtr);
MEM_STATIC U64 MEM_read64(const void* memPtr);
MEM_STATIC size_t MEM_readST(const void* memPtr);
MEM_STATIC void MEM_write16(void* memPtr, U16 value);
MEM_STATIC void MEM_write32(void* memPtr, U32 value);
MEM_STATIC void MEM_write64(void* memPtr, U64 value);
/*=== Little endian unaligned read/write ===*/
MEM_STATIC U16 MEM_readLE16(const void* memPtr);
MEM_STATIC U32 MEM_readLE24(const void* memPtr);
MEM_STATIC U32 MEM_readLE32(const void* memPtr);
MEM_STATIC U64 MEM_readLE64(const void* memPtr);
MEM_STATIC size_t MEM_readLEST(const void* memPtr);
MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
/*=== Big endian unaligned read/write ===*/
MEM_STATIC U32 MEM_readBE32(const void* memPtr);
MEM_STATIC U64 MEM_readBE64(const void* memPtr);
MEM_STATIC size_t MEM_readBEST(const void* memPtr);
MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
/*=== Byteswap ===*/
MEM_STATIC U32 MEM_swap32(U32 in);
MEM_STATIC U64 MEM_swap64(U64 in);
MEM_STATIC size_t MEM_swapST(size_t in);
/*-**************************************************************
* Memory I/O Implementation
*****************************************************************/
/* MEM_FORCE_MEMORY_ACCESS :
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
@ -236,37 +206,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v =
MEM_STATIC U16 MEM_read16(const void* memPtr)
{
U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC U32 MEM_read32(const void* memPtr)
{
U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC U64 MEM_read64(const void* memPtr)
{
U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC size_t MEM_readST(const void* memPtr)
{
size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC void MEM_write16(void* memPtr, U16 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
MEM_STATIC void MEM_write32(void* memPtr, U32 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
MEM_STATIC void MEM_write64(void* memPtr, U64 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
#endif /* MEM_FORCE_MEMORY_ACCESS */
@ -445,6 +415,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
MEM_writeBE64(memPtr, (U64)val);
}
/* code only tested on 32 and 64 bits systems */
MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
#if defined (__cplusplus)
}

View file

@ -10,9 +10,9 @@
/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
#include "debug.h" /* assert */
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
#include "zstd_internal.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "pool.h"
/* ====== Compiler specifics ====== */
@ -105,6 +105,10 @@ static void* POOL_thread(void* opaque) {
assert(0); /* Unreachable */
}
POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
return POOL_create (numThreads, 0);
}
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
}
@ -115,14 +119,14 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
/* Check parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem);
if (!ctx) { return NULL; }
/* Initialize the job queue.
* It needs one extra space since one space is wasted to differentiate
* empty and full queues.
*/
ctx->queueSize = queueSize + 1;
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem);
ctx->queueHead = 0;
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
@ -136,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
}
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->threadCapacity = 0;
ctx->customMem = customMem;
/* Check for errors */
@ -179,12 +183,14 @@ void POOL_free(POOL_ctx *ctx) {
ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
ZSTD_free(ctx->queue, ctx->customMem);
ZSTD_free(ctx->threads, ctx->customMem);
ZSTD_free(ctx, ctx->customMem);
ZSTD_customFree(ctx->queue, ctx->customMem);
ZSTD_customFree(ctx->threads, ctx->customMem);
ZSTD_customFree(ctx, ctx->customMem);
}
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
POOL_free (pool);
}
size_t POOL_sizeof(POOL_ctx *ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
@ -203,11 +209,11 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
return 0;
}
/* numThreads > threadCapacity */
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
if (!threadPool) return 1;
/* replace existing thread pool */
memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
ZSTD_free(ctx->threads, ctx->customMem);
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
ZSTD_customFree(ctx->threads, ctx->customMem);
ctx->threads = threadPool;
/* Initialize additional threads */
{ size_t threadId;
@ -301,7 +307,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
struct POOL_ctx_s {
int dummy;
};
static POOL_ctx g_ctx;
static POOL_ctx g_poolCtx;
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
@ -311,11 +317,11 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM
(void)numThreads;
(void)queueSize;
(void)customMem;
return &g_ctx;
return &g_poolCtx;
}
void POOL_free(POOL_ctx* ctx) {
assert(!ctx || ctx == &g_ctx);
assert(!ctx || ctx == &g_poolCtx);
(void)ctx;
}
@ -337,7 +343,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
size_t POOL_sizeof(POOL_ctx* ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
assert(ctx == &g_ctx);
assert(ctx == &g_poolCtx);
return sizeof(*ctx);
}

View file

@ -16,7 +16,7 @@ extern "C" {
#endif
#include <stddef.h> /* size_t */
#include "zstd_deps.h"
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
#include "../zstd.h"

View file

@ -78,11 +78,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
#include <stdlib.h>
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h"
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
{
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
*mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
if (!*mutex)
return 1;
return pthread_mutex_init(*mutex, attr);
@ -94,14 +95,14 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
return 0;
{
int const ret = pthread_mutex_destroy(*mutex);
free(*mutex);
ZSTD_free(*mutex);
return ret;
}
}
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
{
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
*cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
if (!*cond)
return 1;
return pthread_cond_init(*cond, attr);
@ -113,7 +114,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
return 0;
{
int const ret = pthread_cond_destroy(*cond);
free(*cond);
ZSTD_free(*cond);
return ret;
}
}

View file

@ -77,14 +77,12 @@
* Includes & Memory related functions
***************************************/
/* Modify the local functions below should you wish to use some other memory routines */
/* for malloc(), free() */
#include <stdlib.h>
#include <stddef.h> /* size_t */
static void* XXH_malloc(size_t s) { return malloc(s); }
static void XXH_free (void* p) { free(p); }
/* for memcpy() */
#include <string.h>
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
/* for ZSTD_malloc(), ZSTD_free() */
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
static void XXH_free (void* p) { ZSTD_free(p); }
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
#ifndef XXH_STATIC_LINKING_ONLY
# define XXH_STATIC_LINKING_ONLY
@ -95,49 +93,13 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
/* *************************************
* Compiler Specific Options
***************************************/
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
#else
# define FORCE_INLINE_ATTR
#endif
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
#ifdef _MSC_VER
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
#include "compiler.h"
/* *************************************
* Basic Types
***************************************/
#ifndef MEM_MODULE
# define MEM_MODULE
# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
# else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
# endif
#endif
#include "mem.h" /* BYTE, U32, U64, size_t */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
@ -163,14 +125,14 @@ static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
static U32 XXH_read32(const void* memPtr)
{
U32 val;
memcpy(&val, memPtr, sizeof(val));
ZSTD_memcpy(&val, memPtr, sizeof(val));
return val;
}
static U64 XXH_read64(const void* memPtr)
{
U64 val;
memcpy(&val, memPtr, sizeof(val));
ZSTD_memcpy(&val, memPtr, sizeof(val));
return val;
}
@ -307,12 +269,12 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
****************************/
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
{
memcpy(dstState, srcState, sizeof(*dstState));
ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
}
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
{
memcpy(dstState, srcState, sizeof(*dstState));
ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
}
@ -554,12 +516,12 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
{
XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
state.v1 = seed + PRIME32_1 + PRIME32_2;
state.v2 = seed + PRIME32_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME32_1;
memcpy(statePtr, &state, sizeof(state));
ZSTD_memcpy(statePtr, &state, sizeof(state));
return XXH_OK;
}
@ -567,12 +529,12 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int s
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
{
XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
state.v1 = seed + PRIME64_1 + PRIME64_2;
state.v2 = seed + PRIME64_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME64_1;
memcpy(statePtr, &state, sizeof(state));
ZSTD_memcpy(statePtr, &state, sizeof(state));
return XXH_OK;
}
@ -843,14 +805,14 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
{
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
memcpy(dst, &hash, sizeof(*dst));
ZSTD_memcpy(dst, &hash, sizeof(*dst));
}
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
{
XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
memcpy(dst, &hash, sizeof(*dst));
ZSTD_memcpy(dst, &hash, sizeof(*dst));
}
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)

View file

@ -55,7 +55,7 @@ extern "C" {
/* ****************************
* Definitions
******************************/
#include <stddef.h> /* size_t */
#include "zstd_deps.h"
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;

View file

@ -13,8 +13,8 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdlib.h> /* malloc, calloc, free */
#include <string.h> /* memset */
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "error_private.h"
#include "zstd_internal.h"
@ -53,31 +53,31 @@ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString
/*=**************************************************************
* Custom allocator
****************************************************************/
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc)
return customMem.customAlloc(customMem.opaque, size);
return malloc(size);
return ZSTD_malloc(size);
}
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc) {
/* calloc implemented as malloc+memset;
* not as efficient as calloc, but next best guess for custom malloc */
void* const ptr = customMem.customAlloc(customMem.opaque, size);
memset(ptr, 0, size);
ZSTD_memset(ptr, 0, size);
return ptr;
}
return calloc(1, size);
return ZSTD_calloc(1, size);
}
void ZSTD_free(void* ptr, ZSTD_customMem customMem)
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
if (ptr!=NULL) {
if (customMem.customFree)
customMem.customFree(customMem.opaque, ptr);
else
free(ptr);
ZSTD_free(ptr);
}
}

111
thirdparty/zstd/common/zstd_deps.h vendored Normal file
View file

@ -0,0 +1,111 @@
/*
* Copyright (c) 2016-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This file provides common libc dependencies that zstd requires.
* The purpose is to allow replacing this file with a custom implementation
* to compile zstd without libc support.
*/
/* Need:
* NULL
* INT_MAX
* UINT_MAX
* ZSTD_memcpy()
* ZSTD_memset()
* ZSTD_memmove()
*/
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
#include <limits.h>
#include <stddef.h>
#include <string.h>
#if defined(__GNUC__) && __GNUC__ >= 4
# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
#else
# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
# define ZSTD_memset(p,v,l) memset((p),(v),(l))
#endif
#endif /* ZSTD_DEPS_COMMON */
/* Need:
* ZSTD_malloc()
* ZSTD_free()
* ZSTD_calloc()
*/
#ifdef ZSTD_DEPS_NEED_MALLOC
#ifndef ZSTD_DEPS_MALLOC
#define ZSTD_DEPS_MALLOC
#include <stdlib.h>
#define ZSTD_malloc(s) malloc(s)
#define ZSTD_calloc(n,s) calloc((n), (s))
#define ZSTD_free(p) free((p))
#endif /* ZSTD_DEPS_MALLOC */
#endif /* ZSTD_DEPS_NEED_MALLOC */
/*
* Provides 64-bit math support.
* Need:
* U64 ZSTD_div64(U64 dividend, U32 divisor)
*/
#ifdef ZSTD_DEPS_NEED_MATH64
#ifndef ZSTD_DEPS_MATH64
#define ZSTD_DEPS_MATH64
#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
#endif /* ZSTD_DEPS_MATH64 */
#endif /* ZSTD_DEPS_NEED_MATH64 */
/* Need:
* assert()
*/
#ifdef ZSTD_DEPS_NEED_ASSERT
#ifndef ZSTD_DEPS_ASSERT
#define ZSTD_DEPS_ASSERT
#include <assert.h>
#endif /* ZSTD_DEPS_ASSERT */
#endif /* ZSTD_DEPS_NEED_ASSERT */
/* Need:
* ZSTD_DEBUG_PRINT()
*/
#ifdef ZSTD_DEPS_NEED_IO
#ifndef ZSTD_DEPS_IO
#define ZSTD_DEPS_IO
#include <stdio.h>
#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
#endif /* ZSTD_DEPS_IO */
#endif /* ZSTD_DEPS_NEED_IO */
/* Only requested when <stdint.h> is known to be present.
* Need:
* intptr_t
*/
#ifdef ZSTD_DEPS_NEED_STDINT
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT
#include <stdint.h>
#endif /* ZSTD_DEPS_STDINT */
#endif /* ZSTD_DEPS_NEED_STDINT */

View file

@ -77,6 +77,7 @@ typedef enum {
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;

View file

@ -19,7 +19,7 @@
/*-*************************************
* Dependencies
***************************************/
#ifdef __aarch64__
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#include "compiler.h"
@ -139,7 +139,7 @@ void _force_has_format_string(const char *format, ...) {
#define ZSTD_REP_NUM 3 /* number of repcodes */
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
#define KB *(1 <<10)
#define MB *(1 <<20)
@ -153,13 +153,13 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
#define BIT0 1
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
#define ZSTD_FRAMECHECKSUMSIZE 4
@ -186,61 +186,75 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define OffFSELog 8
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 6, 7, 8, 9,10,11,12,
13,14,15,16 };
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2,
2, 3, 2, 1, 1, 1, 1, 1,
-1,-1,-1,-1 };
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
/* Each table cannot take more than #symbols * FSELog bits */
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 6, 7, 8, 9,10,11,12,
13,14,15,16
};
static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
4, 3, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2,
2, 3, 2, 1, 1, 1, 1, 1,
-1,-1,-1,-1
};
#define LL_DEFAULTNORMLOG 6 /* for static allocation */
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 4, 5, 7, 8, 9,10,11,
12,13,14,15,16 };
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1,-1,-1,
-1,-1,-1,-1,-1 };
static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 4, 5, 7, 8, 9,10,11,
12,13,14,15,16
};
static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
1, 4, 3, 2, 2, 2, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1,-1,-1,
-1,-1,-1,-1,-1
};
#define ML_DEFAULTNORMLOG 6 /* for static allocation */
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1 };
static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
1, 1, 1, 1, 1, 1, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1
};
#define OF_DEFAULTNORMLOG 5 /* for static allocation */
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
/*-*******************************************
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) {
#ifdef __aarch64__
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
#else
memcpy(dst, src, 8);
ZSTD_memcpy(dst, src, 8);
#endif
}
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) {
#ifdef __aarch64__
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#else
memcpy(dst, src, 16);
ZSTD_memcpy(dst, src, 16);
#endif
}
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
@ -255,13 +269,13 @@ typedef enum {
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
* Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
* @param ovtype controls the overlap detection
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
* The src buffer must be before the dst buffer.
*/
MEM_STATIC FORCE_INLINE_ATTR
MEM_STATIC FORCE_INLINE_ATTR
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
@ -284,14 +298,16 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
* one COPY16() in the first call. Then, do two calls per loop since
* at that point it is more likely to have a high trip count.
*/
#ifndef __aarch64__
#ifdef __aarch64__
do {
COPY16(op, ip);
}
while (op < oend);
#else
COPY16(op, ip);
if (op >= oend) return;
ZSTD_copy16(op, ip);
if (16 >= length) return;
op += 16;
ip += 16;
do {
COPY16(op, ip);
COPY16(op, ip);
@ -305,7 +321,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
{
size_t const length = MIN(dstCapacity, srcSize);
if (length > 0) {
memcpy(dst, src, length);
ZSTD_memcpy(dst, src, length);
}
return length;
}
@ -320,28 +336,39 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
* In which case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
/* Controls whether the input/output buffer is buffered or stable. */
typedef enum {
ZSTD_bm_buffered = 0, /* Buffer the input/output */
ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
} ZSTD_bufferMode_e;
/*-*******************************************
* Private declarations
*********************************************/
typedef struct seqDef_s {
U32 offset;
U32 offset; /* Offset code of the sequence */
U16 litLength;
U16 matchLength;
} seqDef;
typedef struct {
seqDef* sequencesStart;
seqDef* sequences;
seqDef* sequences; /* ptr to end of sequences */
BYTE* litStart;
BYTE* lit;
BYTE* lit; /* ptr to end of literals */
BYTE* llCode;
BYTE* mlCode;
BYTE* ofCode;
size_t maxNbSeq;
size_t maxNbLit;
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
U32 longLengthPos;
/* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
* the existing value of the litLength or matchLength by 0x10000.
*/
U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
} seqStore_t;
typedef struct {
@ -384,9 +411,9 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBu
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
/* custom memory allocation functions */
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
void ZSTD_free(void* ptr, ZSTD_customMem customMem);
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
@ -394,8 +421,12 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# if STATIC_BMI2 == 1
return _lzcnt_u32(val)^31;
# else
unsigned long r=0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */