From 5be2dd25f292b8e28186f710ad1f814b0c4cfb1f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Nov 2015 13:43:58 +0100 Subject: [PATCH 01/79] Changed Library source tree - no more zstdhc ; zstd.h is enough - ZSTD_compress() now needs a compressionLevel - separated zstd_compress.c and zstd_decompress.c - updated zstdcli, fullbench, fuzzer with new API --- lib/zstd.h | 5 +- lib/{zstdhc.c => zstd_compress.c} | 258 +++---- lib/zstd_decompress.c | 1103 +++++++++++++++++++++++++++++ lib/zstd_static.h | 103 ++- lib/zstdhc.h | 76 -- lib/zstdhc_static.h | 156 ---- programs/Makefile | 14 +- programs/bench.c | 13 +- programs/fileio.c | 76 +- programs/fullbench.c | 10 +- programs/fuzzer.c | 21 +- programs/paramgrill.c | 113 ++- 12 files changed, 1425 insertions(+), 523 deletions(-) rename lib/{zstdhc.c => zstd_compress.c} (75%) create mode 100644 lib/zstd_decompress.c delete mode 100644 lib/zstdhc.h delete mode 100644 lib/zstdhc_static.h diff --git a/lib/zstd.h b/lib/zstd.h index 81437885375..3b18a867d69 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -57,7 +57,8 @@ unsigned ZSTD_versionNumber (void); * Simple functions ***************************************/ size_t ZSTD_compress( void* dst, size_t maxDstSize, - const void* src, size_t srcSize); + const void* src, size_t srcSize, + int compressionLevel); size_t ZSTD_decompress( void* dst, size_t maxOriginalSize, const void* src, size_t compressedSize); @@ -100,7 +101,7 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); ZSTD_compressCCtx() : Same as ZSTD_compress(), but requires a ZSTD_CCtx working space already allocated */ -size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel); #if defined (__cplusplus) diff --git a/lib/zstdhc.c b/lib/zstd_compress.c similarity index 75% rename from lib/zstdhc.c rename to lib/zstd_compress.c index fdf6dc95ddd..f3cf83c6160 
100644 --- a/lib/zstdhc.c +++ b/lib/zstd_compress.c @@ -55,7 +55,6 @@ ***************************************/ #include /* malloc */ #include /* memset */ -#include "zstdhc_static.h" #include "zstd_static.h" #include "zstd_internal.h" #include "mem.h" @@ -77,7 +76,7 @@ #define BLOCKSIZE (128 KB) /* define, for static allocation */ #define WORKPLACESIZE (BLOCKSIZE*3) -struct ZSTD_HC_CCtx_s +struct ZSTD_CCtx_s { const BYTE* end; /* next block here to continue on current prefix */ const BYTE* base; /* All regular indexes relative to this position */ @@ -85,7 +84,7 @@ struct ZSTD_HC_CCtx_s U32 dictLimit; /* below that point, need extDict */ U32 lowLimit; /* below that point, no more data */ U32 nextToUpdate; /* index from which to continue dictionary update */ - ZSTD_HC_parameters params; + ZSTD_parameters params; void* workSpace; size_t workSpaceSize; @@ -95,12 +94,12 @@ struct ZSTD_HC_CCtx_s }; -ZSTD_HC_CCtx* ZSTD_HC_createCCtx(void) +ZSTD_CCtx* ZSTD_createCCtx(void) { - return (ZSTD_HC_CCtx*) calloc(1, sizeof(ZSTD_HC_CCtx)); + return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx)); } -size_t ZSTD_HC_freeCCtx(ZSTD_HC_CCtx* cctx) +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) { free(cctx->workSpace); free(cctx); @@ -108,45 +107,45 @@ size_t ZSTD_HC_freeCCtx(ZSTD_HC_CCtx* cctx) } -/** ZSTD_HC_validateParams +/** ZSTD_validateParams correct params value to remain within authorized range optimize for srcSize if srcSize > 0 */ -void ZSTD_HC_validateParams(ZSTD_HC_parameters* params, U64 srcSizeHint) +void ZSTD_validateParams(ZSTD_parameters* params, U64 srcSizeHint) { - const U32 btPlus = (params->strategy == ZSTD_HC_btlazy2); + const U32 btPlus = (params->strategy == ZSTD_btlazy2); /* validate params */ - if (params->windowLog > ZSTD_HC_WINDOWLOG_MAX) params->windowLog = ZSTD_HC_WINDOWLOG_MAX; - if (params->windowLog < ZSTD_HC_WINDOWLOG_MIN) params->windowLog = ZSTD_HC_WINDOWLOG_MIN; + if (params->windowLog > ZSTD_WINDOWLOG_MAX) params->windowLog = ZSTD_WINDOWLOG_MAX; + if 
(params->windowLog < ZSTD_WINDOWLOG_MIN) params->windowLog = ZSTD_WINDOWLOG_MIN; /* correct params, to use less memory */ - if ((srcSizeHint > 0) && (srcSizeHint < (1< 0) && (srcSizeHint < (1<windowLog > srcLog) params->windowLog = srcLog; } - if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_HC_CONTENTLOG_MAX */ - if (params->contentLog < ZSTD_HC_CONTENTLOG_MIN) params->contentLog = ZSTD_HC_CONTENTLOG_MIN; - if (params->hashLog > ZSTD_HC_HASHLOG_MAX) params->hashLog = ZSTD_HC_HASHLOG_MAX; - if (params->hashLog < ZSTD_HC_HASHLOG_MIN) params->hashLog = ZSTD_HC_HASHLOG_MIN; - if (params->searchLog > ZSTD_HC_SEARCHLOG_MAX) params->searchLog = ZSTD_HC_SEARCHLOG_MAX; - if (params->searchLog < ZSTD_HC_SEARCHLOG_MIN) params->searchLog = ZSTD_HC_SEARCHLOG_MIN; - if (params->searchLength> ZSTD_HC_SEARCHLENGTH_MAX) params->searchLength = ZSTD_HC_SEARCHLENGTH_MAX; - if (params->searchLength< ZSTD_HC_SEARCHLENGTH_MIN) params->searchLength = ZSTD_HC_SEARCHLENGTH_MIN; - if ((U32)params->strategy>(U32)ZSTD_HC_btlazy2) params->strategy = ZSTD_HC_btlazy2; + if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_CONTENTLOG_MAX */ + if (params->contentLog < ZSTD_CONTENTLOG_MIN) params->contentLog = ZSTD_CONTENTLOG_MIN; + if (params->hashLog > ZSTD_HASHLOG_MAX) params->hashLog = ZSTD_HASHLOG_MAX; + if (params->hashLog < ZSTD_HASHLOG_MIN) params->hashLog = ZSTD_HASHLOG_MIN; + if (params->searchLog > ZSTD_SEARCHLOG_MAX) params->searchLog = ZSTD_SEARCHLOG_MAX; + if (params->searchLog < ZSTD_SEARCHLOG_MIN) params->searchLog = ZSTD_SEARCHLOG_MIN; + if (params->searchLength> ZSTD_SEARCHLENGTH_MAX) params->searchLength = ZSTD_SEARCHLENGTH_MAX; + if (params->searchLength< ZSTD_SEARCHLENGTH_MIN) params->searchLength = ZSTD_SEARCHLENGTH_MIN; + if ((U32)params->strategy>(U32)ZSTD_btlazy2) params->strategy = ZSTD_btlazy2; } -static size_t ZSTD_HC_resetCCtx_advanced (ZSTD_HC_CCtx* zc, - 
ZSTD_HC_parameters params, +static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, + ZSTD_parameters params, U64 srcSizeHint) { - ZSTD_HC_validateParams(¶ms, srcSizeHint); + ZSTD_validateParams(¶ms, srcSizeHint); /* reserve table memory */ { - const U32 contentLog = params.strategy == ZSTD_HC_fast ? 1 : params.contentLog; + const U32 contentLog = params.strategy == ZSTD_fast ? 1 : params.contentLog; const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32); const size_t neededSpace = tableSpace + WORKPLACESIZE; if (zc->workSpaceSize < neededSpace) @@ -185,30 +184,30 @@ static size_t ZSTD_HC_resetCCtx_advanced (ZSTD_HC_CCtx* zc, ***************************************/ static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_HC_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t ZSTD_HC_hash4Ptr(const void* ptr, U32 h) { return ZSTD_HC_hash4(MEM_read32(ptr), h); } +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_HC_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; } -static size_t ZSTD_HC_hash5Ptr(const void* p, U32 h) { return ZSTD_HC_hash5(MEM_read64(p), h); } +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_read64(p), h); } static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_HC_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } -static size_t ZSTD_HC_hash6Ptr(const void* p, U32 h) { return ZSTD_HC_hash6(MEM_read64(p), h); } +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_read64(p), h); } 
static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_HC_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } -static size_t ZSTD_HC_hash7Ptr(const void* p, U32 h) { return ZSTD_HC_hash7(MEM_read64(p), h); } +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_read64(p), h); } -static size_t ZSTD_HC_hashPtr(const void* p, U32 hBits, U32 mls) +static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) { switch(mls) { default: - case 4: return ZSTD_HC_hash4Ptr(p, hBits); - case 5: return ZSTD_HC_hash5Ptr(p, hBits); - case 6: return ZSTD_HC_hash6Ptr(p, hBits); - case 7: return ZSTD_HC_hash7Ptr(p, hBits); + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); } } @@ -217,7 +216,7 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 hBits, U32 mls) ***************************************/ FORCE_INLINE -size_t ZSTD_HC_compressBlock_fast_generic(ZSTD_HC_CCtx* ctx, +size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, const U32 mls) @@ -241,9 +240,9 @@ size_t ZSTD_HC_compressBlock_fast_generic(ZSTD_HC_CCtx* ctx, /* init */ if (ip == base) { - hashTable[ZSTD_HC_hashPtr(base+1, hBits, mls)] = 1; - hashTable[ZSTD_HC_hashPtr(base+2, hBits, mls)] = 2; - hashTable[ZSTD_HC_hashPtr(base+3, hBits, mls)] = 3; + hashTable[ZSTD_hashPtr(base+1, hBits, mls)] = 1; + hashTable[ZSTD_hashPtr(base+2, hBits, mls)] = 2; + hashTable[ZSTD_hashPtr(base+3, hBits, mls)] = 3; ip = base+4; } ZSTD_resetSeqStore(seqStorePtr); @@ -251,7 +250,7 @@ size_t ZSTD_HC_compressBlock_fast_generic(ZSTD_HC_CCtx* ctx, /* Main Search Loop */ while (ip < ilimit) /* < instead of <=, because unconditionnal ZSTD_addPtr(ip+1) */ { - const size_t h = ZSTD_HC_hashPtr(ip, hBits, 
mls); + const size_t h = ZSTD_hashPtr(ip, hBits, mls); const BYTE* match = base + hashTable[h]; hashTable[h] = (U32)(ip-base); @@ -271,11 +270,11 @@ size_t ZSTD_HC_compressBlock_fast_generic(ZSTD_HC_CCtx* ctx, ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength); /* Fill Table */ - hashTable[ZSTD_HC_hashPtr(ip+1, hBits, mls)] = (U32)(ip+1-base); + hashTable[ZSTD_hashPtr(ip+1, hBits, mls)] = (U32)(ip+1-base); ip += matchLength + MINMATCH; anchor = ip; if (ip < ilimit) /* same test as loop, for speed */ - hashTable[ZSTD_HC_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); } } @@ -292,7 +291,7 @@ size_t ZSTD_HC_compressBlock_fast_generic(ZSTD_HC_CCtx* ctx, } -size_t ZSTD_HC_compressBlock_fast(ZSTD_HC_CCtx* ctx, +size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { @@ -301,13 +300,13 @@ size_t ZSTD_HC_compressBlock_fast(ZSTD_HC_CCtx* ctx, { default: case 4 : - return ZSTD_HC_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 4); + return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 4); case 5 : - return ZSTD_HC_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 5); + return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 5); case 6 : - return ZSTD_HC_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 6); + return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 6); case 7 : - return ZSTD_HC_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 7); + return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 7); } } @@ -315,13 +314,13 @@ size_t ZSTD_HC_compressBlock_fast(ZSTD_HC_CCtx* ctx, /* ************************************* * Binary Tree search ***************************************/ -/** ZSTD_HC_insertBt1 : add one ptr to tree +/** ZSTD_insertBt1 : add one ptr to tree @ip : assumed <= iend-8 */ -static U32 
ZSTD_HC_insertBt1(ZSTD_HC_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares) +static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares) { U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; - const size_t h = ZSTD_HC_hashPtr(ip, hashLog, mls); + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); U32* const bt = zc->contentTable; const U32 btLog = zc->params.contentLog - 1; const U32 btMask= (1 << btLog) - 1; @@ -383,15 +382,15 @@ static U32 ZSTD_HC_insertBt1(ZSTD_HC_CCtx* zc, const BYTE* const ip, const U32 m FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HC_insertBtAndFindBestMatch ( - ZSTD_HC_CCtx* zc, +size_t ZSTD_insertBtAndFindBestMatch ( + ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, size_t* offsetPtr, U32 nbCompares, const U32 mls) { U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; - const size_t h = ZSTD_HC_hashPtr(ip, hashLog, mls); + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); U32* const bt = zc->contentTable; const U32 btLog = zc->params.contentLog - 1; const U32 btMask= (1 << btLog) - 1; @@ -452,7 +451,7 @@ size_t ZSTD_HC_insertBtAndFindBestMatch ( } -static const BYTE* ZSTD_HC_updateTree(ZSTD_HC_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +static const BYTE* ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) { const BYTE* const base = zc->base; const U32 target = (U32)(ip - base); @@ -460,8 +459,8 @@ static const BYTE* ZSTD_HC_updateTree(ZSTD_HC_CCtx* zc, const BYTE* const ip, co //size_t dummy; for( ; idx < target ; ) - idx += ZSTD_HC_insertBt1(zc, base+idx, mls, iend, nbCompares); - //ZSTD_HC_insertBtAndFindBestMatch(zc, base+idx, iend, &dummy, nbCompares, mls); + idx += ZSTD_insertBt1(zc, base+idx, mls, iend, 
nbCompares); + //ZSTD_insertBtAndFindBestMatch(zc, base+idx, iend, &dummy, nbCompares, mls); zc->nextToUpdate = idx; return base + idx; @@ -470,25 +469,25 @@ static const BYTE* ZSTD_HC_updateTree(ZSTD_HC_CCtx* zc, const BYTE* const ip, co /** Tree updater, providing best match */ FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HC_BtFindBestMatch ( - ZSTD_HC_CCtx* zc, +size_t ZSTD_BtFindBestMatch ( + ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 maxNbAttempts, const U32 mls) { - const BYTE* nextToUpdate = ZSTD_HC_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + const BYTE* nextToUpdate = ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); if (nextToUpdate > ip) { /* RLE data */ *offsetPtr = 1; return ZSTD_count(ip, ip-1, iLimit); } - return ZSTD_HC_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls); } -FORCE_INLINE size_t ZSTD_HC_BtFindBestMatch_selectMLS ( - ZSTD_HC_CCtx* zc, /* Index table will be updated */ +FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) @@ -496,9 +495,9 @@ FORCE_INLINE size_t ZSTD_HC_BtFindBestMatch_selectMLS ( switch(matchLengthSearch) { default : - case 4 : return ZSTD_HC_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_HC_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_HC_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); } } @@ -510,7 
+509,7 @@ FORCE_INLINE size_t ZSTD_HC_BtFindBestMatch_selectMLS ( #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] /* Update chains up to ip (excluded) */ -static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls) +static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) { U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; @@ -522,20 +521,20 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U while(idx < target) { - size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls); + size_t h = ZSTD_hashPtr(base+idx, hashLog, mls); NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; hashTable[h] = idx; idx++; } zc->nextToUpdate = target; - return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)]; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; } FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HC_HcFindBestMatch ( - ZSTD_HC_CCtx* zc, /* Index table will be updated */ +size_t ZSTD_HcFindBestMatch ( + ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) @@ -554,7 +553,7 @@ size_t ZSTD_HC_HcFindBestMatch ( size_t ml=0; /* HC4 match finder */ - matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch); + matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, matchLengthSearch); while ((matchIndex>lowLimit) && (nbAttempts)) { @@ -596,8 +595,8 @@ size_t ZSTD_HC_HcFindBestMatch ( } -FORCE_INLINE size_t ZSTD_HC_HcFindBestMatch_selectMLS ( - ZSTD_HC_CCtx* zc, /* Index table will be updated */ +FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) @@ -605,16 +604,16 @@ FORCE_INLINE size_t ZSTD_HC_HcFindBestMatch_selectMLS ( 
switch(matchLengthSearch) { default : - case 4 : return ZSTD_HC_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_HC_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_HC_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + case 4 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); } } /* common lazy function, to be inlined */ FORCE_INLINE -size_t ZSTD_HC_compressBlock_lazy_generic(ZSTD_HC_CCtx* ctx, +size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, const U32 searchMethod, const U32 deep) /* 0 : hc; 1 : bt */ { @@ -629,10 +628,10 @@ size_t ZSTD_HC_compressBlock_lazy_generic(ZSTD_HC_CCtx* ctx, const U32 maxSearches = 1 << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; - typedef size_t (*searchMax_f)(ZSTD_HC_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_HC_BtFindBestMatch_selectMLS : ZSTD_HC_HcFindBestMatch_selectMLS; + searchMax_f searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; /* init */ ZSTD_resetSeqStore(seqStorePtr); @@ -750,23 +749,23 @@ size_t ZSTD_HC_compressBlock_lazy_generic(ZSTD_HC_CCtx* ctx, seqStorePtr, srcSize); } -size_t ZSTD_HC_compressBlock_btlazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 1); + return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 1); } -size_t ZSTD_HC_compressBlock_lazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1); } -size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0); } -size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -817,7 +816,7 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS /* search */ { size_t offset=999999; - size_t matchLength = ZSTD_HC_HcFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); + 
size_t matchLength = ZSTD_HcFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); if (matchLength < MINMATCH) { ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ @@ -849,35 +848,35 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS } -typedef size_t (*ZSTD_HC_blockCompressor) (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); -static ZSTD_HC_blockCompressor ZSTD_HC_selectBlockCompressor(ZSTD_HC_strategy strat) +static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat) { switch(strat) { default : - case ZSTD_HC_fast: - return ZSTD_HC_compressBlock_fast; - case ZSTD_HC_greedy: - return ZSTD_HC_compressBlock_greedy; - case ZSTD_HC_lazy: - return ZSTD_HC_compressBlock_lazy; - case ZSTD_HC_lazy2: - return ZSTD_HC_compressBlock_lazy2; - case ZSTD_HC_btlazy2: - return ZSTD_HC_compressBlock_btlazy2; + case ZSTD_fast: + return ZSTD_compressBlock_fast; + case ZSTD_greedy: + return ZSTD_compressBlock_greedy; + case ZSTD_lazy: + return ZSTD_compressBlock_lazy; + case ZSTD_lazy2: + return ZSTD_compressBlock_lazy2; + case ZSTD_btlazy2: + return ZSTD_compressBlock_btlazy2; } } -size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - ZSTD_HC_blockCompressor blockCompressor = ZSTD_HC_selectBlockCompressor(ctx->params.strategy); + ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctx->params.strategy); return blockCompressor(ctx, dst, maxDstSize, src, srcSize); } -static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr, +static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { @@ 
-886,7 +885,7 @@ static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr, const BYTE* ip = (const BYTE*)src; BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; - const ZSTD_HC_blockCompressor blockCompressor = ZSTD_HC_selectBlockCompressor(ctxPtr->params.strategy); + const ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctxPtr->params.strategy); while (remaining) { @@ -921,7 +920,7 @@ static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr, } -size_t ZSTD_HC_compressContinue (ZSTD_HC_CCtx* ctxPtr, +size_t ZSTD_compressContinue (ZSTD_CCtx* ctxPtr, void* dst, size_t dstSize, const void* src, size_t srcSize) { @@ -931,39 +930,39 @@ size_t ZSTD_HC_compressContinue (ZSTD_HC_CCtx* ctxPtr, if (ip != ctxPtr->end) { if (ctxPtr->end != NULL) - ZSTD_HC_resetCCtx_advanced(ctxPtr, ctxPtr->params, srcSize); + ZSTD_resetCCtx_advanced(ctxPtr, ctxPtr->params, srcSize); ctxPtr->base = ip; } ctxPtr->end = ip + srcSize; - return ZSTD_HC_compress_generic (ctxPtr, dst, dstSize, src, srcSize); + return ZSTD_compress_generic (ctxPtr, dst, dstSize, src, srcSize); } -size_t ZSTD_HC_compressBegin_advanced(ZSTD_HC_CCtx* ctx, +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, - const ZSTD_HC_parameters params, + const ZSTD_parameters params, U64 srcSizeHint) { size_t errorCode; if (maxDstSize < 4) return ERROR(dstSize_tooSmall); - errorCode = ZSTD_HC_resetCCtx_advanced(ctx, params, srcSizeHint); + errorCode = ZSTD_resetCCtx_advanced(ctx, params, srcSizeHint); if (ZSTD_isError(errorCode)) return errorCode; MEM_writeLE32(dst, ZSTD_magicNumber); /* Write Header */ return 4; } -size_t ZSTD_HC_compressBegin(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint) +size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint) { int tableID = ((srcSizeHint-1) > 128 KB); /* intentional underflow for 0 */ if (compressionLevel<=0) compressionLevel = 1; - if 
(compressionLevel > ZSTD_HC_MAX_CLEVEL) compressionLevel = ZSTD_HC_MAX_CLEVEL; - return ZSTD_HC_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_HC_defaultParameters[tableID][compressionLevel], srcSizeHint); + if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; + return ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_defaultParameters[tableID][compressionLevel], srcSizeHint); } -size_t ZSTD_HC_compressEnd(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize) +size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize) { BYTE* op = (BYTE*)dst; @@ -979,10 +978,10 @@ size_t ZSTD_HC_compressEnd(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize) return 3; } -size_t ZSTD_HC_compress_advanced (ZSTD_HC_CCtx* ctx, +size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, - ZSTD_HC_parameters params) + ZSTD_parameters params) { BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; @@ -991,46 +990,47 @@ size_t ZSTD_HC_compress_advanced (ZSTD_HC_CCtx* ctx, /* correct params, to use less memory */ { U32 srcLog = ZSTD_highbit((U32)srcSize-1) + 1; - U32 contentBtPlus = (ctx->params.strategy == ZSTD_HC_btlazy2); + U32 contentBtPlus = (ctx->params.strategy == ZSTD_btlazy2); if (params.windowLog > srcLog) params.windowLog = srcLog; if (params.contentLog > srcLog+contentBtPlus) params.contentLog = srcLog+contentBtPlus; } /* Header */ - oSize = ZSTD_HC_compressBegin_advanced(ctx, dst, maxDstSize, params, srcSize); + oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, params, srcSize); if(ZSTD_isError(oSize)) return oSize; op += oSize; maxDstSize -= oSize; /* body (compression) */ ctx->base = (const BYTE*)src; - oSize = ZSTD_HC_compress_generic (ctx, op, maxDstSize, src, srcSize); + oSize = ZSTD_compress_generic (ctx, op, maxDstSize, src, srcSize); if(ZSTD_isError(oSize)) return oSize; op += oSize; maxDstSize -= oSize; /* Close frame */ - oSize = ZSTD_HC_compressEnd(ctx, op, maxDstSize); + oSize 
= ZSTD_compressEnd(ctx, op, maxDstSize); if(ZSTD_isError(oSize)) return oSize; op += oSize; return (op - ostart); } -size_t ZSTD_HC_compressCCtx (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) +size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) { const int tableID = (srcSize > 128 KB); - if (compressionLevel<=1) return ZSTD_compress(dst, maxDstSize, src, srcSize); /* fast mode */ - if (compressionLevel > ZSTD_HC_MAX_CLEVEL) compressionLevel = ZSTD_HC_MAX_CLEVEL; - return ZSTD_HC_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_HC_defaultParameters[tableID][compressionLevel]); + //if (compressionLevel<=1) return ZSTD_compress(dst, maxDstSize, src, srcSize); /* fast mode */ + if (compressionLevel < 1) compressionLevel = 1; + if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; + return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_defaultParameters[tableID][compressionLevel]); } -size_t ZSTD_HC_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) +size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) { size_t result; - ZSTD_HC_CCtx ctxBody; + ZSTD_CCtx ctxBody; memset(&ctxBody, 0, sizeof(ctxBody)); - result = ZSTD_HC_compressCCtx(&ctxBody, dst, maxDstSize, src, srcSize, compressionLevel); + result = ZSTD_compressCCtx(&ctxBody, dst, maxDstSize, src, srcSize, compressionLevel); free(ctxBody.workSpace); return result; } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c new file mode 100644 index 00000000000..f3edeb6f2c4 --- /dev/null +++ b/lib/zstd_decompress.c @@ -0,0 +1,1103 @@ +/* + zstd - standard compression library + Copyright (C) 2014-2015, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! +* MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +*/ +#define ZSTD_MEMORY_USAGE 16 + +/*! 
+ * HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0, fastest), or in memory heap (1, requires malloc()) + * Note that compression context is fairly large, as a consequence heap memory is recommended. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif /* ZSTD_HEAPMODE */ + +/*! +* LEGACY_SUPPORT : +* decompressor can decode older formats (starting from Zstd 0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 1 +#endif + + +/* ******************************************************* +* Includes +*********************************************************/ +#include /* calloc */ +#include /* memcpy, memmove */ +#include /* debug : printf */ +#include "mem.h" /* low level memory routines */ +#include "zstd_static.h" +#include "zstd_internal.h" +#include "fse_static.h" +#include "huff0.h" + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) +# include "zstd_legacy.h" +#endif + + +/* ******************************************************* +* Compiler specifics +*********************************************************/ +#ifdef __AVX2__ +# include /* AVX2 intrinsics */ +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#else +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/* ******************************************************* +* Constants +*********************************************************/ +#define HASH_LOG (ZSTD_MEMORY_USAGE - 2) +#define HASH_TABLESIZE (1 << HASH_LOG) +#define HASH_MASK (HASH_TABLESIZE - 1) + +#define KNUTH 2654435761 + +#define BIT7 128 
+#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BLOCKSIZE (128 KB) /* define, for static allocation */ +#define IS_RAW BIT0 +#define IS_RLE BIT1 + +static const U32 g_maxDistance = 4 * BLOCKSIZE; +static const U32 g_maxLimit = 1 GB; + +#define WORKPLACESIZE (BLOCKSIZE*3) +#define MINMATCH 4 +#define LitFSELog 11 +#define MLFSELog 10 +#define LLFSELog 10 +#define OffFSELog 9 +#define MAX(a,b) ((a)<(b)?(b):(a)) +#define MaxSeq MAX(MaxLL, MaxML) + +#define LITERAL_NOENTROPY 63 +#define COMMAND_NOENTROPY 7 /* to remove */ + +static const size_t ZSTD_blockHeaderSize = 3; +static const size_t ZSTD_frameHeaderSize = 4; + + +/* ******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } + + +/* ************************************** +* Local structures +****************************************/ +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->offset = ssPtr->offsetStart; + ssPtr->lit = ssPtr->litStart; + ssPtr->litLength = ssPtr->litLengthStart; + ssPtr->matchLength = ssPtr->matchLengthStart; + ssPtr->dumps = ssPtr->dumpsStart; +} + + +/* ************************************* +* Error Management +***************************************/ +/*! ZSTD_isError +* tells if a return value is an error code */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! 
ZSTD_getErrorName +* provides error code string (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; } + + +/* ******************************************************* +* Compression +*********************************************************/ +size_t ZSTD_compressBound(size_t srcSize) /* maximum compressed size */ +{ + return FSE_compressBound(srcSize) + 12; +} + + +size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + + if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return ERROR(dstSize_tooSmall); + memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize); + + /* Build header */ + ostart[0] = (BYTE)(srcSize>>16); + ostart[1] = (BYTE)(srcSize>>8); + ostart[2] = (BYTE) srcSize; + ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */ + + return ZSTD_blockHeaderSize+srcSize; +} + + +static size_t ZSTD_compressRawLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + + if (srcSize + 3 > maxDstSize) return ERROR(dstSize_tooSmall); + + MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RAW); + memcpy(ostart + 3, src, srcSize); + return srcSize + 3; +} + +static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + + (void)maxDstSize; + MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RLE); /* note : maxDstSize > litHeaderSize > 4 */ + ostart[3] = *(const BYTE*)src; + return 4; +} + +size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 1; } + +static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + const size_t minGain = ZSTD_minGain(srcSize); + BYTE* 
const ostart = (BYTE*)dst; + size_t hsize; + static const size_t litHeaderSize = 5; + + if (maxDstSize < litHeaderSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + + hsize = HUF_compress(ostart+litHeaderSize, maxDstSize-litHeaderSize, src, srcSize); + + if ((hsize==0) || (hsize >= srcSize - minGain)) return ZSTD_compressRawLiteralsBlock(dst, maxDstSize, src, srcSize); + if (hsize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize); + + /* Build header */ + { + ostart[0] = (BYTE)(srcSize << 2); /* is a block, is compressed */ + ostart[1] = (BYTE)(srcSize >> 6); + ostart[2] = (BYTE)(srcSize >>14); + ostart[2] += (BYTE)(hsize << 5); + ostart[3] = (BYTE)(hsize >> 3); + ostart[4] = (BYTE)(hsize >>11); + } + + return hsize+litHeaderSize; +} + + +size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, + const seqStore_t* seqStorePtr, + size_t srcSize) +{ + U32 count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + size_t mostFrequent; + U32 max = 255; + U32 tableLog = 11; + U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )]; + U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)]; + U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )]; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const BYTE* const op_lit_start = seqStorePtr->litStart; + const BYTE* const llTable = seqStorePtr->litLengthStart; + const BYTE* const llPtr = seqStorePtr->litLength; + const BYTE* const mlTable = seqStorePtr->matchLengthStart; + const U32* const offsetTable = seqStorePtr->offsetStart; + BYTE* const offCodeTable = seqStorePtr->offCodeStart; + BYTE* op = dst; + BYTE* const oend = dst + maxDstSize; + const size_t nbSeq = llPtr - llTable; + const size_t minGain = ZSTD_minGain(srcSize); + const size_t maxCSize = srcSize - minGain; + BYTE* seqHead; + + + /* Compress literals */ + { + size_t cSize; + size_t litSize = seqStorePtr->lit - op_lit_start; + + if (litSize <= LITERAL_NOENTROPY) + cSize = 
ZSTD_compressRawLiteralsBlock(op, maxDstSize, op_lit_start, litSize);
+ else
+ cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize);
+ if (ZSTD_isError(cSize)) return cSize;
+ op += cSize;
+ }
+
+ /* Sequences Header */
+ if ((oend-op) < MIN_SEQUENCES_SIZE)
+ return ERROR(dstSize_tooSmall);
+ MEM_writeLE16(op, (U16)nbSeq); op+=2;
+ seqHead = op;
+
+ /* dumps : contains too large lengths */
+ {
+ size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
+ if (dumpsLength < 512)
+ {
+ op[0] = (BYTE)(dumpsLength >> 8);
+ op[1] = (BYTE)(dumpsLength);
+ op += 2;
+ }
+ else
+ {
+ op[0] = 2;
+ op[1] = (BYTE)(dumpsLength>>8);
+ op[2] = (BYTE)(dumpsLength);
+ op += 3;
+ }
+ if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall);
+ memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
+ op += dumpsLength;
+ }
+
+ /* CTable for Literal Lengths */
+ max = MaxLL;
+ mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq);
+ if ((mostFrequent == nbSeq) && (nbSeq > 2))
+ {
+ *op++ = *(seqStorePtr->litLengthStart);
+ FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+ LLtype = bt_rle;
+ }
+ else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1))))
+ {
+ FSE_buildCTable_raw(CTable_LitLength, LLbits);
+ LLtype = bt_raw;
+ }
+ else
+ {
+ size_t NCountSize;
+ tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+ FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+ NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
+ if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ op += NCountSize;
+ FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+ LLtype = bt_compressed;
+ }
+
+ /* CTable for Offsets codes */
+ {
+ /* create Offset codes */
+ size_t i;
+ max = MaxOff;
+ for (i=0; i<nbSeq; i++)
+ {
+ offCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]) + 1;
+ if (offsetTable[i]==0) offCodeTable[i]=0;
+ }
+ mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq);
+ }
+ if ((mostFrequent == nbSeq) && (nbSeq > 2))
+ {
+ *op++ = *offCodeTable;
+ FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+ Offtype = bt_rle;
+ }
+ else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1))))
+ {
+ 
FSE_buildCTable_raw(CTable_OffsetBits, Offbits); + Offtype = bt_raw; + } + else + { + size_t NCountSize; + tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + FSE_normalizeCount(norm, tableLog, count, nbSeq, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); + Offtype = bt_compressed; + } + + /* CTable for MatchLengths */ + max = MaxML; + mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) + { + *op++ = *seqStorePtr->matchLengthStart; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = bt_rle; + } + else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1)))) + { + FSE_buildCTable_raw(CTable_MatchLength, MLbits); + MLtype = bt_raw; + } + else + { + size_t NCountSize; + tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + FSE_normalizeCount(norm, tableLog, count, nbSeq, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); + MLtype = bt_compressed; + } + + seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + /* Encoding Sequences */ + { + size_t streamSize, errorCode; + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + int i; + + errorCode = BIT_initCStream(&blockStream, op, oend-op); + if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); /* not enough space remaining */ + FSE_initCState(&stateMatchLength, CTable_MatchLength); + FSE_initCState(&stateOffsetBits, CTable_OffsetBits); + FSE_initCState(&stateLitLength, CTable_LitLength); + + for (i=(int)nbSeq-1; i>=0; i--) + { + BYTE matchLength = mlTable[i]; + U32 offset = 
offsetTable[i]; + BYTE offCode = offCodeTable[i]; /* 32b*/ /* 64b*/ + U32 nbBits = (offCode-1) * (!!offCode); + BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ + BIT_addBits(&blockStream, offset, nbBits); /* 32 */ /* 42 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */ + FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */ + BIT_flushBits(&blockStream); /* 7 */ /* 7 */ + } + + FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } + + /* check compressibility */ + if ((size_t)(op-dst) >= maxCSize) return 0; + + return op - dst; +} + + + + +/* ************************************************************* +* Decompression section +***************************************************************/ +struct ZSTD_DCtx_s +{ + U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; + U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; + U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; + void* previousDstEnd; + void* base; + size_t expected; + blockType_t bType; + U32 phase; + const BYTE* litPtr; + size_t litBufSize; + size_t litSize; + BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */]; +}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */ + + +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) +{ + const BYTE* const in = (const BYTE* const)src; + BYTE headerFlags; + U32 cSize; + + if (srcSize < 3) return ERROR(srcSize_wrong); + + headerFlags = *in; + cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); + + bpPtr->blockType = (blockType_t)(headerFlags >> 6); + 
bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; + + if (bpPtr->blockType == bt_end) return 0; + if (bpPtr->blockType == bt_rle) return 1; + return cSize; +} + +static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + + +/** ZSTD_decompressLiterals + @return : nb of bytes read from src, or an error code*/ +static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + + const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ + const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ + + if (litSize > *maxDstSizePtr) return ERROR(corruption_detected); + if (litCSize + 5 > srcSize) return ERROR(corruption_detected); + + if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected); + + *maxDstSizePtr = litSize; + return litCSize + 5; +} + + +/** ZSTD_decodeLiteralsBlock + @return : nb of bytes read from src (< srcSize )*/ +size_t ZSTD_decodeLiteralsBlock(void* ctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx; + const BYTE* const istart = (const BYTE*) src; + + /* any compressed block with literals segment must be at least this size */ + if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); + + switch(*istart & 3) + { + /* compressed */ + case 0: + { + size_t litSize = BLOCKSIZE; + const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize); + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = BLOCKSIZE+8; + dctx->litSize = litSize; + return readSize; /* works if it's an error too */ + } + case IS_RAW: + { + const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 
2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ + if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ + { + if (litSize > srcSize-3) return ERROR(corruption_detected); + memcpy(dctx->litBuffer, istart, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = BLOCKSIZE+8; + dctx->litSize = litSize; + return litSize+3; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+3; + dctx->litBufSize = srcSize-3; + dctx->litSize = litSize; + return litSize+3; } + case IS_RLE: + { + const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ + if (litSize > BLOCKSIZE) return ERROR(corruption_detected); + memset(dctx->litBuffer, istart[3], litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = BLOCKSIZE+8; + dctx->litSize = litSize; + return 4; + } + default: + return ERROR(corruption_detected); /* forbidden nominal case */ + } +} + + +size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + const BYTE* const iend = istart + srcSize; + U32 LLtype, Offtype, MLtype; + U32 LLlog, Offlog, MLlog; + size_t dumpsLength; + + /* check */ + if (srcSize < 5) return ERROR(srcSize_wrong); + + /* SeqHead */ + *nbSeq = MEM_readLE16(ip); ip+=2; + LLtype = *ip >> 6; + Offtype = (*ip >> 4) & 3; + MLtype = (*ip >> 2) & 3; + if (*ip & 2) + { + dumpsLength = ip[2]; + dumpsLength += ip[1] << 8; + ip += 3; + } + else + { + dumpsLength = ip[1]; + dumpsLength += (ip[0] & 1) << 8; + ip += 2; + } + *dumpsPtr = ip; + ip += dumpsLength; + *dumpsLengthPtr = dumpsLength; + + /* check */ + if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ + + /* sequences */ + { + S16 norm[MaxML+1]; /* assumption : 
MaxML >= MaxLL and MaxOff */ + size_t headerSize; + + /* Build DTables */ + switch(LLtype) + { + U32 max; + case bt_rle : + LLlog = 0; + FSE_buildDTable_rle(DTableLL, *ip++); break; + case bt_raw : + LLlog = LLbits; + FSE_buildDTable_raw(DTableLL, LLbits); break; + default : + max = MaxLL; + headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (LLlog > LLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableLL, norm, max, LLlog); + } + + switch(Offtype) + { + U32 max; + case bt_rle : + Offlog = 0; + if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */ + break; + case bt_raw : + Offlog = Offbits; + FSE_buildDTable_raw(DTableOffb, Offbits); break; + default : + max = MaxOff; + headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (Offlog > OffFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableOffb, norm, max, Offlog); + } + + switch(MLtype) + { + U32 max; + case bt_rle : + MLlog = 0; + if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableML, *ip++); break; + case bt_raw : + MLlog = MLbits; + FSE_buildDTable_raw(DTableML, MLbits); break; + default : + max = MaxML; + headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (MLlog > MLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableML, norm, max, MLlog); + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t offset; + size_t matchLength; +} seq_t; + +typedef struct { + BIT_DStream_t DStream; + FSE_DState_t stateLL; + FSE_DState_t stateOffb; + FSE_DState_t stateML; + 
size_t prevOffset; + const BYTE* dumps; + const BYTE* dumpsEnd; +} seqState_t; + + +static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) +{ + size_t litLength; + size_t prevOffset; + size_t offset; + size_t matchLength; + const BYTE* dumps = seqState->dumps; + const BYTE* const de = seqState->dumpsEnd; + + /* Literal length */ + litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); + prevOffset = litLength ? seq->offset : seqState->prevOffset; + seqState->prevOffset = seq->offset; + if (litLength == MaxLL) + { + U32 add = *dumps++; + if (add < 255) litLength += add; + else + { + litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ + dumps += 3; + } + if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ + } + + /* Offset */ + { + static const U32 offsetPrefix[MaxOff+1] = { + 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256, + 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, + 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 }; + U32 offsetCode, nbBits; + offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); + nbBits = offsetCode - 1; + if (offsetCode==0) nbBits = 0; /* cmove */ + offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits); + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); + if (offsetCode==0) offset = prevOffset; /* cmove */ + } + + /* MatchLength */ + matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); + if (matchLength == MaxML) + { + U32 add = *dumps++; + if (add < 255) matchLength += add; + else + { + matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ + dumps += 3; + } + if (dumps >= de) dumps = de-1; /* late correction, to 
avoid read overflow (data is now corrupted anyway) */ + } + matchLength += MINMATCH; + + /* save result */ + seq->litLength = litLength; + seq->offset = offset; + seq->matchLength = matchLength; + seqState->dumps = dumps; +} + + +static size_t ZSTD_execSequence(BYTE* op, + seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit_8, + BYTE* const base, BYTE* const oend) +{ + static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ + static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */ + const BYTE* const ostart = op; + BYTE* const oLitEnd = op + sequence.litLength; + BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_8 = oend-8; + const BYTE* const litEnd = *litPtr + sequence.litLength; + + /* check */ + if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */ + if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ + if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */ + + /* copy Literals */ + ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = litEnd; /* update for next sequence */ + + /* copy Match */ + { + const BYTE* match = op - sequence.offset; + + /* check */ + //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer removing this test ?) 
*/ + if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */ + if (match < base) return ERROR(corruption_detected); + + /* close range match, overlap */ + if (sequence.offset < 8) + { + const int dec64 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= dec64; + } + else + { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-12) + { + if (op < oend_8) + { + ZSTD_wildcopy(op, match, oend_8 - op); + match += oend_8 - op; + op = oend_8; + } + while (op < oMatchEnd) *op++ = *match++; + } + else + { + ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ + } + } + + return oMatchEnd - ostart; +} + +static size_t ZSTD_decompressSequences( + void* ctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize) +{ + ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx; + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + maxDstSize; + size_t errorCode, dumpsLength; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8; + const BYTE* const litEnd = litPtr + dctx->litSize; + int nbSeq; + const BYTE* dumps; + U32* DTableLL = dctx->LLTable; + U32* DTableML = dctx->MLTable; + U32* DTableOffb = dctx->OffTable; + BYTE* const base = (BYTE*) (dctx->base); + + /* Build Decoding Tables */ + errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, + DTableLL, DTableML, DTableOffb, + ip, iend-ip); + if (ZSTD_isError(errorCode)) return errorCode; + ip += errorCode; + + /* Regen sequences */ + { + seq_t sequence; + seqState_t seqState; + + memset(&sequence, 0, sizeof(sequence)); + sequence.offset = 4; + seqState.dumps = dumps; + 
seqState.dumpsEnd = dumps + dumpsLength; + seqState.prevOffset = 4; + errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip); + if (ERR_isError(errorCode)) return ERROR(corruption_detected); + FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); + FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); + FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); + + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; ) + { + size_t oneSeqSize; + nbSeq--; + ZSTD_decodeSequence(&sequence, &seqState); + oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litLimit_8, base, oend); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + + /* check if reached exact end */ + if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */ + if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */ + + /* last literal segment */ + { + size_t lastLLSize = litEnd - litPtr; + if (litPtr > litEnd) return ERROR(corruption_detected); + if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); + if (op != litPtr) memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + + +static size_t ZSTD_decompressBlock( + void* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + + /* Decode literals sub-block */ + size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + + return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize); +} + + +size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* iend = ip + srcSize; + BYTE* const ostart = (BYTE* const)dst; + 
BYTE* op = ostart; + BYTE* const oend = ostart + maxDstSize; + size_t remainingSize = srcSize; + U32 magicNumber; + blockProperties_t blockProperties; + + /* Frame Header */ + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + magicNumber = MEM_readLE32(src); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) + if (ZSTD_isLegacy(magicNumber)) + return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber); +#endif + if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + + /* Loop on each block */ + while (1) + { + size_t decodedSize=0; + size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize); + break; + case bt_raw : + decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize); + break; + case bt_rle : + return ERROR(GENERIC); /* not yet supported */ + break; + case bt_end : + /* end of frame */ + if (remainingSize) return ERROR(srcSize_wrong); + break; + default: + return ERROR(GENERIC); /* impossible */ + } + if (cBlockSize == 0) break; /* bt_end */ + + if (ZSTD_isError(decodedSize)) return decodedSize; + op += decodedSize; + ip += cBlockSize; + remainingSize -= cBlockSize; + } + + return op-ostart; +} + +size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + ZSTD_DCtx ctx; + ctx.base = dst; + return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); +} + + +/* ****************************** +* Streaming Decompression API +********************************/ + +size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx) +{ + dctx->expected = 
ZSTD_frameHeaderSize; + dctx->phase = 0; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + return 0; +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx)); + if (dctx==NULL) return NULL; + ZSTD_resetDCtx(dctx); + return dctx; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + free(dctx); + return 0; +} + +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) +{ + return dctx->expected; +} + +size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + /* Sanity check */ + if (srcSize != ctx->expected) return ERROR(srcSize_wrong); + if (dst != ctx->previousDstEnd) /* not contiguous */ + ctx->base = dst; + + /* Decompress : frame header */ + if (ctx->phase == 0) + { + /* Check frame magic header */ + U32 magicNumber = MEM_readLE32(src); + if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + ctx->phase = 1; + ctx->expected = ZSTD_blockHeaderSize; + return 0; + } + + /* Decompress : block header */ + if (ctx->phase == 1) + { + blockProperties_t bp; + size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(blockSize)) return blockSize; + if (bp.blockType == bt_end) + { + ctx->expected = 0; + ctx->phase = 0; + } + else + { + ctx->expected = blockSize; + ctx->bType = bp.blockType; + ctx->phase = 2; + } + + return 0; + } + + /* Decompress : block content */ + { + size_t rSize; + switch(ctx->bType) + { + case bt_compressed: + rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); + break; + case bt_raw : + rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize); + break; + case bt_rle : + return ERROR(GENERIC); /* not yet handled */ + break; + case bt_end : /* should never happen (filtered at phase 1) */ + rSize = 0; + break; + default: + return ERROR(GENERIC); + } + ctx->phase = 1; + ctx->expected = ZSTD_blockHeaderSize; + ctx->previousDstEnd = (void*)( ((char*)dst) + rSize); + return rSize; + } + +} + + 
diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 510ca80d075..f29120ede81 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -46,12 +46,46 @@ extern "C" { * Includes ***************************************/ #include "zstd.h" +#include "mem.h" + + +/* ************************************* +* Types +***************************************/ +/** from faster to stronger */ +typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy; + +typedef struct +{ + U32 windowLog; /* largest match distance : impact decompression buffer size */ + U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */ + U32 hashLog; /* dispatch table : larger == more memory, faster*/ + U32 searchLog; /* nb of searches : larger == more compression, slower*/ + U32 searchLength; /* size of matches : larger == faster decompression */ + ZSTD_strategy strategy; +} ZSTD_parameters; + + +/* ************************************* +* Advanced function +***************************************/ +/** ZSTD_compress_advanced +* Same as ZSTD_compressCCtx(), with fine-tune control of each compression parameter */ +size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + ZSTD_parameters params); + +/** ZSTD_validateParams + correct params value to remain within authorized range + srcSizeHint value is optional, select 0 if not known */ +void ZSTD_validateParams(ZSTD_parameters* params, U64 srcSizeHint); /* ************************************* * Streaming functions ***************************************/ -size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize); +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint); size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t 
maxDstSize); @@ -77,6 +111,73 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co #define ZSTD_magicNumber 0xFD2FB523 /* v0.3 (current)*/ +/* ************************************* +* Pre-defined compression levels +***************************************/ +#define ZSTD_MAX_CLEVEL 20 +#define ZSTD_WINDOWLOG_MAX 26 +#define ZSTD_WINDOWLOG_MIN 18 +#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1) +#define ZSTD_CONTENTLOG_MIN 4 +#define ZSTD_HASHLOG_MAX 28 +#define ZSTD_HASHLOG_MIN 4 +#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_SEARCHLENGTH_MAX 7 +#define ZSTD_SEARCHLENGTH_MIN 4 + +static const ZSTD_parameters ZSTD_defaultParameters[2][ZSTD_MAX_CLEVEL+1] = { +{ /* for <= 128 KB */ + /* W, C, H, S, L, strat */ + { 17, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */ + { 17, 12, 13, 1, 6, ZSTD_fast }, /* level 1 */ + { 17, 15, 16, 1, 5, ZSTD_fast }, /* level 2 */ + { 17, 16, 17, 1, 5, ZSTD_fast }, /* level 3 */ + { 17, 13, 15, 2, 4, ZSTD_greedy }, /* level 4 */ + { 17, 15, 17, 3, 4, ZSTD_greedy }, /* level 5 */ + { 17, 14, 17, 3, 4, ZSTD_lazy }, /* level 6 */ + { 17, 16, 17, 4, 4, ZSTD_lazy }, /* level 7 */ + { 17, 16, 17, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 16, 5, 4, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 16, 6, 4, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 16, 7, 4, ZSTD_lazy2 }, /* level 11 */ + { 17, 17, 16, 8, 4, ZSTD_lazy2 }, /* level 12 */ + { 17, 18, 16, 4, 4, ZSTD_btlazy2 }, /* level 13 */ + { 17, 18, 16, 5, 4, ZSTD_btlazy2 }, /* level 14 */ + { 17, 18, 16, 6, 4, ZSTD_btlazy2 }, /* level 15 */ + { 17, 18, 16, 7, 4, ZSTD_btlazy2 }, /* level 16 */ + { 17, 18, 16, 8, 4, ZSTD_btlazy2 }, /* level 17 */ + { 17, 18, 16, 9, 4, ZSTD_btlazy2 }, /* level 18 */ + { 17, 18, 16, 10, 4, ZSTD_btlazy2 }, /* level 19 */ + { 17, 18, 18, 12, 4, ZSTD_btlazy2 }, /* level 20 */ +}, +{ /* for > 128 KB */ + /* W, C, H, S, L, strat */ + { 18, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never 
used */ + { 18, 14, 14, 1, 7, ZSTD_fast }, /* level 1 - in fact redirected towards zstd fast */ + { 19, 15, 16, 1, 6, ZSTD_fast }, /* level 2 */ + { 20, 18, 20, 1, 6, ZSTD_fast }, /* level 3 */ + { 21, 19, 21, 1, 6, ZSTD_fast }, /* level 4 */ + { 20, 13, 18, 5, 5, ZSTD_greedy }, /* level 5 */ + { 20, 17, 19, 3, 5, ZSTD_greedy }, /* level 6 */ + { 21, 17, 20, 3, 5, ZSTD_lazy }, /* level 7 */ + { 21, 19, 20, 3, 5, ZSTD_lazy }, /* level 8 */ + { 21, 20, 20, 3, 5, ZSTD_lazy2 }, /* level 9 */ + { 21, 19, 20, 4, 5, ZSTD_lazy2 }, /* level 10 */ + { 22, 20, 22, 4, 5, ZSTD_lazy2 }, /* level 11 */ + { 22, 20, 22, 5, 5, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, ZSTD_lazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, ZSTD_lazy2 }, /* level 14 */ + { 23, 23, 23, 5, 5, ZSTD_lazy2 }, /* level 15 */ + { 23, 21, 22, 5, 5, ZSTD_btlazy2 }, /* level 16 */ + { 23, 24, 23, 4, 5, ZSTD_btlazy2 }, /* level 17 */ + { 25, 24, 23, 5, 5, ZSTD_btlazy2 }, /* level 18 */ + { 25, 26, 23, 5, 5, ZSTD_btlazy2 }, /* level 19 */ + { 26, 27, 24, 6, 5, ZSTD_btlazy2 }, /* level 20 */ +} +}; + + /* ************************************* * Error management ***************************************/ diff --git a/lib/zstdhc.h b/lib/zstdhc.h deleted file mode 100644 index fae9d85241d..00000000000 --- a/lib/zstdhc.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - zstdhc - high compression variant - Header File - Copyright (C) 2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd source repository : http://www.zstd.net -*/ -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - -/* ************************************* -* Includes -***************************************/ -#include /* size_t */ - - -/* ************************************* -* Simple function -***************************************/ -/** -ZSTD_HC_compress() : - Compresses 'srcSize' bytes from buffer 'src' into buffer 'dst', of maximum size 'dstSize'. - Destination buffer must be already allocated. - Compression runs faster if maxDstSize >= ZSTD_compressBound(srcSize). 
- @return : the number of bytes written into buffer 'dst' - or an error code if it fails (which can be tested using ZSTD_isError()) -*/ -size_t ZSTD_HC_compress(void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - int compressionLevel); - - -/* ************************************* -* Advanced functions -***************************************/ -typedef struct ZSTD_HC_CCtx_s ZSTD_HC_CCtx; /* incomplete type */ -ZSTD_HC_CCtx* ZSTD_HC_createCCtx(void); -size_t ZSTD_HC_freeCCtx(ZSTD_HC_CCtx* cctx); - -/** -ZSTD_HC_compressCCtx() : - Same as ZSTD_compress(), but requires a ZSTD_HC_CCtx working space already allocated -*/ -size_t ZSTD_HC_compressCCtx(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel); - - -#if defined (__cplusplus) -} -#endif diff --git a/lib/zstdhc_static.h b/lib/zstdhc_static.h deleted file mode 100644 index a08b00bf3f6..00000000000 --- a/lib/zstdhc_static.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - zstdhc - high compression variant - Header File - Experimental API, static linking only - Copyright (C) 2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - zstd source repository : http://www.zstd.net -*/ -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - -/* ************************************* -* Includes -***************************************/ -#include "mem.h" -#include "zstdhc.h" - - -/* ************************************* -* Types -***************************************/ -/** from faster to stronger */ -typedef enum { ZSTD_HC_fast, ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_lazy2, ZSTD_HC_btlazy2 } ZSTD_HC_strategy; - -typedef struct -{ - U32 windowLog; /* largest match distance : impact decompression buffer size */ - U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */ - U32 hashLog; /* dispatch table : larger == more memory, faster*/ - U32 searchLog; /* nb of searches : larger == more compression, slower*/ - U32 searchLength; /* size of matches : larger == faster decompression */ - ZSTD_HC_strategy strategy; -} ZSTD_HC_parameters; - -/* parameters boundaries */ -#define ZSTD_HC_WINDOWLOG_MAX 26 -#define ZSTD_HC_WINDOWLOG_MIN 18 -#define ZSTD_HC_CONTENTLOG_MAX (ZSTD_HC_WINDOWLOG_MAX+1) -#define ZSTD_HC_CONTENTLOG_MIN 4 -#define ZSTD_HC_HASHLOG_MAX 28 -#define ZSTD_HC_HASHLOG_MIN 4 -#define ZSTD_HC_SEARCHLOG_MAX (ZSTD_HC_CONTENTLOG_MAX-1) -#define ZSTD_HC_SEARCHLOG_MIN 1 -#define ZSTD_HC_SEARCHLENGTH_MAX 7 -#define ZSTD_HC_SEARCHLENGTH_MIN 4 - - -/* 
************************************* -* Advanced function -***************************************/ -/** ZSTD_HC_compress_advanced -* Same as ZSTD_HC_compressCCtx(), with fine-tune control of each compression parameter */ -size_t ZSTD_HC_compress_advanced (ZSTD_HC_CCtx* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - ZSTD_HC_parameters params); - -/** ZSTD_HC_validateParams - correct params value to remain within authorized range - srcSizeHint value is optional, select 0 if not known */ -void ZSTD_HC_validateParams(ZSTD_HC_parameters* params, U64 srcSizeHint); - - -/* ************************************* -* Streaming functions -***************************************/ -size_t ZSTD_HC_compressBegin(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint); -size_t ZSTD_HC_compressContinue(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); -size_t ZSTD_HC_compressEnd(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize); - - -/* ************************************* -* Pre-defined compression levels -***************************************/ -#define ZSTD_HC_MAX_CLEVEL 20 -static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[2][ZSTD_HC_MAX_CLEVEL+1] = { -{ /* for <= 128 KB */ - /* W, C, H, S, L, strat */ - { 17, 12, 12, 1, 4, ZSTD_HC_fast }, /* level 0 - never used */ - { 17, 12, 13, 1, 6, ZSTD_HC_fast }, /* level 1 */ - { 17, 15, 16, 1, 5, ZSTD_HC_fast }, /* level 2 */ - { 17, 16, 17, 1, 5, ZSTD_HC_fast }, /* level 3 */ - { 17, 13, 15, 2, 4, ZSTD_HC_greedy }, /* level 4 */ - { 17, 15, 17, 3, 4, ZSTD_HC_greedy }, /* level 5 */ - { 17, 14, 17, 3, 4, ZSTD_HC_lazy }, /* level 6 */ - { 17, 16, 17, 4, 4, ZSTD_HC_lazy }, /* level 7 */ - { 17, 16, 17, 4, 4, ZSTD_HC_lazy2 }, /* level 8 */ - { 17, 17, 16, 5, 4, ZSTD_HC_lazy2 }, /* level 9 */ - { 17, 17, 16, 6, 4, ZSTD_HC_lazy2 }, /* level 10 */ - { 17, 17, 16, 7, 4, ZSTD_HC_lazy2 }, /* level 11 */ - { 17, 17, 16, 8, 4, ZSTD_HC_lazy2 }, /* 
level 12 */ - { 17, 18, 16, 4, 4, ZSTD_HC_btlazy2 }, /* level 13 */ - { 17, 18, 16, 5, 4, ZSTD_HC_btlazy2 }, /* level 14 */ - { 17, 18, 16, 6, 4, ZSTD_HC_btlazy2 }, /* level 15 */ - { 17, 18, 16, 7, 4, ZSTD_HC_btlazy2 }, /* level 16 */ - { 17, 18, 16, 8, 4, ZSTD_HC_btlazy2 }, /* level 17 */ - { 17, 18, 16, 9, 4, ZSTD_HC_btlazy2 }, /* level 18 */ - { 17, 18, 16, 10, 4, ZSTD_HC_btlazy2 }, /* level 19 */ - { 17, 18, 18, 12, 4, ZSTD_HC_btlazy2 }, /* level 20 */ -}, -{ /* for > 128 KB */ - /* W, C, H, S, L, strat */ - { 18, 12, 12, 1, 4, ZSTD_HC_fast }, /* level 0 - never used */ - { 18, 14, 14, 1, 7, ZSTD_HC_fast }, /* level 1 - in fact redirected towards zstd fast */ - { 19, 15, 16, 1, 6, ZSTD_HC_fast }, /* level 2 */ - { 20, 18, 20, 1, 6, ZSTD_HC_fast }, /* level 3 */ - { 21, 19, 21, 1, 6, ZSTD_HC_fast }, /* level 4 */ - { 20, 13, 18, 5, 5, ZSTD_HC_greedy }, /* level 5 */ - { 20, 17, 19, 3, 5, ZSTD_HC_greedy }, /* level 6 */ - { 21, 17, 20, 3, 5, ZSTD_HC_lazy }, /* level 7 */ - { 21, 19, 20, 3, 5, ZSTD_HC_lazy }, /* level 8 */ - { 21, 20, 20, 3, 5, ZSTD_HC_lazy2 }, /* level 9 */ - { 21, 19, 20, 4, 5, ZSTD_HC_lazy2 }, /* level 10 */ - { 22, 20, 22, 4, 5, ZSTD_HC_lazy2 }, /* level 11 */ - { 22, 20, 22, 5, 5, ZSTD_HC_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, ZSTD_HC_lazy2 }, /* level 13 */ - { 22, 22, 23, 5, 5, ZSTD_HC_lazy2 }, /* level 14 */ - { 23, 23, 23, 5, 5, ZSTD_HC_lazy2 }, /* level 15 */ - { 23, 21, 22, 5, 5, ZSTD_HC_btlazy2 }, /* level 16 */ - { 23, 24, 23, 4, 5, ZSTD_HC_btlazy2 }, /* level 17 */ - { 25, 24, 23, 5, 5, ZSTD_HC_btlazy2 }, /* level 18 */ - { 25, 26, 23, 5, 5, ZSTD_HC_btlazy2 }, /* level 19 */ - { 26, 27, 24, 6, 5, ZSTD_HC_btlazy2 }, /* level 20 */ -} -}; - - -#if defined (__cplusplus) -} -#endif diff --git a/programs/Makefile b/programs/Makefile index 472222c4b02..ac713c3f9c7 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -58,37 +58,37 @@ default: zstd all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 paramgrill datagen -zstd: 
$(ZSTDDIR)/zstd.c $(ZSTDDIR)/zstdhc.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +zstd: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ xxhash.c bench.c fileio.c zstdcli.c legacy/fileio_legacy.c $(CC) $(FLAGS) $^ -o $@$(EXT) -zstd32: $(ZSTDDIR)/zstd.c $(ZSTDDIR)/zstdhc.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +zstd32: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ xxhash.c bench.c fileio.c zstdcli.c legacy/fileio_legacy.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) -fullbench : $(ZSTDDIR)/zstd.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +fullbench : $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ datagen.c fullbench.c $(CC) $(FLAGS) $^ -o $@$(EXT) -fullbench32: $(ZSTDDIR)/zstd.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +fullbench32: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ datagen.c fullbench.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) -fuzzer : $(ZSTDDIR)/zstd.c $(ZSTDDIR)/zstdhc.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +fuzzer : $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ datagen.c xxhash.c fuzzer.c $(CC) $(FLAGS) $^ -o $@$(EXT) -fuzzer32: $(ZSTDDIR)/zstd.c $(ZSTDDIR)/zstdhc.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +fuzzer32: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ datagen.c xxhash.c fuzzer.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) -paramgrill : $(ZSTDDIR)/zstdhc.c $(ZSTDDIR)/zstd.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +paramgrill : 
$(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ datagen.c xxhash.c paramgrill.c $(CC) $(FLAGS) $^ -lm -o $@$(EXT) diff --git a/programs/bench.c b/programs/bench.c index 5d10f9fb2b4..fa7ec5579b7 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -61,7 +61,6 @@ #include "mem.h" #include "zstd.h" -#include "zstdhc.h" #include "xxhash.h" @@ -231,12 +230,6 @@ typedef struct typedef size_t (*compressor_t) (void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel); -static size_t local_compress_fast (void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) -{ - (void)compressionLevel; - return ZSTD_compress(dst, maxDstSize, src, srcSize); -} - #define MIN(a,b) ((a)<(b) ? (a) : (b)) static int BMK_benchMem(void* srcBuffer, size_t srcSize, const char* fileName, int cLevel) @@ -247,7 +240,7 @@ static int BMK_benchMem(void* srcBuffer, size_t srcSize, const char* fileName, i const size_t maxCompressedSize = (size_t)nbBlocks * ZSTD_compressBound(blockSize); void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); - const compressor_t compressor = (cLevel <= 1) ? 
local_compress_fast : ZSTD_HC_compress; + const compressor_t compressor = ZSTD_compress; U64 crcOrig; /* init */ @@ -413,7 +406,7 @@ static size_t BMK_findMaxMem(U64 requiredMem) return (size_t)(requiredMem - step); } -static int BMK_benchOneFile(char* inFileName, int cLevel) +static int BMK_benchOneFile(const char* inFileName, int cLevel) { FILE* inFile; U64 inFileSize; @@ -513,7 +506,7 @@ static int BMK_syntheticTest(int cLevel, double compressibility) } -int BMK_benchFiles(char** fileNamesTable, unsigned nbFiles, unsigned cLevel) +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, unsigned cLevel) { double compressibility = (double)g_compressibilityDefault / 100; diff --git a/programs/fileio.c b/programs/fileio.c index 4ef808face6..04a609fed63 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -68,7 +68,6 @@ #include "mem.h" #include "fileio.h" #include "zstd_static.h" -#include "zstdhc_static.h" #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) # include "zstd_legacy.h" /* legacy */ @@ -237,46 +236,6 @@ static U64 FIO_getFileSize(const char* infilename) } -typedef void* (*FIO_createC) (void); -static void* local_ZSTD_createCCtx(void) { return (void*) ZSTD_createCCtx(); } -static void* local_ZSTD_HC_createCCtx(void) { return (void*) ZSTD_HC_createCCtx(); } - -typedef size_t (*FIO_initC) (void* ctx, void* dst, size_t maxDstSize, int cLevel, U64 srcSizeHint); -static size_t local_ZSTD_compressBegin (void* ctx, void* dst, size_t maxDstSize, int cLevel, U64 srcSizeHint) -{ - (void)cLevel; (void)srcSizeHint; - return ZSTD_compressBegin((ZSTD_CCtx*)ctx, dst, maxDstSize); -} -static size_t local_ZSTD_HC_compressBegin (void* ctx, void* dst, size_t maxDstSize, int cLevel, U64 srcSizeHint) -{ - return ZSTD_HC_compressBegin((ZSTD_HC_CCtx*)ctx, dst, maxDstSize, cLevel, srcSizeHint); -} - -typedef size_t (*FIO_continueC) (void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); -static size_t local_ZSTD_compressContinue 
(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - return ZSTD_compressContinue((ZSTD_CCtx*)ctx, dst, maxDstSize, src, srcSize); -} -static size_t local_ZSTD_HC_compressContinue (void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - return ZSTD_HC_compressContinue((ZSTD_HC_CCtx*)ctx, dst, maxDstSize, src, srcSize); -} - -typedef size_t (*FIO_endC) (void* ctx, void* dst, size_t maxDstSize); -static size_t local_ZSTD_compressEnd (void* ctx, void* dst, size_t maxDstSize) -{ - return ZSTD_compressEnd((ZSTD_CCtx*)ctx, dst, maxDstSize); -} -static size_t local_ZSTD_HC_compressEnd (void* ctx, void* dst, size_t maxDstSize) -{ - return ZSTD_HC_compressEnd((ZSTD_HC_CCtx*)ctx, dst, maxDstSize); -} - -typedef void (*FIO_freeC) (void* ctx); -static void local_ZSTD_freeCCtx(void* ctx) { ZSTD_freeCCtx((ZSTD_CCtx*)ctx); } -static void local_ZSTD_HC_freeCCtx(void* ctx) { ZSTD_HC_freeCCtx((ZSTD_HC_CCtx*)ctx); } - - unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename, int cLevel) { U64 filesize = 0; @@ -291,35 +250,14 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* FILE* finput; FILE* foutput; size_t sizeCheck, cSize; - void* ctx; - FIO_createC createC=NULL; - FIO_initC initC=NULL; - FIO_continueC continueC = NULL; - FIO_endC endC = NULL; - FIO_freeC freeC = NULL; + ZSTD_CCtx* ctx; - /* Init */ - if (cLevel <= 1) - { - createC = local_ZSTD_createCCtx; - initC = local_ZSTD_compressBegin; - continueC = local_ZSTD_compressContinue; - endC = local_ZSTD_compressEnd; - freeC = local_ZSTD_freeCCtx; - } - else - { - createC = local_ZSTD_HC_createCCtx; - initC = local_ZSTD_HC_compressBegin; - continueC = local_ZSTD_HC_compressContinue; - endC = local_ZSTD_HC_compressEnd; - freeC = local_ZSTD_HC_freeCCtx; - } + /* init */ FIO_getFileHandles(&finput, &foutput, input_filename, output_filename); filesize = FIO_getFileSize(input_filename); /* Allocate Memory */ - 
ctx = createC(); + ctx = ZSTD_createCCtx(); inBuff = (BYTE*)malloc(inBuffSize); outBuff = (BYTE*)malloc(outBuffSize); if (!inBuff || !outBuff || !ctx) EXM_THROW(21, "Allocation error : not enough memory"); @@ -327,7 +265,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* inEnd = inBuff + inBuffSize; /* Write Frame Header */ - cSize = initC(ctx, outBuff, outBuffSize, cLevel, filesize); + cSize = ZSTD_compressBegin(ctx, outBuff, outBuffSize, cLevel, filesize); if (ZSTD_isError(cSize)) EXM_THROW(22, "Compression error : cannot create frame header"); sizeCheck = fwrite(outBuff, 1, cSize, foutput); @@ -348,7 +286,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20)); /* Compress Block */ - cSize = continueC(ctx, outBuff, outBuffSize, inSlot, inSize); + cSize = ZSTD_compressContinue(ctx, outBuff, outBuffSize, inSlot, inSize); if (ZSTD_isError(cSize)) EXM_THROW(24, "Compression error : %s ", ZSTD_getErrorName(cSize)); @@ -362,7 +300,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* } /* End of Frame */ - cSize = endC(ctx, outBuff, outBuffSize); + cSize = ZSTD_compressEnd(ctx, outBuff, outBuffSize); if (ZSTD_isError(cSize)) EXM_THROW(26, "Compression error : cannot create frame end"); sizeCheck = fwrite(outBuff, 1, cSize, foutput); @@ -377,7 +315,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* /* clean */ free(inBuff); free(outBuff); - freeC(ctx); + ZSTD_freeCCtx(ctx); fclose(finput); if (fclose(foutput)) EXM_THROW(28, "Write error : cannot properly close %s", output_filename); diff --git a/programs/fullbench.c b/programs/fullbench.c index ef532361eed..239b4180eb9 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -217,7 +217,7 @@ extern size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* d size_t local_ZSTD_compress(void* dst, size_t dstSize, 
void* buff2, const void* src, size_t srcSize) { (void)buff2; - return ZSTD_compress(dst, dstSize, src, srcSize); + return ZSTD_compress(dst, dstSize, src, srcSize, 1); } size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) @@ -325,12 +325,12 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) switch(benchNb) { case 11: - g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize); + g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1); break; case 31: /* ZSTD_decodeLiteralsBlock */ { blockProperties_t bp; - g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize); + g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); ZSTD_getcBlockSize(dstBuff+4, dstBuffSize, &bp); // Get first block type if (bp.blockType != bt_compressed) { @@ -349,7 +349,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) const BYTE* ip = dstBuff; const BYTE* iend; size_t blockSize; - ZSTD_compress(dstBuff, dstBuffSize, src, srcSize); + ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); ip += 4; // Jump magic Number blockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); // Get first block type if (bp.blockType != bt_compressed) @@ -380,7 +380,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) case 102: /* local_decodeLiteralsForward */ { blockProperties_t bp; - ZSTD_compress(dstBuff, dstBuffSize, src, srcSize); + ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); g_cSize = ZSTD_getcBlockSize(dstBuff+7, dstBuffSize, &bp); memcpy(buff2, dstBuff+10, g_cSize); //srcSize = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize); // real speed diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 7458bba8d9e..ec1ccbd548b 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -47,7 +47,6 @@ #include /* timeb */ #include /* strcmp */ #include "zstd_static.h" -#include "zstdhc_static.h" #include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" /* XXH64 */ #include "mem.h" @@ -159,7 +158,7 @@ 
static int basicUnitTests(U32 seed, double compressibility) /* Basic tests */ DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); - result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), CNBuffer, COMPRESSIBLE_NOISE_LENGTH); + result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), CNBuffer, COMPRESSIBLE_NOISE_LENGTH, 1); if (ZSTD_isError(result)) goto _output_error; cSize = result; DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); @@ -213,7 +212,7 @@ static int basicUnitTests(U32 seed, double compressibility) sampleSize += 256 KB - 1; RDG_genBuffer((char*)CNBuffer+sampleSize, 96 KB, compressibility, 0., randState); sampleSize += 96 KB; - cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize); + cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1); if (ZSTD_isError(cSize)) goto _output_error; result = ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize); if (ZSTD_isError(result)) goto _output_error; @@ -265,11 +264,11 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit U32 testNb = 0; U32 coreSeed = seed, lseed = 0; ZSTD_CCtx* ctx; - ZSTD_HC_CCtx* hcctx; + ZSTD_CCtx* hcctx; /* allocation */ ctx = ZSTD_createCCtx(); - hcctx = ZSTD_HC_createCCtx(); + hcctx = ZSTD_createCCtx(); cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); @@ -332,8 +331,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit #define MAX(a,b) ((a)>(b)?(a):(b)) cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2)); /* use high compression levels with small samples, for speed */ cLevel = (FUZ_rand(&lseed) % cLevelMod) +1; - cSize = ZSTD_HC_compressCCtx(hcctx, cBuffer, cBufferSize, srcBuffer + 
sampleStart, sampleSize, cLevel); - CHECK(ZSTD_isError(cSize), "ZSTD_HC_compressCCtx failed"); + cSize = ZSTD_compressCCtx(hcctx, cBuffer, cBufferSize, srcBuffer + sampleStart, sampleSize, cLevel); + CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed"); /* compression failure test : too small dest buffer */ if (cSize > 3) @@ -344,10 +343,10 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit static const U32 endMark = 0x4DC2B1A9; U32 endCheck; memcpy(dstBuffer+tooSmallSize, &endMark, 4); - errorCode = ZSTD_HC_compressCCtx(hcctx, dstBuffer, tooSmallSize, srcBuffer + sampleStart, sampleSize, cLevel); - CHECK(!ZSTD_isError(errorCode), "ZSTD_HC_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); + errorCode = ZSTD_compressCCtx(hcctx, dstBuffer, tooSmallSize, srcBuffer + sampleStart, sampleSize, cLevel); + CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); memcpy(&endCheck, dstBuffer+tooSmallSize, 4); - CHECK(endCheck != endMark, "ZSTD_HC_compressCCtx : dst buffer overflow"); + CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); } /* successfull decompression tests*/ @@ -434,7 +433,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit _cleanup: ZSTD_freeCCtx(ctx); - ZSTD_HC_freeCCtx(hcctx); + ZSTD_freeCCtx(hcctx); free(cNoiseBuffer[0]); free(cNoiseBuffer[1]); free(cNoiseBuffer[2]); diff --git a/programs/paramgrill.c b/programs/paramgrill.c index 90cfdd775e4..02fd240f003 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -62,8 +62,7 @@ #endif #include "mem.h" -#include "zstdhc_static.h" -#include "zstd.h" +#include "zstd_static.h" #include "datagen.h" #include "xxhash.h" @@ -122,8 +121,8 @@ static U32 g_rand = 1; static U32 g_singleRun = 0; static U32 g_target = 0; static U32 g_noSeed = 0; -static const ZSTD_HC_parameters* g_seedParams = 
ZSTD_HC_defaultParameters[0]; -static ZSTD_HC_parameters g_params = { 0, 0, 0, 0, 0, ZSTD_HC_greedy }; +static const ZSTD_parameters* g_seedParams = ZSTD_defaultParameters[0]; +static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, ZSTD_greedy }; void BMK_SetNbIterations(int nbLoops) { @@ -270,8 +269,8 @@ typedef struct static size_t BMK_benchParam(BMK_result_t* resultPtr, const void* srcBuffer, size_t srcSize, - ZSTD_HC_CCtx* ctx, - const ZSTD_HC_parameters params) + ZSTD_CCtx* ctx, + const ZSTD_parameters params) { const size_t blockSize = g_blockSize ? g_blockSize : srcSize; const U32 nbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize); @@ -284,7 +283,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, U32 Hlog = params.hashLog; U32 Slog = params.searchLog; U32 Slength = params.searchLength; - ZSTD_HC_strategy strat = params.strategy; + ZSTD_strategy strat = params.strategy; char name[30] = { 0 }; U64 crcOrig; @@ -358,7 +357,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, while (BMK_GetMilliSpan(milliTime) < TIMELOOP) { for (blockNb=0; blockNb (b) ? 
(a) : (b) ) static void playAround(FILE* f, winnerInfo_t* winners, - ZSTD_HC_parameters params, + ZSTD_parameters params, const void* srcBuffer, size_t srcSize, - ZSTD_HC_CCtx* ctx) + ZSTD_CCtx* ctx) { int nbVariations = 0; const int startTime = BMK_GetMilliStart(); while (BMK_GetMilliSpan(startTime) < g_maxVariationTime) { - ZSTD_HC_parameters p = params; + ZSTD_parameters p = params; U32 nbChanges = (FUZ_rand(&g_rand) & 3) + 1; if (nbVariations++ > g_maxNbVariations) break; @@ -643,16 +642,16 @@ static void playAround(FILE* f, winnerInfo_t* winners, case 9: p.searchLength--; break; case 10: - p.strategy = (ZSTD_HC_strategy)(((U32)p.strategy)+1); break; + p.strategy = (ZSTD_strategy)(((U32)p.strategy)+1); break; case 11: - p.strategy = (ZSTD_HC_strategy)(((U32)p.strategy)-1); break; + p.strategy = (ZSTD_strategy)(((U32)p.strategy)-1); break; } } /* validate new conf */ { - ZSTD_HC_parameters saved = p; - ZSTD_HC_validateParams(&p, g_blockSize ? g_blockSize : srcSize); + ZSTD_parameters saved = p; + ZSTD_validateParams(&p, g_blockSize ? 
g_blockSize : srcSize); if (memcmp(&p, &saved, sizeof(p))) continue; /* p was invalid */ } @@ -675,19 +674,19 @@ static void playAround(FILE* f, winnerInfo_t* winners, static void BMK_selectRandomStart( FILE* f, winnerInfo_t* winners, const void* srcBuffer, size_t srcSize, - ZSTD_HC_CCtx* ctx) + ZSTD_CCtx* ctx) { - U32 id = (FUZ_rand(&g_rand) % (ZSTD_HC_MAX_CLEVEL+1)); + U32 id = (FUZ_rand(&g_rand) % (ZSTD_MAX_CLEVEL+1)); if ((id==0) || (winners[id].params.windowLog==0)) { /* totally random entry */ - ZSTD_HC_parameters p; - p.contentLog = FUZ_rand(&g_rand) % (ZSTD_HC_CONTENTLOG_MAX+1 - ZSTD_HC_CONTENTLOG_MIN) + ZSTD_HC_CONTENTLOG_MIN; - p.hashLog = FUZ_rand(&g_rand) % (ZSTD_HC_HASHLOG_MAX+1 - ZSTD_HC_HASHLOG_MIN) + ZSTD_HC_HASHLOG_MIN; - p.searchLog = FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLOG_MAX+1 - ZSTD_HC_SEARCHLOG_MIN) + ZSTD_HC_SEARCHLOG_MIN; - p.windowLog = FUZ_rand(&g_rand) % (ZSTD_HC_WINDOWLOG_MAX+1 - ZSTD_HC_WINDOWLOG_MIN) + ZSTD_HC_WINDOWLOG_MIN; - p.searchLength=FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLENGTH_MAX+1 - ZSTD_HC_SEARCHLENGTH_MIN) + ZSTD_HC_SEARCHLENGTH_MIN; - p.strategy = (ZSTD_HC_strategy) (FUZ_rand(&g_rand) % (ZSTD_HC_btlazy2+1)); + ZSTD_parameters p; + p.contentLog = FUZ_rand(&g_rand) % (ZSTD_CONTENTLOG_MAX+1 - ZSTD_CONTENTLOG_MIN) + ZSTD_CONTENTLOG_MIN; + p.hashLog = FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN) + ZSTD_HASHLOG_MIN; + p.searchLog = FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN) + ZSTD_SEARCHLOG_MIN; + p.windowLog = FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN) + ZSTD_WINDOWLOG_MIN; + p.searchLength=FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN) + ZSTD_SEARCHLENGTH_MIN; + p.strategy = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btlazy2+1)); playAround(f, winners, p, srcBuffer, srcSize, ctx); } else @@ -697,9 +696,9 @@ static void BMK_selectRandomStart( static void BMK_benchMem(void* srcBuffer, size_t srcSize) { - ZSTD_HC_CCtx* ctx = ZSTD_HC_createCCtx(); - 
ZSTD_HC_parameters params; - winnerInfo_t winners[ZSTD_HC_MAX_CLEVEL+1]; + ZSTD_CCtx* ctx = ZSTD_createCCtx(); + ZSTD_parameters params; + winnerInfo_t winners[ZSTD_MAX_CLEVEL+1]; int i; const char* rfName = "grillResults.txt"; FILE* f; @@ -709,7 +708,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) if (g_singleRun) { BMK_result_t testResult; - ZSTD_HC_validateParams(&g_params, blockSize); + ZSTD_validateParams(&g_params, blockSize); BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, g_params); DISPLAY("\n"); return; @@ -731,24 +730,24 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) params.contentLog = 1; params.searchLog = 1; params.searchLength = 7; - params.strategy = ZSTD_HC_fast; - ZSTD_HC_validateParams(¶ms, blockSize); + params.strategy = ZSTD_fast; + ZSTD_validateParams(¶ms, blockSize); BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); g_cSpeedTarget[1] = (testResult.cSpeed * 15) >> 4; } /* establish speed objectives (relative to level 1) */ - for (i=2; i<=ZSTD_HC_MAX_CLEVEL; i++) + for (i=2; i<=ZSTD_MAX_CLEVEL; i++) g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 25) >> 5; /* populate initial solution */ { const int tableID = (blockSize > 128 KB); - const int maxSeeds = g_noSeed ? 1 : ZSTD_HC_MAX_CLEVEL; - g_seedParams = ZSTD_HC_defaultParameters[tableID]; + const int maxSeeds = g_noSeed ? 
1 : ZSTD_MAX_CLEVEL; + g_seedParams = ZSTD_defaultParameters[tableID]; for (i=1; i<=maxSeeds; i++) { - const U32 btPlus = (params.strategy == ZSTD_HC_btlazy2); + const U32 btPlus = (params.strategy == ZSTD_btlazy2); params = g_seedParams[i]; params.windowLog = MIN(srcLog, params.windowLog); params.contentLog = MIN(params.windowLog+btPlus, params.contentLog); @@ -775,7 +774,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) /* clean up*/ fclose(f); - ZSTD_HC_freeCCtx(ctx); + ZSTD_freeCCtx(ctx); } @@ -994,12 +993,12 @@ int main(int argc, char** argv) g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0'; continue; case 't': /* strategy */ - g_params.strategy = (ZSTD_HC_strategy)0; + g_params.strategy = (ZSTD_strategy)0; argument++; while ((*argument>= '0') && (*argument<='9')) { - g_params.strategy = (ZSTD_HC_strategy)((U32)g_params.strategy *10); - g_params.strategy = (ZSTD_HC_strategy)((U32)g_params.strategy + *argument++ - '0'); + g_params.strategy = (ZSTD_strategy)((U32)g_params.strategy *10); + g_params.strategy = (ZSTD_strategy)((U32)g_params.strategy + *argument++ - '0'); } continue; case 'L': @@ -1009,7 +1008,7 @@ int main(int argc, char** argv) while ((*argument>= '0') && (*argument<='9')) cLevel *= 10, cLevel += *argument++ - '0'; if (cLevel < 1) cLevel = 1; - if (cLevel > ZSTD_HC_MAX_CLEVEL) cLevel = ZSTD_HC_MAX_CLEVEL; + if (cLevel > ZSTD_MAX_CLEVEL) cLevel = ZSTD_MAX_CLEVEL; g_params = g_seedParams[cLevel]; continue; } From 7f6e91fa98820ed5657c678b2999d0ba1e2c0655 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Nov 2015 14:39:50 +0100 Subject: [PATCH 02/79] fixed library compilation --- Makefile | 2 +- NEWS | 3 +++ lib/Makefile | 4 ++-- lib/zstd.h | 4 ++-- lib/zstd_decompress.c | 4 ---- programs/bench.h | 2 +- programs/zstdcli.c | 8 ++++---- 7 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index cea6cb60cc8..7caaa466112 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ # 
################################################################ # Version number -export VERSION := 0.3.6 +export VERSION := 0.4.0 PRGDIR = programs ZSTDDIR = lib diff --git a/NEWS b/NEWS index 0b635d8037a..94bd7db1926 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +v0.4.0 +Removed zstdhc => merged into zstd + v0.3.6 small blocks params diff --git a/lib/Makefile b/lib/Makefile index 7d09111a799..f027076f0d6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -67,10 +67,10 @@ default: libzstd all: libzstd -libzstd: zstd.c huff0.c fse.c +libzstd: zstd_compress.c zstd_decompress.c huff0.c fse.c @echo compiling static library @$(CC) $(FLAGS) -c $^ - @$(AR) rcs libzstd.a zstd.o huff0.o fse.o + @$(AR) rcs libzstd.a zstd_compress.o zstd_decompress.o huff0.o fse.o @echo compiling dynamic library $(LIBVER) @$(CC) $(FLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER) @echo creating versioned links diff --git a/lib/zstd.h b/lib/zstd.h index 3b18a867d69..96bb184f0a0 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -47,8 +47,8 @@ extern "C" { * Version ***************************************/ #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ -#define ZSTD_VERSION_MINOR 3 /* for new (non-breaking) interface capabilities */ -#define ZSTD_VERSION_RELEASE 6 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_MINOR 4 /* for new (non-breaking) interface capabilities */ +#define ZSTD_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) unsigned ZSTD_versionNumber (void); diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index f3edeb6f2c4..36d5bfc38d3 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -123,10 +123,6 @@ #define IS_RAW BIT0 #define IS_RLE BIT1 -static const U32 g_maxDistance = 4 * BLOCKSIZE; -static const U32 g_maxLimit = 1 GB; - -#define WORKPLACESIZE (BLOCKSIZE*3) #define MINMATCH 4 #define LitFSELog 
11 #define MLFSELog 10 diff --git a/programs/bench.h b/programs/bench.h index 08f8a36393c..3cc67b4df68 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -26,7 +26,7 @@ /* Main function */ -int BMK_benchFiles(char** fileNamesTable, unsigned nbFiles, unsigned cLevel); +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, unsigned cLevel); /* Set Parameters */ void BMK_SetNbIterations(int nbLoops); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 65b235500fb..5b74df62c02 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -163,7 +163,7 @@ static void waitEnter(void) } -int main(int argc, char** argv) +int main(int argCount, const char** argv) { int i, bench=0, @@ -196,9 +196,9 @@ int main(int argc, char** argv) decode=1; /* command switches */ - for(i=1; i Date: Wed, 11 Nov 2015 14:56:17 +0100 Subject: [PATCH 03/79] fixed Visual projects --- visual/2012/fullbench/fullbench.vcxproj | 7 ++++--- .../2012/fullbench/fullbench.vcxproj.filters | 9 ++++++--- visual/2012/fuzzer/fuzzer.vcxproj | 10 ++++------ visual/2012/fuzzer/fuzzer.vcxproj.filters | 16 +++++----------- visual/2012/zstd/zstd.vcxproj | 19 ++++++++----------- visual/2012/zstd/zstd.vcxproj.filters | 18 ++++++------------ 6 files changed, 33 insertions(+), 46 deletions(-) diff --git a/visual/2012/fullbench/fullbench.vcxproj b/visual/2012/fullbench/fullbench.vcxproj index 5646f091ee6..6239e22305d 100644 --- a/visual/2012/fullbench/fullbench.vcxproj +++ b/visual/2012/fullbench/fullbench.vcxproj @@ -73,7 +73,7 @@ true - $(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); + $(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); true @@ -95,7 +95,7 @@ WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true true - /analyze:stacksize19000 %(AdditionalOptions) + /analyze:stacksize25000 %(AdditionalOptions) Console @@ -163,7 +163,8 @@ - + + diff --git 
a/visual/2012/fullbench/fullbench.vcxproj.filters b/visual/2012/fullbench/fullbench.vcxproj.filters index 41fadb29086..1a80b2db089 100644 --- a/visual/2012/fullbench/fullbench.vcxproj.filters +++ b/visual/2012/fullbench/fullbench.vcxproj.filters @@ -18,9 +18,6 @@ Fichiers sources - - Fichiers sources - Fichiers sources @@ -36,6 +33,12 @@ Fichiers sources + + Fichiers sources + + + Fichiers sources + diff --git a/visual/2012/fuzzer/fuzzer.vcxproj b/visual/2012/fuzzer/fuzzer.vcxproj index 64bf8f9c431..8c302e5c620 100644 --- a/visual/2012/fuzzer/fuzzer.vcxproj +++ b/visual/2012/fuzzer/fuzzer.vcxproj @@ -73,7 +73,7 @@ true - $(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); + $(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); true @@ -95,7 +95,7 @@ WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true true - /analyze:stacksize19000 %(AdditionalOptions) + /analyze:stacksize25000 %(AdditionalOptions) Console @@ -163,8 +163,8 @@ - - + + @@ -178,8 +178,6 @@ - - diff --git a/visual/2012/fuzzer/fuzzer.vcxproj.filters b/visual/2012/fuzzer/fuzzer.vcxproj.filters index 4430249963e..f58f85415e2 100644 --- a/visual/2012/fuzzer/fuzzer.vcxproj.filters +++ b/visual/2012/fuzzer/fuzzer.vcxproj.filters @@ -18,9 +18,6 @@ Fichiers sources - - Fichiers sources - Fichiers sources @@ -36,10 +33,13 @@ Fichiers sources - + Fichiers sources - + + Fichiers sources + + Fichiers sources @@ -74,12 +74,6 @@ Fichiers d%27en-tête - - Fichiers d%27en-tête - - - Fichiers d%27en-tête - Fichiers d%27en-tête diff --git a/visual/2012/zstd/zstd.vcxproj b/visual/2012/zstd/zstd.vcxproj index 030138917ac..989ec2e405f 100644 --- a/visual/2012/zstd/zstd.vcxproj +++ b/visual/2012/zstd/zstd.vcxproj @@ -23,8 +23,8 @@ - - + + @@ -40,8 +40,6 @@ - - @@ -104,7 +102,7 @@ true - $(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); + 
$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); true @@ -115,7 +113,7 @@ false $(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath); - true + false @@ -126,7 +124,7 @@ WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true true - /analyze:stacksize19000 %(AdditionalOptions) + /analyze:stacksize25000 %(AdditionalOptions) Console @@ -140,7 +138,7 @@ Level4 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - true + false true /analyze:stacksize19000 %(AdditionalOptions) @@ -178,9 +176,8 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - true - /analyze:stacksize19000 %(AdditionalOptions) + false + false MultiThreaded diff --git a/visual/2012/zstd/zstd.vcxproj.filters b/visual/2012/zstd/zstd.vcxproj.filters index 562e0d4778e..a4679505c35 100644 --- a/visual/2012/zstd/zstd.vcxproj.filters +++ b/visual/2012/zstd/zstd.vcxproj.filters @@ -18,9 +18,6 @@ Fichiers sources - - Fichiers sources - Fichiers sources @@ -39,15 +36,18 @@ Fichiers sources - - Fichiers sources - Fichiers sources Fichiers sources + + Fichiers sources + + + Fichiers sources + @@ -83,12 +83,6 @@ Fichiers d%27en-tête - - Fichiers d%27en-tête - - - Fichiers d%27en-tête - Fichiers d%27en-tête From 14983e7aedf5d82f4eb91ad8e6472fd316abe4c1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Nov 2015 21:38:21 +0100 Subject: [PATCH 04/79] refactored compression code --- lib/zstd_compress.c | 501 +++++++++++++++++++++++++++++++++++++++++- lib/zstd_decompress.c | 365 +----------------------------- lib/zstd_internal.h | 249 ++++----------------- 3 files changed, 545 insertions(+), 570 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index f3cf83c6160..6884521ab79 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ 
-55,25 +55,51 @@ ***************************************/ #include /* malloc */ #include /* memset */ +#include "mem.h" +#include "fse_static.h" +#include "huff0.h" #include "zstd_static.h" #include "zstd_internal.h" -#include "mem.h" /* ************************************* -* Local Constants +* Constants +***************************************/ +static const U32 g_searchStrength = 8; + + +/* ************************************* +* Sequence storage ***************************************/ -#define MINMATCH 4 -#define MAXD_LOG 26 +typedef struct { + void* buffer; + U32* offsetStart; + U32* offset; + BYTE* offCodeStart; + BYTE* offCode; + BYTE* litStart; + BYTE* lit; + BYTE* litLengthStart; + BYTE* litLength; + BYTE* matchLengthStart; + BYTE* matchLength; + BYTE* dumpsStart; + BYTE* dumps; +} seqStore_t; + +static void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->offset = ssPtr->offsetStart; + ssPtr->lit = ssPtr->litStart; + ssPtr->litLength = ssPtr->litLengthStart; + ssPtr->matchLength = ssPtr->matchLengthStart; + ssPtr->dumps = ssPtr->dumpsStart; +} -#define KB *1024 -#define MB *1024*1024 -#define GB *(1ULL << 30) /* ************************************* -* Local Types +* Context memory management ***************************************/ -#define BLOCKSIZE (128 KB) /* define, for static allocation */ #define WORKPLACESIZE (BLOCKSIZE*3) struct ZSTD_CCtx_s @@ -107,6 +133,8 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) } +static unsigned ZSTD_highbit(U32 val); + /** ZSTD_validateParams correct params value to remain within authorized range optimize for srcSize if srcSize > 0 */ @@ -179,8 +207,459 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, } +/* ******************************************************* +* Block entropic compression +*********************************************************/ +size_t ZSTD_compressBound(size_t srcSize) /* maximum compressed size */ +{ + return FSE_compressBound(srcSize) + 12; +} + + +size_t ZSTD_noCompressBlock (void* dst, 
size_t maxDstSize, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + + if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return ERROR(dstSize_tooSmall); + memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize); + + /* Build header */ + ostart[0] = (BYTE)(srcSize>>16); + ostart[1] = (BYTE)(srcSize>>8); + ostart[2] = (BYTE) srcSize; + ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */ + + return ZSTD_blockHeaderSize+srcSize; +} + + +static size_t ZSTD_noCompressLiterals (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + + if (srcSize + 3 > maxDstSize) return ERROR(dstSize_tooSmall); + + MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RAW); + memcpy(ostart + 3, src, srcSize); + return srcSize + 3; +} + +static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + + (void)maxDstSize; + MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RLE); /* note : maxDstSize > litHeaderSize > 4 */ + ostart[3] = *(const BYTE*)src; + return 4; +} + +size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 1; } + +static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + const size_t minGain = ZSTD_minGain(srcSize); + BYTE* const ostart = (BYTE*)dst; + size_t hsize; + static const size_t litHeaderSize = 5; + + if (maxDstSize < litHeaderSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + + hsize = HUF_compress(ostart+litHeaderSize, maxDstSize-litHeaderSize, src, srcSize); + + if ((hsize==0) || (hsize >= srcSize - minGain)) return ZSTD_noCompressLiterals(dst, maxDstSize, src, srcSize); + if (hsize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize); + + /* Build header */ + { + ostart[0] = (BYTE)(srcSize << 2); /* is a block, is compressed */ + ostart[1] = (BYTE)(srcSize >> 6); + ostart[2] = (BYTE)(srcSize 
>>14); + ostart[2] += (BYTE)(hsize << 5); + ostart[3] = (BYTE)(hsize >> 3); + ostart[4] = (BYTE)(hsize >>11); + } + + return hsize+litHeaderSize; +} + + +#define LITERAL_NOENTROPY 63 /* cheap heuristic */ + +size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, + const seqStore_t* seqStorePtr, + size_t srcSize) +{ + U32 count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + size_t mostFrequent; + U32 max = 255; + U32 tableLog = 11; + U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )]; + U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)]; + U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )]; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const BYTE* const op_lit_start = seqStorePtr->litStart; + const BYTE* const llTable = seqStorePtr->litLengthStart; + const BYTE* const llPtr = seqStorePtr->litLength; + const BYTE* const mlTable = seqStorePtr->matchLengthStart; + const U32* const offsetTable = seqStorePtr->offsetStart; + BYTE* const offCodeTable = seqStorePtr->offCodeStart; + BYTE* op = dst; + BYTE* const oend = dst + maxDstSize; + const size_t nbSeq = llPtr - llTable; + const size_t minGain = ZSTD_minGain(srcSize); + const size_t maxCSize = srcSize - minGain; + BYTE* seqHead; + + + /* Compress literals */ + { + size_t cSize; + size_t litSize = seqStorePtr->lit - op_lit_start; + + if (litSize <= LITERAL_NOENTROPY) + cSize = ZSTD_noCompressLiterals(op, maxDstSize, op_lit_start, litSize); + else + cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize); + if (ZSTD_isError(cSize)) return cSize; + op += cSize; + } + + /* Sequences Header */ + if ((oend-op) < MIN_SEQUENCES_SIZE) + return ERROR(dstSize_tooSmall); + MEM_writeLE16(op, (U16)nbSeq); op+=2; + seqHead = op; + + /* dumps : contains too large lengths */ + { + size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; + if (dumpsLength < 512) + { + op[0] = (BYTE)(dumpsLength >> 8); + op[1] = (BYTE)(dumpsLength); + op += 2; + } + else + { + op[0] = 2; + 
op[1] = (BYTE)(dumpsLength>>8); + op[2] = (BYTE)(dumpsLength); + op += 3; + } + if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall); + memcpy(op, seqStorePtr->dumpsStart, dumpsLength); + op += dumpsLength; + } + + /* CTable for Literal Lengths */ + max = MaxLL; + mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) + { + *op++ = *(seqStorePtr->litLengthStart); + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = bt_rle; + } + else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1)))) + { + FSE_buildCTable_raw(CTable_LitLength, LLbits); + LLtype = bt_raw; + } + else + { + size_t NCountSize; + tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + FSE_normalizeCount(norm, tableLog, count, nbSeq, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + LLtype = bt_compressed; + } + + /* CTable for Offsets codes */ + { + /* create Offset codes */ + size_t i; + max = MaxOff; + for (i=0; i 2)) + { + *op++ = *offCodeTable; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = bt_rle; + } + else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1)))) + { + FSE_buildCTable_raw(CTable_OffsetBits, Offbits); + Offtype = bt_raw; + } + else + { + size_t NCountSize; + tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + FSE_normalizeCount(norm, tableLog, count, nbSeq, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); + Offtype = bt_compressed; + } + + /* CTable for MatchLengths */ + max = MaxML; + mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq); + if ((mostFrequent == nbSeq) 
&& (nbSeq > 2)) + { + *op++ = *seqStorePtr->matchLengthStart; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = bt_rle; + } + else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1)))) + { + FSE_buildCTable_raw(CTable_MatchLength, MLbits); + MLtype = bt_raw; + } + else + { + size_t NCountSize; + tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + FSE_normalizeCount(norm, tableLog, count, nbSeq, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); + MLtype = bt_compressed; + } + + seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + /* Encoding Sequences */ + { + size_t streamSize, errorCode; + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + int i; + + errorCode = BIT_initCStream(&blockStream, op, oend-op); + if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); /* not enough space remaining */ + FSE_initCState(&stateMatchLength, CTable_MatchLength); + FSE_initCState(&stateOffsetBits, CTable_OffsetBits); + FSE_initCState(&stateLitLength, CTable_LitLength); + + for (i=(int)nbSeq-1; i>=0; i--) + { + BYTE matchLength = mlTable[i]; + U32 offset = offsetTable[i]; + BYTE offCode = offCodeTable[i]; /* 32b*/ /* 64b*/ + U32 nbBits = (offCode-1) * (!!offCode); + BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ + BIT_addBits(&blockStream, offset, nbBits); /* 32 */ /* 42 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */ + FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */ + BIT_flushBits(&blockStream); /* 7 */ /* 7 */ + } + + 
FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } + + /* check compressibility */ + if ((size_t)(op-dst) >= maxCSize) return 0; + + return op - dst; +} + + +/** ZSTD_storeSeq + Store a sequence (literal length, literals, offset code and match length) into seqStore_t + @offsetCode : distance to match, or 0 == repCode + @matchCode : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode) +{ +#if 0 + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = literals; + if (literals - g_start == 8695) + printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", + (U32)(literals - g_start), (U32)litLength, (U32)matchCode+4, (U32)offsetCode); +#endif + + /* copy Literals */ + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength >= MaxLL) + { + *(seqStorePtr->litLength++) = MaxLL; + if (litLength<255 + MaxLL) + *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL); + else + { + *(seqStorePtr->dumps++) = 255; + MEM_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3; + } + } + else *(seqStorePtr->litLength++) = (BYTE)litLength; + + /* match offset */ + *(seqStorePtr->offset++) = (U32)offsetCode; + + /* match Length */ + if (matchCode >= MaxML) + { + *(seqStorePtr->matchLength++) = MaxML; + if (matchCode < 255+MaxML) + *(seqStorePtr->dumps++) = (BYTE)(matchCode - MaxML); + else + { + *(seqStorePtr->dumps++) = 255; + MEM_writeLE32(seqStorePtr->dumps, (U32)matchCode); seqStorePtr->dumps += 3; + } + } + else *(seqStorePtr->matchLength++) = (BYTE)matchCode; +} + + +/* ************************************* +* Match length 
counter +***************************************/ +static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; } + +static unsigned ZSTD_highbit(U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse(&r, val); + return (unsigned)r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return 31 - __builtin_clz(val); +# else /* Software version */ + static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + +MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) + { + if (MEM_64bits()) + { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } + else /* 32 bits */ + { +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } + else /* Big Endian CPU */ + { + if (MEM_32bits()) + { +# if defined(_MSC_VER) && defined(_WIN64) + 
unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } + else /* 32 bits */ + { +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } + } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + while ((pIn> (64-h)) ; } static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_read64(p), h); } -static const U64 prime7bytes = 58295818150454627ULL; +static const U64 prime7bytes = 58295818150454627ULL; static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_read64(p), h); } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 36d5bfc38d3..38e1e130720 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -33,14 +33,6 @@ /* *************************************************************** * Tuning parameters *****************************************************************/ -/*! -* MEMORY_USAGE : -* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect -*/ -#define ZSTD_MEMORY_USAGE 16 - /*! 
* HEAPMODE : * Select how default compression functions will allocate memory for their hash table, @@ -53,7 +45,7 @@ /*! * LEGACY_SUPPORT : -* decompressor can decode older formats (starting from Zstd 0.1+) +* ZSTD_decompress() can decode older formats (starting from zstd 0.1+) */ #ifndef ZSTD_LEGACY_SUPPORT # define ZSTD_LEGACY_SUPPORT 1 @@ -80,10 +72,6 @@ /* ******************************************************* * Compiler specifics *********************************************************/ -#ifdef __AVX2__ -# include /* AVX2 intrinsics */ -#endif - #ifdef _MSC_VER /* Visual Studio */ # define FORCE_INLINE static __forceinline # include /* For Visual 2005 */ @@ -99,43 +87,14 @@ #endif -/* ******************************************************* -* Constants -*********************************************************/ -#define HASH_LOG (ZSTD_MEMORY_USAGE - 2) -#define HASH_TABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASH_TABLESIZE - 1) - -#define KNUTH 2654435761 - -#define BIT7 128 -#define BIT6 64 -#define BIT5 32 -#define BIT4 16 -#define BIT1 2 -#define BIT0 1 - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define BLOCKSIZE (128 KB) /* define, for static allocation */ -#define IS_RAW BIT0 -#define IS_RLE BIT1 - -#define MINMATCH 4 -#define LitFSELog 11 -#define MLFSELog 10 -#define LLFSELog 10 -#define OffFSELog 9 -#define MAX(a,b) ((a)<(b)?(b):(a)) -#define MaxSeq MAX(MaxLL, MaxML) - -#define LITERAL_NOENTROPY 63 -#define COMMAND_NOENTROPY 7 /* to remove */ - -static const size_t ZSTD_blockHeaderSize = 3; -static const size_t ZSTD_frameHeaderSize = 4; +/* ************************************* +* Local types +***************************************/ +typedef struct +{ + blockType_t blockType; + U32 origSize; +} blockProperties_t; /* ******************************************************* @@ -144,22 +103,11 @@ static const size_t ZSTD_frameHeaderSize = 4; static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } -/* 
************************************** -* Local structures -****************************************/ -void ZSTD_resetSeqStore(seqStore_t* ssPtr) -{ - ssPtr->offset = ssPtr->offsetStart; - ssPtr->lit = ssPtr->litStart; - ssPtr->litLength = ssPtr->litLengthStart; - ssPtr->matchLength = ssPtr->matchLengthStart; - ssPtr->dumps = ssPtr->dumpsStart; -} - - /* ************************************* * Error Management ***************************************/ +unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; } + /*! ZSTD_isError * tells if a return value is an error code */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } @@ -169,295 +117,6 @@ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } -/* ************************************* -* Tool functions -***************************************/ -unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; } - - -/* ******************************************************* -* Compression -*********************************************************/ -size_t ZSTD_compressBound(size_t srcSize) /* maximum compressed size */ -{ - return FSE_compressBound(srcSize) + 12; -} - - -size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - - if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return ERROR(dstSize_tooSmall); - memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize); - - /* Build header */ - ostart[0] = (BYTE)(srcSize>>16); - ostart[1] = (BYTE)(srcSize>>8); - ostart[2] = (BYTE) srcSize; - ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */ - - return ZSTD_blockHeaderSize+srcSize; -} - - -static size_t ZSTD_compressRawLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - - if (srcSize + 3 > maxDstSize) return ERROR(dstSize_tooSmall); - - 
MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RAW); - memcpy(ostart + 3, src, srcSize); - return srcSize + 3; -} - -static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - - (void)maxDstSize; - MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RLE); /* note : maxDstSize > litHeaderSize > 4 */ - ostart[3] = *(const BYTE*)src; - return 4; -} - -size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 1; } - -static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, - const void* src, size_t srcSize) -{ - const size_t minGain = ZSTD_minGain(srcSize); - BYTE* const ostart = (BYTE*)dst; - size_t hsize; - static const size_t litHeaderSize = 5; - - if (maxDstSize < litHeaderSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ - - hsize = HUF_compress(ostart+litHeaderSize, maxDstSize-litHeaderSize, src, srcSize); - - if ((hsize==0) || (hsize >= srcSize - minGain)) return ZSTD_compressRawLiteralsBlock(dst, maxDstSize, src, srcSize); - if (hsize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize); - - /* Build header */ - { - ostart[0] = (BYTE)(srcSize << 2); /* is a block, is compressed */ - ostart[1] = (BYTE)(srcSize >> 6); - ostart[2] = (BYTE)(srcSize >>14); - ostart[2] += (BYTE)(hsize << 5); - ostart[3] = (BYTE)(hsize >> 3); - ostart[4] = (BYTE)(hsize >>11); - } - - return hsize+litHeaderSize; -} - - -size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, - const seqStore_t* seqStorePtr, - size_t srcSize) -{ - U32 count[MaxSeq+1]; - S16 norm[MaxSeq+1]; - size_t mostFrequent; - U32 max = 255; - U32 tableLog = 11; - U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )]; - U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)]; - U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )]; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const BYTE* const op_lit_start = seqStorePtr->litStart; 
- const BYTE* const llTable = seqStorePtr->litLengthStart; - const BYTE* const llPtr = seqStorePtr->litLength; - const BYTE* const mlTable = seqStorePtr->matchLengthStart; - const U32* const offsetTable = seqStorePtr->offsetStart; - BYTE* const offCodeTable = seqStorePtr->offCodeStart; - BYTE* op = dst; - BYTE* const oend = dst + maxDstSize; - const size_t nbSeq = llPtr - llTable; - const size_t minGain = ZSTD_minGain(srcSize); - const size_t maxCSize = srcSize - minGain; - BYTE* seqHead; - - - /* Compress literals */ - { - size_t cSize; - size_t litSize = seqStorePtr->lit - op_lit_start; - - if (litSize <= LITERAL_NOENTROPY) - cSize = ZSTD_compressRawLiteralsBlock(op, maxDstSize, op_lit_start, litSize); - else - cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize); - if (ZSTD_isError(cSize)) return cSize; - op += cSize; - } - - /* Sequences Header */ - if ((oend-op) < MIN_SEQUENCES_SIZE) - return ERROR(dstSize_tooSmall); - MEM_writeLE16(op, (U16)nbSeq); op+=2; - seqHead = op; - - /* dumps : contains too large lengths */ - { - size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; - if (dumpsLength < 512) - { - op[0] = (BYTE)(dumpsLength >> 8); - op[1] = (BYTE)(dumpsLength); - op += 2; - } - else - { - op[0] = 2; - op[1] = (BYTE)(dumpsLength>>8); - op[2] = (BYTE)(dumpsLength); - op += 3; - } - if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall); - memcpy(op, seqStorePtr->dumpsStart, dumpsLength); - op += dumpsLength; - } - - /* CTable for Literal Lengths */ - max = MaxLL; - mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) - { - *op++ = *(seqStorePtr->litLengthStart); - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = bt_rle; - } - else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1)))) - { - FSE_buildCTable_raw(CTable_LitLength, LLbits); - LLtype = bt_raw; - } - else - { - size_t NCountSize; - tableLog = 
FSE_optimalTableLog(LLFSELog, nbSeq, max); - FSE_normalizeCount(norm, tableLog, count, nbSeq, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_LitLength, norm, max, tableLog); - LLtype = bt_compressed; - } - - /* CTable for Offsets codes */ - { - /* create Offset codes */ - size_t i; - max = MaxOff; - for (i=0; i 2)) - { - *op++ = *offCodeTable; - FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); - Offtype = bt_rle; - } - else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1)))) - { - FSE_buildCTable_raw(CTable_OffsetBits, Offbits); - Offtype = bt_raw; - } - else - { - size_t NCountSize; - tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - FSE_normalizeCount(norm, tableLog, count, nbSeq, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); - Offtype = bt_compressed; - } - - /* CTable for MatchLengths */ - max = MaxML; - mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) - { - *op++ = *seqStorePtr->matchLengthStart; - FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); - MLtype = bt_rle; - } - else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1)))) - { - FSE_buildCTable_raw(CTable_MatchLength, MLbits); - MLtype = bt_raw; - } - else - { - size_t NCountSize; - tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); - FSE_normalizeCount(norm, tableLog, count, nbSeq, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); - MLtype = bt_compressed; - } - - seqHead[0] += 
(BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - - /* Encoding Sequences */ - { - size_t streamSize, errorCode; - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - int i; - - errorCode = BIT_initCStream(&blockStream, op, oend-op); - if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); /* not enough space remaining */ - FSE_initCState(&stateMatchLength, CTable_MatchLength); - FSE_initCState(&stateOffsetBits, CTable_OffsetBits); - FSE_initCState(&stateLitLength, CTable_LitLength); - - for (i=(int)nbSeq-1; i>=0; i--) - { - BYTE matchLength = mlTable[i]; - U32 offset = offsetTable[i]; - BYTE offCode = offCodeTable[i]; /* 32b*/ /* 64b*/ - U32 nbBits = (offCode-1) * (!!offCode); - BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - BIT_addBits(&blockStream, offset, nbBits); /* 32 */ /* 42 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */ - FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */ - BIT_flushBits(&blockStream); /* 7 */ /* 7 */ - } - - FSE_flushCState(&blockStream, &stateMatchLength); - FSE_flushCState(&blockStream, &stateOffsetBits); - FSE_flushCState(&blockStream, &stateLitLength); - - streamSize = BIT_closeCStream(&blockStream); - if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ - op += streamSize; - } - - /* check compressibility */ - if ((size_t)(op-dst) >= maxCSize) return 0; - - return op - dst; -} - - - - /* ************************************************************* * Decompression section ***************************************************************/ @@ -526,7 +185,7 @@ static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr, /** ZSTD_decodeLiteralsBlock - @return : nb of bytes 
read from src (< srcSize )*/ + @return : nb of bytes read from src (< srcSize ) */ size_t ZSTD_decodeLiteralsBlock(void* ctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index d606e23dc04..7236cf2aba1 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -44,237 +44,74 @@ extern "C" { #include "error.h" -/* ************************************** -* Function body to include for inlining -****************************************/ -static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; } - +/* ************************************* +* Common macros +***************************************/ #define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) -static unsigned ZSTD_highbit(U32 val) -{ -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - _BitScanReverse(&r, val); - return (unsigned)r; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return 31 - __builtin_clz(val); -# else /* Software version */ - static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - int r; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; - return r; -# endif -} - -MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val) -{ - if (MEM_isLittleEndian()) - { - if (MEM_64bits()) - { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanForward64( &r, (U64)val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return 
DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) - unsigned long r=0; - _BitScanForward( &r, (U32)val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } - else /* Big Endian CPU */ - { - if (MEM_32bits()) - { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } - } -} - - -MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - while ((pInlit, literals, litLength); - seqStorePtr->lit += litLength; - /* literal Length */ - if (litLength >= MaxLL) - { - *(seqStorePtr->litLength++) = MaxLL; - if (litLength<255 + MaxLL) - *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL); - else - { - *(seqStorePtr->dumps++) = 255; - MEM_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3; - } - } - else *(seqStorePtr->litLength++) = 
(BYTE)litLength; +/* ****************************************** +* Shared functions to include for inlining +********************************************/ +static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } - /* match offset */ - *(seqStorePtr->offset++) = (U32)offsetCode; +#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } - /* match Length */ - if (matchCode >= MaxML) - { - *(seqStorePtr->matchLength++) = MaxML; - if (matchCode < 255+MaxML) - *(seqStorePtr->dumps++) = (BYTE)(matchCode - MaxML); - else - { - *(seqStorePtr->dumps++) = 255; - MEM_writeLE32(seqStorePtr->dumps, (U32)matchCode); seqStorePtr->dumps += 3; - } - } - else *(seqStorePtr->matchLength++) = (BYTE)matchCode; +/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */ +static void ZSTD_wildcopy(void* dst, const void* src, size_t length) +{ + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + length; + do + COPY8(op, ip) + while (op < oend); } -/* prototype, body into zstd.c */ -size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, const seqStore_t* seqStorePtr, size_t srcSize); - - #if defined (__cplusplus) } #endif From 5b78d2f20c7e8847b580e8cf2fe875a0271984ed Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 12 Nov 2015 15:36:05 +0100 Subject: [PATCH 05/79] basic rolling buffer support in decoder --- NEWS | 1 + lib/zstd_decompress.c | 98 ++++++++++++++++++++++++++----------------- 2 files changed, 60 insertions(+), 39 deletions(-) diff --git a/NEWS b/NEWS index 94bd7db1926..c8177e8da89 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,6 @@ v0.4.0 Removed zstdhc => merged into zstd +Rolling buffer support v0.3.6 small blocks params diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 38e1e130720..f1019588dbd 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -118,7 +118,7 @@ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } /* 
************************************************************* -* Decompression section +* Context management ***************************************************************/ struct ZSTD_DCtx_s { @@ -127,6 +127,8 @@ struct ZSTD_DCtx_s U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; void* previousDstEnd; void* base; + void* vBase; + void* dictEnd; size_t expected; blockType_t bType; U32 phase; @@ -136,6 +138,35 @@ struct ZSTD_DCtx_s BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */]; }; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */ +size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx) +{ + dctx->expected = ZSTD_frameHeaderSize; + dctx->phase = 0; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + dctx->vBase = NULL; + dctx->dictEnd = NULL; + return 0; +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx)); + if (dctx==NULL) return NULL; + ZSTD_resetDCtx(dctx); + return dctx; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + free(dctx); + return 0; +} + + +/* ************************************************************* +* Decompression section +***************************************************************/ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { @@ -186,10 +217,9 @@ static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr, /** ZSTD_decodeLiteralsBlock @return : nb of bytes read from src (< srcSize ) */ -size_t ZSTD_decodeLiteralsBlock(void* ctx, +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { - ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx; const BYTE* const istart = (const BYTE*) src; /* any compressed block with literals segment must be at least this size */ @@ -427,10 +457,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) } -static size_t ZSTD_execSequence(BYTE* op, +FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit_8, - 
BYTE* const base, BYTE* const oend) + BYTE* const base, BYTE* const vBase, BYTE* const dictEnd, + BYTE* const oend) { static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */ @@ -457,7 +488,9 @@ static size_t ZSTD_execSequence(BYTE* op, /* check */ //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer removing this test ?) */ if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */ - if (match < base) return ERROR(corruption_detected); + if (match < vBase) return ERROR(corruption_detected); + + if (match < base) match = dictEnd - (base-match); /* only works if match + matchLength <= dictEnd */ /* close range match, overlap */ if (sequence.offset < 8) @@ -497,11 +530,10 @@ static size_t ZSTD_execSequence(BYTE* op, } static size_t ZSTD_decompressSequences( - void* ctx, + ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize) { - ZSTD_DCtx* dctx = (ZSTD_DCtx*)ctx; const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; BYTE* const ostart = (BYTE* const)dst; @@ -517,6 +549,8 @@ static size_t ZSTD_decompressSequences( U32* DTableML = dctx->MLTable; U32* DTableOffb = dctx->OffTable; BYTE* const base = (BYTE*) (dctx->base); + BYTE* const vBase = (BYTE*) (dctx->vBase); + BYTE* const dictEnd = (BYTE*) (dctx->dictEnd); /* Build Decoding Tables */ errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, @@ -546,7 +580,7 @@ static size_t ZSTD_decompressSequences( size_t oneSeqSize; nbSeq--; ZSTD_decodeSequence(&sequence, &seqState); - oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litLimit_8, base, oend); + oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litLimit_8, base, vBase, dictEnd, oend); if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; } @@ -570,7 +604,7 @@ 
static size_t ZSTD_decompressSequences( static size_t ZSTD_decompressBlock( - void* ctx, + ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { @@ -578,16 +612,16 @@ static size_t ZSTD_decompressBlock( const BYTE* ip = (const BYTE*)src; /* Decode literals sub-block */ - size_t litCSize = ZSTD_decodeLiteralsBlock(ctx, src, srcSize); + size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); if (ZSTD_isError(litCSize)) return litCSize; ip += litCSize; srcSize -= litCSize; - return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize); + return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize); } -size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { const BYTE* ip = (const BYTE*)src; const BYTE* iend = ip + srcSize; @@ -598,6 +632,10 @@ size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* U32 magicNumber; blockProperties_t blockProperties; + + /* init */ + ctx->base = ctx->vBase = ctx->dictEnd = dst; + /* Frame Header */ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); magicNumber = MEM_readLE32(src); @@ -651,7 +689,6 @@ size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { ZSTD_DCtx ctx; - ctx.base = dst; return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); } @@ -660,29 +697,6 @@ size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t src * Streaming Decompression API ********************************/ -size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx) -{ - dctx->expected = ZSTD_frameHeaderSize; - dctx->phase = 0; - dctx->previousDstEnd = NULL; - dctx->base = NULL; - return 0; -} - -ZSTD_DCtx* ZSTD_createDCtx(void) -{ - ZSTD_DCtx* dctx = 
(ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx)); - if (dctx==NULL) return NULL; - ZSTD_resetDCtx(dctx); - return dctx; -} - -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) -{ - free(dctx); - return 0; -} - size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; @@ -693,7 +707,13 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con /* Sanity check */ if (srcSize != ctx->expected) return ERROR(srcSize_wrong); if (dst != ctx->previousDstEnd) /* not contiguous */ - ctx->base = dst; + { + ctx->dictEnd = ctx->previousDstEnd; + if ((dst > ctx->base) && (dst < ctx->previousDstEnd)) /* rolling buffer : new segment right into tracked memory */ + ctx->base = (char*)dst + maxDstSize; /* temporary affectation, for vBase calculation */ + ctx->vBase = (char*)dst - ((char*)(ctx->dictEnd) - (char*)(ctx->base)); + ctx->base = dst; + } /* Decompress : frame header */ if (ctx->phase == 0) @@ -749,7 +769,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con } ctx->phase = 1; ctx->expected = ZSTD_blockHeaderSize; - ctx->previousDstEnd = (void*)( ((char*)dst) + rSize); + ctx->previousDstEnd = (char*)dst + rSize; return rSize; } From cfdeb3418093100ee1616a5777d280ab6a04e4eb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 12 Nov 2015 16:00:04 +0100 Subject: [PATCH 06/79] fixed g++ link error --- programs/fullbench.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/programs/fullbench.c b/programs/fullbench.c index 239b4180eb9..edf6f4b6fb0 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -61,7 +61,7 @@ #endif #include "mem.h" -#include "zstd.h" +#include "zstd_static.h" #include "fse_static.h" #include "datagen.h" @@ -209,7 +209,7 @@ typedef struct } blockProperties_t; static size_t g_cSize = 0; -static U32 g_litCtx[40 * 1024]; +static ZSTD_DCtx* g_dctxPtr = NULL; extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr); extern size_t 
ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize); @@ -226,11 +226,11 @@ size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* return ZSTD_decompress(dst, dstSize, buff2, g_cSize); } -extern size_t ZSTD_decodeLiteralsBlock(void* ctx, const void* src, size_t srcSize); +extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) { (void)src; (void)srcSize; (void)dst; (void)dstSize; - return ZSTD_decodeLiteralsBlock(g_litCtx, buff2, g_cSize); + return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_dctxPtr, buff2, g_cSize); } size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) @@ -314,6 +314,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) dstBuffSize = ZSTD_compressBound(srcSize); dstBuff = (BYTE*)malloc(dstBuffSize); buff2 = (BYTE*)malloc(dstBuffSize); + g_dctxPtr = ZSTD_createDCtx(); if ((!dstBuff) || (!buff2)) { DISPLAY("\nError: not enough memory!\n"); @@ -361,7 +362,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) } iend = ip + 3 + blockSize; // Get end of first block ip += 3; // jump first block header - ip += ZSTD_decodeLiteralsBlock(g_litCtx, ip, iend-ip); // jump literal sub block and its header + ip += ZSTD_decodeLiteralsBlock(g_dctxPtr, ip, iend-ip); // jump literal sub block and its header g_cSize = iend-ip; memcpy(buff2, ip, g_cSize); // copy rest of block (starting with SeqHeader) srcSize = srcSize > 128 KB ? 
128 KB : srcSize; // speed relative to block @@ -420,6 +421,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) free(dstBuff); free(buff2); + ZSTD_freeDCtx(g_dctxPtr); return 0; } From 0f366c61eda83db0346666a41befb1cc75c37370 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 12 Nov 2015 16:19:30 +0100 Subject: [PATCH 07/79] fixed streaming decoder --- lib/zstd_decompress.c | 11 ++++++----- programs/fullbench.c | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index f1019588dbd..9eab86b5ee3 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -187,7 +187,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp return cSize; } -static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); memcpy(dst, src, srcSize); @@ -486,10 +486,10 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, const BYTE* match = op - sequence.offset; /* check */ - //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer removing this test ?) */ + //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer wrongly removing this test ?) 
*/ if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */ if (match < vBase) return ERROR(corruption_detected); - + if (match < base) match = dictEnd - (base-match); /* only works if match + matchLength <= dictEnd */ /* close range match, overlap */ @@ -663,7 +663,7 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize); break; case bt_raw : - decodedSize = ZSTD_copyUncompressedBlock(op, oend-op, ip, cBlockSize); + decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); break; case bt_rle : return ERROR(GENERIC); /* not yet supported */ @@ -744,6 +744,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con ctx->phase = 2; } + ctx->previousDstEnd = dst; return 0; } @@ -756,7 +757,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); break; case bt_raw : - rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize); + rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize); break; case bt_rle : return ERROR(GENERIC); /* not yet handled */ diff --git a/programs/fullbench.c b/programs/fullbench.c index edf6f4b6fb0..5471134dd04 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -360,8 +360,8 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) free(buff2); return 0; } - iend = ip + 3 + blockSize; // Get end of first block - ip += 3; // jump first block header + iend = ip + 3 + blockSize; /* End of first block */ + ip += 3; /* skip block header */ ip += ZSTD_decodeLiteralsBlock(g_dctxPtr, ip, iend-ip); // jump literal sub block and its header g_cSize = iend-ip; memcpy(buff2, ip, g_cSize); // copy rest of block (starting with SeqHeader) From 89db5e001d436320e877f3052a26267583b41720 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: 
Fri, 13 Nov 2015 11:27:46 +0100 Subject: [PATCH 08/79] rolling buffer preliminary support for fast mode --- lib/mem.h | 6 +- lib/zstd_compress.c | 254 +++++++++++++++++++++++++++++++++++++------- lib/zstd_static.h | 2 +- 3 files changed, 217 insertions(+), 45 deletions(-) diff --git a/lib/mem.h b/lib/mem.h index 01f6ad0fc3b..8ac56ed9a47 100644 --- a/lib/mem.h +++ b/lib/mem.h @@ -49,12 +49,12 @@ extern "C" { /****************************************** * Compiler-specific ******************************************/ -#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +#if defined(__GNUC__) +# define MEM_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define MEM_STATIC static inline #elif defined(_MSC_VER) # define MEM_STATIC static __inline -#elif defined(__GNUC__) -# define MEM_STATIC static __attribute__((unused)) #else # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ #endif diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 6884521ab79..fa720386004 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -104,7 +104,7 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr) struct ZSTD_CCtx_s { - const BYTE* end; /* next block here to continue on current prefix */ + const BYTE* nextSrc; /* next block here to continue on current prefix */ const BYTE* base; /* All regular indexes relative to this position */ const BYTE* dictBase; /* extDict indexes relative to this position */ U32 dictLimit; /* below that point, need extDict */ @@ -166,8 +166,8 @@ void ZSTD_validateParams(ZSTD_parameters* params, U64 srcSizeHint) static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, - ZSTD_parameters params, - U64 srcSizeHint) + ZSTD_parameters params, + U64 srcSizeHint) { ZSTD_validateParams(¶ms, srcSizeHint); @@ -190,7 +190,7 @@ static size_t 
ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, } zc->nextToUpdate = 1; - zc->end = NULL; + zc->nextSrc = NULL; zc->base = NULL; zc->dictBase = NULL; zc->dictLimit = 0; @@ -207,6 +207,22 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, } +static void ZSTD_reduceIndex (ZSTD_CCtx* zc, + const U32 reducerValue) +{ + const U32 contentLog = zc->params.strategy == ZSTD_fast ? 1 : zc->params.contentLog; + const U32 tableSpaceU32 = (1 << contentLog) + (1 << zc->params.hashLog); + U32* table32 = zc->hashTable; + U32 index; + + for (index=0 ; index < tableSpaceU32 ; index++) + { + if (table32[index] < reducerValue) table32[index] = 0; + else table32[index] -= reducerValue; + } +} + + /* ******************************************************* * Block entropic compression *********************************************************/ @@ -696,24 +712,22 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) FORCE_INLINE size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - const U32 mls) + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + const U32 mls) { U32* hashTable = ctx->hashTable; const U32 hBits = ctx->params.hashLog; seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const base = ctx->base; - const size_t maxDist = ((size_t)1 << ctx->params.windowLog); - const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const BYTE* const lowest = (size_t)(istart-base) > maxDist ? 
istart-maxDist : base; + const BYTE* const lowest = base + ctx->lowLimit; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - size_t offset_2=4, offset_1=4; + size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; /* init */ @@ -790,6 +804,116 @@ size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, } +FORCE_INLINE +size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* hashTable = ctx->hashTable; + const U32 hBits = ctx->params.hashLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowLimit = ctx->lowLimit; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + + size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + + + /* init */ + ZSTD_resetSeqStore(seqStorePtr); + { + /* skip first 4 positions to avoid read overflow during repcode match check */ + hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0); + hashTable[ZSTD_hashPtr(ip+1, hBits, mls)] = (U32)(ip-base+1); + hashTable[ZSTD_hashPtr(ip+2, hBits, mls)] = (U32)(ip-base+2); + hashTable[ZSTD_hashPtr(ip+3, hBits, mls)] = (U32)(ip-base+3); + ip += 4; + } + + /* Main Search Loop */ + while (ip < ilimit) /* < instead of <=, because unconditional ZSTD_addPtr(ip+1) */ + { + const size_t h = ZSTD_hashPtr(ip, hBits, mls); + U32 matchIndex = hashTable[h]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictBase + lowLimit : base + dictLimit; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current - (U32)offset_2; + const BYTE* repBase = repIndex < dictLimit ? 
dictBase : base; + const BYTE* repMatch = repBase + repIndex; + hashTable[h] = current; /* update hash table */ + + if ( ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) + &&(MEM_read32(repMatch) == MEM_read32(ip)) ) + match = repMatch, matchIndex = repIndex, matchBase = repBase, + lowMatchPtr = repIndex < dictLimit ? dictBase + lowLimit : base + dictLimit ; + if ( (matchIndex < lowLimit) || + (MEM_read32(match) != MEM_read32(ip)) ) + { ip += ((ip-anchor) >> g_searchStrength) + 1; offset_2 = offset_1; continue; } + /* found a match */ + while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; } /* catch up */ + + { + size_t litLength = ip-anchor; + size_t maxLength = MIN((size_t)(iend-ip-MINMATCH), (size_t)(dictLimit - matchIndex)); /* works even if matchIndex > dictLimit */ + const BYTE* const iEndCount = ip + maxLength; + size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); + size_t offsetCode = current-matchIndex; + if (offsetCode == offset_2) offsetCode = 0; + offset_2 = offset_1; + offset_1 = current-matchIndex; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength); + + /* Fill Table */ + hashTable[ZSTD_hashPtr(ip+1, hBits, mls)] = current+1; + ip += matchLength + MINMATCH; + anchor = ip; + if (ip < ilimit) /* same test as main loop, helps speed */ + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + } + } + + /* Last Literals */ + { + size_t lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } + + /* Finale compression stage */ + return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + seqStorePtr, srcSize); +} + + +size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.searchLength; + switch(mls) + { + default: + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 4); + case 
5 : + return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 7); + } +} + + /* ************************************* * Binary Tree search ***************************************/ @@ -1024,8 +1148,7 @@ size_t ZSTD_HcFindBestMatch ( const BYTE* const base = zc->base; const BYTE* const dictBase = zc->dictBase; const U32 dictLimit = zc->dictLimit; - const U32 maxDistance = (1 << zc->params.windowLog); - const U32 lowLimit = (zc->lowLimit + maxDistance > (U32)(ip-base)) ? zc->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const U32 lowLimit = zc->lowLimit; U32 matchIndex; const BYTE* match; int nbAttempts=maxNbAttempts; @@ -1329,28 +1452,48 @@ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, c typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); -static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat) +static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { - switch(strat) + if (extDict) { - default : - case ZSTD_fast: - return ZSTD_compressBlock_fast; - case ZSTD_greedy: - return ZSTD_compressBlock_greedy; - case ZSTD_lazy: - return ZSTD_compressBlock_lazy; - case ZSTD_lazy2: - return ZSTD_compressBlock_lazy2; - case ZSTD_btlazy2: - return ZSTD_compressBlock_btlazy2; + switch(strat) + { + default : + case ZSTD_fast: + return ZSTD_compressBlock_fast_extDict; + case ZSTD_greedy: + return ZSTD_compressBlock_greedy; + case ZSTD_lazy: + return ZSTD_compressBlock_lazy; + case ZSTD_lazy2: + return ZSTD_compressBlock_lazy2; + case ZSTD_btlazy2: + return ZSTD_compressBlock_btlazy2; + } + } + else + { + switch(strat) + { + default : + case ZSTD_fast: + return ZSTD_compressBlock_fast; + case 
ZSTD_greedy: + return ZSTD_compressBlock_greedy; + case ZSTD_lazy: + return ZSTD_compressBlock_lazy; + case ZSTD_lazy2: + return ZSTD_compressBlock_lazy2; + case ZSTD_btlazy2: + return ZSTD_compressBlock_btlazy2; + } } } size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctx->params.strategy); + ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctx->params.strategy, ctx->lowLimit < ctx->dictLimit); return blockCompressor(ctx, dst, maxDstSize, src, srcSize); } @@ -1364,16 +1507,22 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr, const BYTE* ip = (const BYTE*)src; BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; - const ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctxPtr->params.strategy); + const U32 maxDist = 1 << ctxPtr->params.windowLog; + //const ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctxPtr->params.strategy, ctxPtr->lowLimit < ctxPtr->dictLimit); while (remaining) { size_t cSize; if (maxDstSize < 3 + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ - if (remaining < blockSize) blockSize = remaining; - cSize = blockCompressor(ctxPtr, op+3, maxDstSize-3, ip, blockSize); + + if ((U32)(ip+blockSize - (ctxPtr->base + ctxPtr->lowLimit)) > maxDist) + /* respect windowLog contract */ + ctxPtr->lowLimit = (U32)(ip+blockSize - ctxPtr->base) - maxDist; + + //cSize = blockCompressor(ctxPtr, op+3, maxDstSize-3, ip, blockSize); + cSize = ZSTD_compressBlock(ctxPtr, op+3, maxDstSize-3, ip, blockSize); if (ZSTD_isError(cSize)) return cSize; if (cSize == 0) @@ -1405,28 +1554,52 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* ctxPtr, { const BYTE* const ip = (const BYTE*) src; + /* preemptive overflow correction */ + if (ctxPtr->lowLimit > (1<<30) ) + { + U32 correction = ctxPtr->lowLimit; + ZSTD_reduceIndex(ctxPtr, correction); + 
ctxPtr->base += correction; + ctxPtr->dictBase += correction; + ctxPtr->lowLimit -= correction; + ctxPtr->dictLimit -= correction; + if (ctxPtr->nextToUpdate < correction) ctxPtr->nextToUpdate = 0; + else ctxPtr->nextToUpdate -= correction; + } + /* Check if blocks follow each other */ - if (ip != ctxPtr->end) + if (src != ctxPtr->nextSrc) + { + /* not contiguous */ + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictLimit = (U32)(ctxPtr->nextSrc - ctxPtr->base); + ctxPtr->dictBase = ctxPtr->base; + ctxPtr->base += ip - ctxPtr->nextSrc; + } + + /* input-dictionary overlap */ + if ((ip+srcSize > ctxPtr->dictBase + ctxPtr->lowLimit) && (ip < ctxPtr->dictBase + ctxPtr->dictLimit)) { - if (ctxPtr->end != NULL) - ZSTD_resetCCtx_advanced(ctxPtr, ctxPtr->params, srcSize); - ctxPtr->base = ip; + ctxPtr->lowLimit = (U32)(ip + srcSize - ctxPtr->dictBase); + if (ctxPtr->lowLimit > ctxPtr->dictLimit) ctxPtr->lowLimit = ctxPtr->dictLimit; } - ctxPtr->end = ip + srcSize; + ctxPtr->nextSrc = ip + srcSize; + return ZSTD_compress_generic (ctxPtr, dst, dstSize, src, srcSize); } size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, - const ZSTD_parameters params, - U64 srcSizeHint) + void* dst, size_t maxDstSize, + const ZSTD_parameters params, + const U64 srcSizeHint) { size_t errorCode; if (maxDstSize < 4) return ERROR(dstSize_tooSmall); errorCode = ZSTD_resetCCtx_advanced(ctx, params, srcSizeHint); if (ZSTD_isError(errorCode)) return errorCode; + MEM_writeLE32(dst, ZSTD_magicNumber); /* Write Header */ return 4; } @@ -1434,7 +1607,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint) { - int tableID = ((srcSizeHint-1) > 128 KB); /* intentional underflow for 0 */ + int tableID = ((srcSizeHint-1) > 128 KB); /* intentional underflow for srcSizeHint == 0 */ if (compressionLevel<=0) compressionLevel = 1; if (compressionLevel > ZSTD_MAX_CLEVEL) 
compressionLevel = ZSTD_MAX_CLEVEL; return ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_defaultParameters[tableID][compressionLevel], srcSizeHint); @@ -1498,7 +1671,6 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) { const int tableID = (srcSize > 128 KB); - //if (compressionLevel<=1) return ZSTD_compress(dst, maxDstSize, src, srcSize); /* fast mode */ if (compressionLevel < 1) compressionLevel = 1; if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_defaultParameters[tableID][compressionLevel]); @@ -1510,6 +1682,6 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi ZSTD_CCtx ctxBody; memset(&ctxBody, 0, sizeof(ctxBody)); result = ZSTD_compressCCtx(&ctxBody, dst, maxDstSize, src, srcSize, compressionLevel); - free(ctxBody.workSpace); + free(ctxBody.workSpace); /* can't free ctxBody, since it's on stack; take care of heap content */ return result; } diff --git a/lib/zstd_static.h b/lib/zstd_static.h index f29120ede81..fc4dbe30e56 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -154,7 +154,7 @@ static const ZSTD_parameters ZSTD_defaultParameters[2][ZSTD_MAX_CLEVEL+1] = { { /* for > 128 KB */ /* W, C, H, S, L, strat */ { 18, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */ - { 18, 14, 14, 1, 7, ZSTD_fast }, /* level 1 - in fact redirected towards zstd fast */ + { 19, 13, 14, 1, 7, ZSTD_fast }, /* level 1 - in fact redirected towards zstd fast */ { 19, 15, 16, 1, 6, ZSTD_fast }, /* level 2 */ { 20, 18, 20, 1, 6, ZSTD_fast }, /* level 3 */ { 21, 19, 21, 1, 6, ZSTD_fast }, /* level 4 */ From 93a823ccb73d2941fd66a076b55bd74bba563c01 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 13 Nov 2015 15:08:43 +0100 Subject: [PATCH 09/79] fixed asan warnings --- lib/zstd_compress.c | 4 +- 
lib/zstd_decompress.c | 95 +++++++++++++++++++++++++++---------------- programs/fullbench.c | 9 ++-- 3 files changed, 66 insertions(+), 42 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index fa720386004..a128ed8cca0 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -751,7 +751,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, if ( (match < lowest) || (MEM_read32(match) != MEM_read32(ip)) ) { ip += ((ip-anchor) >> g_searchStrength) + 1; offset_2 = offset_1; continue; } - while ((ip>anchor) && (match>base) && (ip[-1] == match[-1])) { ip--; match--; } /* catch up */ + while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; } /* catch up */ { size_t litLength = ip-anchor; @@ -804,7 +804,7 @@ size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, } -FORCE_INLINE +//FORCE_INLINE size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 9eab86b5ee3..e195c7c17b2 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -488,45 +488,72 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, /* check */ //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer wrongly removing this test ?) 
*/ if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */ - if (match < vBase) return ERROR(corruption_detected); - if (match < base) match = dictEnd - (base-match); /* only works if match + matchLength <= dictEnd */ - - /* close range match, overlap */ - if (sequence.offset < 8) - { - const int dec64 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= dec64; - } - else + if (match < base) { - ZSTD_copy8(op, match); - } - op += 8; match += 8; + /* offset beyond prefix */ + if (match < vBase) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd - 8) + { + ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ + return oMatchEnd - ostart; + } + if (match + sequence.matchLength <= dictEnd) + { + memcpy(op, match, sequence.matchLength); + return oMatchEnd - ostart; + } + /* span extDict & currentPrefixSegment */ + { + size_t length1 = dictEnd - match; + size_t length2 = sequence.matchLength - length1; + memcpy(op, match, length1); + op += length1; + memcpy(op, base, length2); /* will fail in case of overlapping match */ + return oMatchEnd - ostart; + } + } + + { + /* match within prefix */ + if (sequence.offset < 8) + { + /* close range match, overlap */ + const int dec64 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= dec64; + } + else + { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-12) + { + if (op < oend_8) + { + ZSTD_wildcopy(op, match, oend_8 - op); + match += oend_8 - op; + op = oend_8; + } + while (op < oMatchEnd) *op++ = *match++; + } + else + { + ZSTD_wildcopy(op, match, 
sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return oMatchEnd - ostart; + } - if (oMatchEnd > oend-12) - { - if (op < oend_8) - { - ZSTD_wildcopy(op, match, oend_8 - op); - match += oend_8 - op; - op = oend_8; - } - while (op < oMatchEnd) *op++ = *match++; - } - else - { - ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ - } } - return oMatchEnd - ostart; } static size_t ZSTD_decompressSequences( diff --git a/programs/fullbench.c b/programs/fullbench.c index 5471134dd04..5ee5d493153 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -336,9 +336,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) if (bp.blockType != bt_compressed) { DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n"); - free(dstBuff); - free(buff2); - return 0; + goto _cleanOut; } memcpy(buff2, dstBuff+7, g_cSize-7); srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block @@ -356,9 +354,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) if (bp.blockType != bt_compressed) { DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n"); - free(dstBuff); - free(buff2); - return 0; + goto _cleanOut; } iend = ip + 3 + blockSize; /* End of first block */ ip += 3; /* skip block header */ @@ -419,6 +415,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) DISPLAY("%2u- %-30.30s : %7.1f MB/s (%9u)\n", benchNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode); +_cleanOut: free(dstBuff); free(buff2); ZSTD_freeDCtx(g_dctxPtr); From 2666d3e56c0b58307ce42ba52e24055b6246d1f1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 14 Nov 2015 11:07:51 +0100 Subject: [PATCH 10/79] more tests --- programs/Makefile | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index ac713c3f9c7..f728502ffdb 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ 
-97,7 +97,7 @@ datagen : datagen.c datagencli.c $(CC) $(FLAGS) $^ -o $@$(EXT) clean: - @rm -f core *.o tmp \ + @rm -f core *.o tmp* \ zstd$(EXT) zstd32$(EXT) \ fullbench$(EXT) fullbench32$(EXT) \ fuzzer$(EXT) fuzzer32$(EXT) \ @@ -153,11 +153,19 @@ test-zstd: zstd datagen echo foo | ./zstd > /dev/full; if [ $$? -eq 0 ] ; then echo "write error not detected!"; false; fi echo foo | ./zstd | ./zstd -d > /dev/full; if [ $$? -eq 0 ] ; then echo "write error not detected!"; false; fi @echo "**** zstd round-trip tests **** " - ./datagen | ./zstd -v | ./zstd -d > $(VOID) - ./datagen | ./zstd -6 -v | ./zstd -d > $(VOID) - ./datagen -g256MB | ./zstd -v | ./zstd -d > $(VOID) - ./datagen -g256MB | ./zstd -3 -v | ./zstd -d > $(VOID) - ./datagen -g6GB -P99 | ./zstd -vq | ./zstd -d > $(VOID) + ./datagen | md5sum > tmp1 + ./datagen | ./zstd -v | ./zstd -d | md5sum > tmp2 + diff tmp1 tmp2 # check potential differences + ./datagen | ./zstd -6 -v | ./zstd -d | md5sum > tmp2 + diff tmp1 tmp2 # check potential differences + ./datagen -g256MB | md5sum > tmp1 + ./datagen -g256MB | ./zstd -v | ./zstd -d | md5sum > tmp2 + #diff tmp1 tmp2 # check potential differences + ./datagen -g256MB | ./zstd -3 -v | ./zstd -d | md5sum > tmp2 + #diff tmp1 tmp2 # check potential differences + ./datagen -g6GB -P99 | md5sum > tmp1 + ./datagen -g6GB -P99 | ./zstd -vq | ./zstd -d | md5sum > tmp2 + #diff tmp1 tmp2 # check potential differences test-zstd32: zstd32 datagen ./datagen | ./zstd32 -v | ./zstd32 -d > $(VOID) From eaa5f75b992c3ba936e3928c6a605e6e36634a79 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 15 Nov 2015 11:13:41 +0100 Subject: [PATCH 11/79] fixed decompression bug --- lib/zstd_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index e195c7c17b2..d395983cbe0 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -496,7 +496,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, match = dictEnd - 
(base-match); if (match + sequence.matchLength <= dictEnd - 8) { - ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ + ZSTD_wildcopy(op, match, sequence.matchLength); /* works even if matchLength < 8 */ return oMatchEnd - ostart; } if (match + sequence.matchLength <= dictEnd) From 82368cfa1bafa40fd31c821a8ffc0be3720a7355 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 16 Nov 2015 19:10:56 +0100 Subject: [PATCH 12/79] comment --- lib/zstd_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index d395983cbe0..472bc9340e4 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -310,7 +310,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen /* sequences */ { - S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */ + S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */ size_t headerSize; /* Build DTables */ From 786eb7d42e97478b3ab82d8e7384fd52a870104f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 17 Nov 2015 12:23:53 +0100 Subject: [PATCH 13/79] fixed repcode bug --- lib/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index a128ed8cca0..95a6e774ad4 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -863,7 +863,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, { size_t litLength = ip-anchor; - size_t maxLength = MIN((size_t)(iend-ip-MINMATCH), (size_t)(dictLimit - matchIndex)); /* works even if matchIndex > dictLimit */ + size_t maxLength = matchIndex < dictLimit ? 
MIN((size_t)(iend-ip-MINMATCH), (size_t)(dictLimit - matchIndex)) : (size_t)(iend-ip-MINMATCH); const BYTE* const iEndCount = ip + maxLength; size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); size_t offsetCode = current-matchIndex; From 138e89c9c42ffc8ea529b1902dfbd50bd763244b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 17 Nov 2015 14:26:54 +0100 Subject: [PATCH 14/79] fix compression bug (extDict mode) --- lib/zstd_compress.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 95a6e774ad4..ad3db50c5ad 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -823,7 +823,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; /* init */ @@ -846,7 +846,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictBase + lowLimit : base + dictLimit; const BYTE* match = matchBase + matchIndex; const U32 current = (U32)(ip-base); - const U32 repIndex = current - (U32)offset_2; + const U32 repIndex = current - offset_2; const BYTE* repBase = repIndex < dictLimit ? dictBase : base; const BYTE* repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ @@ -863,10 +863,12 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, { size_t litLength = ip-anchor; - size_t maxLength = matchIndex < dictLimit ? MIN((size_t)(iend-ip-MINMATCH), (size_t)(dictLimit - matchIndex)) : (size_t)(iend-ip-MINMATCH); - const BYTE* const iEndCount = ip + maxLength; + const BYTE* matchEnd = matchIndex < dictLimit ? dictBase + dictLimit : iend; + const BYTE* iEndCount = (matchEnd - match < iend - ip) ? 
ip + (matchEnd - match) : iend; size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); size_t offsetCode = current-matchIndex; + if (matchIndex + matchLength + MINMATCH == dictLimit) + matchLength += ZSTD_count(ip + matchLength + MINMATCH, base + dictLimit, iend); if (offsetCode == offset_2) offsetCode = 0; offset_2 = offset_1; offset_1 = current-matchIndex; From 94b9d8ec8db8d4ca937ea55502e89a579c33c8cd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 17 Nov 2015 16:14:55 +0100 Subject: [PATCH 15/79] fix compression bug in rollbuffer mode --- lib/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index ad3db50c5ad..53dfc742ff7 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -867,7 +867,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* iEndCount = (matchEnd - match < iend - ip) ? ip + (matchEnd - match) : iend; size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); size_t offsetCode = current-matchIndex; - if (matchIndex + matchLength + MINMATCH == dictLimit) + if (match + matchLength + MINMATCH == matchEnd) matchLength += ZSTD_count(ip + matchLength + MINMATCH, base + dictLimit, iend); if (offsetCode == offset_2) offsetCode = 0; offset_2 = offset_1; From b2549846ba6488cd5610e4f580dfdf1c843a7d32 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 18 Nov 2015 11:29:32 +0100 Subject: [PATCH 16/79] more tests --- lib/zstd_decompress.c | 132 ++++++++++++++++++++---------------------- programs/Makefile | 10 ++-- 2 files changed, 68 insertions(+), 74 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 472bc9340e4..1f232ccb50d 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -489,68 +489,62 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer wrongly removing this test ?) 
*/ if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */ - if (match < base) + if (match < base) { - /* offset beyond prefix */ - if (match < vBase) return ERROR(corruption_detected); - match = dictEnd - (base-match); - if (match + sequence.matchLength <= dictEnd - 8) - { - ZSTD_wildcopy(op, match, sequence.matchLength); /* works even if matchLength < 8 */ - return oMatchEnd - ostart; - } - if (match + sequence.matchLength <= dictEnd) - { - memcpy(op, match, sequence.matchLength); - return oMatchEnd - ostart; - } - /* span extDict & currentPrefixSegment */ - { - size_t length1 = dictEnd - match; - size_t length2 = sequence.matchLength - length1; - memcpy(op, match, length1); - op += length1; - memcpy(op, base, length2); /* will fail in case of overlapping match */ - return oMatchEnd - ostart; - } - } - - { - /* match within prefix */ - if (sequence.offset < 8) - { - /* close range match, overlap */ - const int dec64 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= dec64; - } - else - { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-12) - { - if (op < oend_8) - { - ZSTD_wildcopy(op, match, oend_8 - op); - match += oend_8 - op; - op = oend_8; - } - while (op < oMatchEnd) *op++ = *match++; - } - else - { - ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ - } - return oMatchEnd - ostart; - } + /* offset beyond prefix */ + if (match < vBase) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) + { + memcpy(op, match, sequence.matchLength); + return oMatchEnd - ostart; + } + /* span extDict & currentPrefixSegment */ + { + size_t length1 = dictEnd - match; + memcpy(op, match, length1); + op += length1; + 
sequence.matchLength -= length1; + match = base; + } + } + + { + /* match within prefix */ + if (sequence.offset < 8) + { + /* close range match, overlap */ + const int dec64 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= dec64; + } + else + { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-12) + { + if (op < oend_8) + { + ZSTD_wildcopy(op, match, oend_8 - op); + match += oend_8 - op; + op = oend_8; + } + while (op < oMatchEnd) *op++ = *match++; + } + else + { + ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return oMatchEnd - ostart; + } } @@ -660,8 +654,8 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v blockProperties_t blockProperties; - /* init */ - ctx->base = ctx->vBase = ctx->dictEnd = dst; + /* init */ + ctx->base = ctx->vBase = ctx->dictEnd = dst; /* Frame Header */ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); @@ -735,12 +729,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con if (srcSize != ctx->expected) return ERROR(srcSize_wrong); if (dst != ctx->previousDstEnd) /* not contiguous */ { - ctx->dictEnd = ctx->previousDstEnd; - if ((dst > ctx->base) && (dst < ctx->previousDstEnd)) /* rolling buffer : new segment right into tracked memory */ - ctx->base = (char*)dst + maxDstSize; /* temporary affectation, for vBase calculation */ - ctx->vBase = (char*)dst - ((char*)(ctx->dictEnd) - (char*)(ctx->base)); - ctx->base = dst; - } + ctx->dictEnd = ctx->previousDstEnd; + if ((dst > ctx->base) && (dst < ctx->previousDstEnd)) /* rolling buffer : new segment right into tracked memory */ + ctx->base = (char*)dst + maxDstSize; /* temporary affectation, for vBase calculation */ + ctx->vBase = (char*)dst - ((char*)(ctx->dictEnd) - 
(char*)(ctx->base)); + ctx->base = dst; + } /* Decompress : frame header */ if (ctx->phase == 0) diff --git a/programs/Makefile b/programs/Makefile index f728502ffdb..f46afab8f55 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -155,17 +155,17 @@ test-zstd: zstd datagen @echo "**** zstd round-trip tests **** " ./datagen | md5sum > tmp1 ./datagen | ./zstd -v | ./zstd -d | md5sum > tmp2 - diff tmp1 tmp2 # check potential differences + diff tmp1 tmp2 ./datagen | ./zstd -6 -v | ./zstd -d | md5sum > tmp2 - diff tmp1 tmp2 # check potential differences + diff tmp1 tmp2 ./datagen -g256MB | md5sum > tmp1 ./datagen -g256MB | ./zstd -v | ./zstd -d | md5sum > tmp2 - #diff tmp1 tmp2 # check potential differences + diff tmp1 tmp2 ./datagen -g256MB | ./zstd -3 -v | ./zstd -d | md5sum > tmp2 - #diff tmp1 tmp2 # check potential differences + diff tmp1 tmp2 ./datagen -g6GB -P99 | md5sum > tmp1 ./datagen -g6GB -P99 | ./zstd -vq | ./zstd -d | md5sum > tmp2 - #diff tmp1 tmp2 # check potential differences + diff tmp1 tmp2 test-zstd32: zstd32 datagen ./datagen | ./zstd32 -v | ./zstd32 -d > $(VOID) From dcac00e7a60b040429286f64d8fa92f3621790f0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Nov 2015 11:23:44 +0100 Subject: [PATCH 17/79] update library builder --- lib/Makefile | 13 ++++++++----- lib/zstd_decompress.c | 7 +++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index f027076f0d6..be82c34635e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -27,8 +27,8 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # You can contact the author at : +# - ZSTD homepage : http://www.zstd.net # - ZSTD source repository : https://github.com/Cyan4973/zstd -# - Public forum : https://groups.google.com/forum/#!forum/lz4c # ################################################################ # Version numbers @@ -63,14 +63,17 @@ else SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER) endif -default: libzstd -all: libzstd +.PHONY: default all clean install uninstall -libzstd: zstd_compress.c zstd_decompress.c huff0.c fse.c +default: clean libzstd + +all: clean libzstd + +libzstd: zstd_compress.c zstd_decompress.c huff0.c fse.c legacy/zstd_v01.c legacy/zstd_v02.c @echo compiling static library @$(CC) $(FLAGS) -c $^ - @$(AR) rcs libzstd.a zstd_compress.o zstd_decompress.o huff0.o fse.o + @$(AR) rcs libzstd.a *.o @echo compiling dynamic library $(LIBVER) @$(CC) $(FLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER) @echo creating versioned links diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 1f232ccb50d..87548b6c207 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -35,9 +35,8 @@ *****************************************************************/ /*! * HEAPMODE : - * Select how default compression functions will allocate memory for their hash table, + * Select how default functions will allocate memory for their context, * in memory stack (0, fastest), or in memory heap (1, requires malloc()) - * Note that compression context is fairly large, as a consequence heap memory is recommended. 
*/ #ifndef ZSTD_HEAPMODE # define ZSTD_HEAPMODE 1 @@ -393,6 +392,10 @@ typedef struct { } seqState_t; +/** ZSTD_decodeSequence +* Decode the next sequence, defined as nbLiterals, PtrToLiterals, nbMatches, Offset +* @seq : store sequence into this seq_t +*/ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) { size_t litLength; From 110cc14bab4e4dd689201a6334d9b992e7e1fc69 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Nov 2015 12:02:28 +0100 Subject: [PATCH 18/79] fixed bug #73 notified by @nemequ --- lib/zstd_compress.c | 7 ++++--- programs/fuzzer.c | 23 ++++++++++++++++------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 53dfc742ff7..ccfabf896a3 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1218,8 +1218,8 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( /* common lazy function, to be inlined */ FORCE_INLINE size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, const void* src, size_t srcSize, - const U32 searchMethod, const U32 deep) /* 0 : hc; 1 : bt */ + void* dst, size_t maxDstSize, const void* src, size_t srcSize, + const U32 searchMethod, const U32 deep) /* searchMethod : 0 = hc; 1 = bt */ { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -1496,6 +1496,7 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctx->params.strategy, ctx->lowLimit < ctx->dictLimit); + if (srcSize < MIN_CBLOCK_SIZE+3) return 0; /* don't even attempt compression below a certain srcSize */ return blockCompressor(ctx, dst, maxDstSize, src, srcSize); } @@ -1536,7 +1537,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr, op[0] = (BYTE)(cSize>>16); op[1] = (BYTE)(cSize>>8); op[2] = 
(BYTE)cSize; - op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */ + op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */ cSize += 3; } diff --git a/programs/fuzzer.c b/programs/fuzzer.c index ec1ccbd548b..d3af3ff89b0 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -63,7 +63,7 @@ #define MB *(1U<<20) #define GB *(1U<<30) -static const U32 nbTestsDefault = 32 KB; +static const U32 nbTestsDefault = 30000; #define COMPRESSIBLE_NOISE_LENGTH (10 MB) #define FUZ_COMPRESSIBILITY_DEFAULT 50 static const U32 prime1 = 2654435761U; @@ -89,6 +89,8 @@ static U32 g_time = 0; /********************************************************* * Fuzzer functions *********************************************************/ +#define MAX(a,b) ((a)>(b)?(a):(b)) + static U32 FUZ_GetMilliStart(void) { struct timeb tb; @@ -299,6 +301,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit U32 sampleSizeLog, buffNb, cLevelMod; U64 crcOrig, crcDest; int cLevel; + BYTE* sampleBuffer; /* init */ DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); @@ -325,13 +328,17 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit sampleSize = (size_t)1 << sampleSizeLog; sampleSize += FUZ_rand(&lseed) & (sampleSize-1); sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize); - crcOrig = XXH64(srcBuffer + sampleStart, sampleSize, 0); - /* HC compression test */ -#define MAX(a,b) ((a)>(b)?(a):(b)) + /* create sample buffer (to catch read error with valgrind & sanitizers) */ + sampleBuffer = (BYTE*)malloc(sampleSize); + CHECK (sampleBuffer==NULL, "not enough memory for sample buffer"); + memcpy(sampleBuffer, srcBuffer + sampleStart, sampleSize); + crcOrig = XXH64(sampleBuffer, sampleSize, 0); + + /* compression test */ cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2)); /* use high compression levels with small samples, for speed */ cLevel = (FUZ_rand(&lseed) % cLevelMod) +1; - cSize = ZSTD_compressCCtx(hcctx, 
cBuffer, cBufferSize, srcBuffer + sampleStart, sampleSize, cLevel); + cSize = ZSTD_compressCCtx(hcctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel); CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed"); /* compression failure test : too small dest buffer */ @@ -343,7 +350,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit static const U32 endMark = 0x4DC2B1A9; U32 endCheck; memcpy(dstBuffer+tooSmallSize, &endMark, 4); - errorCode = ZSTD_compressCCtx(hcctx, dstBuffer, tooSmallSize, srcBuffer + sampleStart, sampleSize, cLevel); + errorCode = ZSTD_compressCCtx(hcctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); memcpy(&endCheck, dstBuffer+tooSmallSize, 4); CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); @@ -354,7 +361,9 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit dSize = ZSTD_decompress(dstBuffer, sampleSize + dSupSize, cBuffer, cSize); CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s) (srcSize : %u ; cSize : %u)", ZSTD_getErrorName(dSize), (U32)sampleSize, (U32)cSize); crcDest = XXH64(dstBuffer, sampleSize, 0); - CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (U32)findDiff(srcBuffer+sampleStart, dstBuffer, sampleSize), (U32)sampleSize); + CHECK(crcOrig != crcDest, "decompression result corrupted (pos %u / %u)", (U32)findDiff(sampleBuffer, dstBuffer, sampleSize), (U32)sampleSize); + + free(sampleBuffer); /* no longer useful after this point */ /* truncated src decompression test */ { From b3a2af970e403a59c1a6b33c2df079945ad628f3 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Nov 2015 17:13:19 +0100 Subject: [PATCH 19/79] refactored decompression --- lib/zstd_decompress.c | 166 +++++++++++++++++++++--------------------- 1 file changed, 84 insertions(+), 82 deletions(-) 
diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 87548b6c207..c6938d48b82 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -35,8 +35,9 @@ *****************************************************************/ /*! * HEAPMODE : - * Select how default functions will allocate memory for their context, + * Select how default compression functions will allocate memory for their hash table, * in memory stack (0, fastest), or in memory heap (1, requires malloc()) + * Note that compression context is fairly large, as a consequence heap memory is recommended. */ #ifndef ZSTD_HEAPMODE # define ZSTD_HEAPMODE 1 @@ -392,10 +393,6 @@ typedef struct { } seqState_t; -/** ZSTD_decodeSequence -* Decode the next sequence, defined as nbLiterals, PtrToLiterals, nbMatches, Offset -* @seq : store sequence into this seq_t -*/ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) { size_t litLength; @@ -461,18 +458,18 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, - seq_t sequence, + BYTE* const oend, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit_8, - BYTE* const base, BYTE* const vBase, BYTE* const dictEnd, - BYTE* const oend) + BYTE* const base, BYTE* const vBase, BYTE* const dictEnd) { - static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ - static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */ - const BYTE* const ostart = op; + static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ BYTE* const oLitEnd = op + sequence.litLength; - BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ + const size_t sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ BYTE* const 
oend_8 = oend-8; const BYTE* const litEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; /* check */ if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */ @@ -485,74 +482,67 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, *litPtr = litEnd; /* update for next sequence */ /* copy Match */ - { - const BYTE* match = op - sequence.offset; - - /* check */ - //if (match > op) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer wrongly removing this test ?) */ - if (sequence.offset > (size_t)op) return ERROR(corruption_detected); /* address space overflow test (this test seems kept by clang optimizer) */ - - if (match < base) - { - /* offset beyond prefix */ - if (match < vBase) return ERROR(corruption_detected); - match = dictEnd - (base-match); - if (match + sequence.matchLength <= dictEnd) - { - memcpy(op, match, sequence.matchLength); - return oMatchEnd - ostart; - } - /* span extDict & currentPrefixSegment */ - { - size_t length1 = dictEnd - match; - memcpy(op, match, length1); - op += length1; - sequence.matchLength -= length1; - match = base; - } - } - - { - /* match within prefix */ - if (sequence.offset < 8) - { - /* close range match, overlap */ - const int dec64 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= dec64; - } - else - { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-12) - { - if (op < oend_8) - { - ZSTD_wildcopy(op, match, oend_8 - op); - match += oend_8 - op; - op = oend_8; - } - while (op < oMatchEnd) *op++ = *match++; - } - else - { - ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ - } - return oMatchEnd - ostart; - } - - } - + /* check */ + //if (match > oLitEnd) return ERROR(corruption_detected); /* address 
space overflow test (is clang optimizer wrongly removing this test ?) */ + if (sequence.offset > (size_t)oLitEnd) return ERROR(corruption_detected); /* address space overflow test (this test seems preserved by clang optimizer) */ + + if (match < base) + { + /* offset beyond prefix */ + if (match < vBase) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) + { + memcpy(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { + size_t length1 = dictEnd - match; + memcpy(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + } + } + + /* match within prefix */ + if (sequence.offset < 8) + { + /* close range match, overlap */ + const int sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } + else + { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-12) + { + if (op < oend_8) + { + ZSTD_wildcopy(op, match, oend_8 - op); + match += oend_8 - op; + op = oend_8; + } + while (op < oMatchEnd) *op++ = *match++; + } + else + { + ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; } + static size_t ZSTD_decompressSequences( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, @@ -599,20 +589,32 @@ static size_t ZSTD_decompressSequences( FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (nbSeq>0) ; ) + for ( ; (BIT_reloadDStream(&(seqState.DStream)) < BIT_DStream_completed) ; ) { size_t oneSeqSize; nbSeq--; ZSTD_decodeSequence(&sequence, &seqState); - oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, 
litLimit_8, base, vBase, dictEnd, oend); + oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; } - /* check if reached exact end */ - if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */ if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */ + /* now BIT_reloadDStream(&(seqState.DStream)) >= BIT_DStream_completed) */ + for ( ; (BIT_reloadDStream(&(seqState.DStream)) == BIT_DStream_completed) && nbSeq ; ) + { + size_t oneSeqSize; + nbSeq--; + ZSTD_decodeSequence(&sequence, &seqState); + oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + + /* check if reached exact end */ + if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and precisely consumed; otherwise data is corrupted */ + /* last literal segment */ { size_t lastLLSize = litEnd - litPtr; From c95f89972591c94aa497e3cebc8c0e951faeb3e7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Nov 2015 17:28:35 +0100 Subject: [PATCH 20/79] refactored greedy --- lib/zstd_compress.c | 83 +++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index ccfabf896a3..72633fb8237 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1218,8 +1218,8 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( /* common lazy function, to be inlined */ FORCE_INLINE size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, const void* src, size_t srcSize, - const U32 searchMethod, const U32 deep) /* searchMethod : 0 = hc; 1 = bt */ + void* dst, size_t maxDstSize, const void* src, size_t srcSize, + 
const U32 searchMethod, const U32 deep) /* 0 : hc; 1 : bt */ { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -1389,53 +1389,57 @@ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, c /* Match Loop */ while (ip < ilimit) { - /* repcode */ - if (MEM_read32(ip) == MEM_read32(ip - offset_2)) - { - /* store sequence */ - size_t matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - size_t litLength = ip-anchor; - size_t offset = offset_2; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); - ip += matchLength+MINMATCH; - anchor = ip; - continue; - } - - offset_2 = offset_1; /* failed once : necessarily offset_1 now */ - - /* repcode at ip+1 */ + size_t matchLength; + size_t offset; + + /* priority to repcode at ip+1 */ if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1)) { - size_t matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); - size_t litLength = ip+1-anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); - ip += 1+matchLength+MINMATCH; - anchor = ip; - continue; + matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH; + ip ++; + offset = 0; } - - /* search */ + else { - size_t offset=999999; - size_t matchLength = ZSTD_HcFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); + /* search */ + offset = 99999999; /* init to high value */ + matchLength = ZSTD_HcFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); if (matchLength < MINMATCH) { + /* not found */ ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ continue; } - while ((ip>anchor) && (ip-offset>ctx->base) && (ip[-1] == ip[-1-offset])) { ip--; matchLength++; } /* catch up */ + /* match found */ + while ((ip>anchor) && (ip-offset>ctx->base) && (ip[-1] == ip[-1-offset])) { ip--; matchLength++; } /* catch up */ + } + + /* store found 
sequence */ + { + size_t litLength = ip-anchor; + offset_2 = offset_1; + if (offset) offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) + { /* store sequence */ - { - size_t litLength = ip-anchor; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset_1, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - } + size_t litLength = ip-anchor; + matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + offset = offset_2; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); + ip += matchLength+MINMATCH; + anchor = ip; + continue; } + } /* Last Literals */ @@ -1496,7 +1500,6 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctx->params.strategy, ctx->lowLimit < ctx->dictLimit); - if (srcSize < MIN_CBLOCK_SIZE+3) return 0; /* don't even attempt compression below a certain srcSize */ return blockCompressor(ctx, dst, maxDstSize, src, srcSize); } @@ -1537,7 +1540,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr, op[0] = (BYTE)(cSize>>16); op[1] = (BYTE)(cSize>>8); op[2] = (BYTE)cSize; - op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */ + op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */ cSize += 3; } From 7dfd56be4ade224b6a626de0f476306c90b73e80 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Nov 2015 17:46:29 +0100 Subject: [PATCH 21/79] restored fix small blocks --- lib/zstd_compress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 72633fb8237..e262b4657a5 100644 --- 
a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1500,6 +1500,7 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctx->params.strategy, ctx->lowLimit < ctx->dictLimit); + if (srcSize < MIN_CBLOCK_SIZE+3) return 0; /* don't even attempt compression below a certain srcSize */ return blockCompressor(ctx, dst, maxDstSize, src, srcSize); } From dc5e3e96f8eb8c790a170df7bb4fba0e62b78d3c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Nov 2015 09:23:56 +0100 Subject: [PATCH 22/79] minor memsave --- programs/fuzzer.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index d3af3ff89b0..793af7f9cc3 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -266,11 +266,9 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit U32 testNb = 0; U32 coreSeed = seed, lseed = 0; ZSTD_CCtx* ctx; - ZSTD_CCtx* hcctx; /* allocation */ ctx = ZSTD_createCCtx(); - hcctx = ZSTD_createCCtx(); cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize); cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize); @@ -278,7 +276,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize); dstBuffer = (BYTE*)malloc (dstBufferSize); cBuffer = (BYTE*)malloc (cBufferSize); - CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !dstBuffer || !cBuffer || !ctx || !hcctx, + CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !dstBuffer || !cBuffer || !ctx, "Not enough memory, fuzzer tests cancelled"); /* Create initial samples */ @@ -338,7 +336,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit /* compression test */ cLevelMod = MAX(1, 38 
- (int)(MAX(9, sampleSizeLog) * 2)); /* use high compression levels with small samples, for speed */ cLevel = (FUZ_rand(&lseed) % cLevelMod) +1; - cSize = ZSTD_compressCCtx(hcctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel); + cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel); CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed"); /* compression failure test : too small dest buffer */ @@ -350,7 +348,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit static const U32 endMark = 0x4DC2B1A9; U32 endCheck; memcpy(dstBuffer+tooSmallSize, &endMark, 4); - errorCode = ZSTD_compressCCtx(hcctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); + errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); memcpy(&endCheck, dstBuffer+tooSmallSize, 4); CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); @@ -442,7 +440,6 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit _cleanup: ZSTD_freeCCtx(ctx); - ZSTD_freeCCtx(hcctx); free(cNoiseBuffer[0]); free(cNoiseBuffer[1]); free(cNoiseBuffer[2]); From 6a11a69b15b47e0eb0d558acdf192f69128f8ab0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Nov 2015 12:00:25 +0100 Subject: [PATCH 23/79] added : repcode generation to datagen --- programs/datagen.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/programs/datagen.c b/programs/datagen.c index 23d7d15f239..7fb98e83722 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -138,6 +138,7 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match const U32 matchProba32 = (U32)(32768 * matchProba); size_t pos = prefixSize; U32* seed = seedPtr; + U32 prevOffset = 1; /* special case : sparse content */ while 
(matchProba >= 1.0) @@ -167,14 +168,16 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match { /* Copy (within 32K) */ size_t match; - size_t d; - int length = RDG_RANDLENGTH + 4; + size_t length = RDG_RANDLENGTH + 4; U32 offset = RDG_RAND15BITS + 1; + U32 repeatOffset = (RDG_rand(seed) & 15) == 2; + if (repeatOffset) offset = prevOffset; if (offset > pos) offset = (U32)pos; match = pos - offset; - d = pos + length; - if (d > buffSize) d = buffSize; - while (pos < d) buffPtr[pos++] = buffPtr[match++]; + if (length > buffSize-pos) length = buffSize-pos; + memcpy(buffPtr+pos, buffPtr+match, length); + pos += length; + prevOffset = offset; } else { From 743402cb236db4599508a537c1558bc19994ad92 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Nov 2015 12:03:53 +0100 Subject: [PATCH 24/79] modified format --- lib/zstd_compress.c | 222 ++++++++++++++++++++++++++++---------------- 1 file changed, 143 insertions(+), 79 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index e262b4657a5..b2227b6275d 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -173,7 +173,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, /* reserve table memory */ { - const U32 contentLog = params.strategy == ZSTD_fast ? 1 : params.contentLog; + const U32 contentLog = (params.strategy == ZSTD_fast) ? 
1 : params.contentLog; const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32); const size_t neededSpace = tableSpace + WORKPLACESIZE; if (zc->workSpaceSize < neededSpace) @@ -482,9 +482,9 @@ size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, U32 nbBits = (offCode-1) * (!!offCode); BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/ FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ BIT_addBits(&blockStream, offset, nbBits); /* 32 */ /* 42 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */ FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */ BIT_flushBits(&blockStream); /* 7 */ /* 7 */ @@ -731,6 +731,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, /* init */ + ZSTD_resetSeqStore(seqStorePtr); if (ip == base) { hashTable[ZSTD_hashPtr(base+1, hBits, mls)] = 1; @@ -738,37 +739,61 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, hashTable[ZSTD_hashPtr(base+3, hBits, mls)] = 3; ip = base+4; } - ZSTD_resetSeqStore(seqStorePtr); /* Main Search Loop */ - while (ip < ilimit) /* < instead of <=, because unconditionnal ZSTD_addPtr(ip+1) */ + while (ip < ilimit) /* < instead of <=, because repcode check at (ip+1) */ { + size_t matchLength; + size_t offset; const size_t h = ZSTD_hashPtr(ip, hBits, mls); const BYTE* match = base + hashTable[h]; hashTable[h] = (U32)(ip-base); - if (MEM_read32(ip-offset_2) == MEM_read32(ip)) match = ip-offset_2; - if ( (match < lowest) || - (MEM_read32(match) != MEM_read32(ip)) ) - { ip += ((ip-anchor) >> g_searchStrength) + 1; offset_2 = offset_1; continue; } - while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; } /* catch up */ - + if 
(MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { - size_t litLength = ip-anchor; - size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend); - size_t offsetCode = ip-match; - if (offsetCode == offset_2) offsetCode = 0; - offset_2 = offset_1; - offset_1 = ip-match; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength); + matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); + ip++; + offset = 0; + } + else + { + if ( (match < lowest) || + (MEM_read32(match) != MEM_read32(ip)) ) + { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend); + while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; } /* catch up */ + offset = ip-match; + offset_2 = offset_1; + offset_1 = offset; + } - /* Fill Table */ - hashTable[ZSTD_hashPtr(ip+1, hBits, mls)] = (U32)(ip+1-base); - ip += matchLength + MINMATCH; - anchor = ip; - if (ip < ilimit) /* same test as loop, for speed */ - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - } + /* match found */ + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength); + ip += matchLength + MINMATCH; + anchor = ip; + + if (ip <= ilimit) + { + /* Fill Table */ + hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base); + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) + { + /* store sequence */ + size_t ml = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + size_t tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml); + ip += ml+MINMATCH; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } + } } /* Last Literals */ @@ -820,6 +845,8 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* anchor = istart; const U32 lowLimit = ctx->lowLimit; const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; @@ -838,49 +865,85 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, } /* Main Search Loop */ - while (ip < ilimit) /* < instead of <=, because unconditional ZSTD_addPtr(ip+1) */ + while (ip < ilimit) /* < instead of <=, because (ip+1) */ { const size_t h = ZSTD_hashPtr(ip, hBits, mls); - U32 matchIndex = hashTable[h]; + const U32 matchIndex = hashTable[h]; const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictBase + lowLimit : base + dictLimit; const BYTE* match = matchBase + matchIndex; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictBase + lowLimit : lowPrefixPtr; const U32 current = (U32)(ip-base); - const U32 repIndex = current - offset_2; - const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; const BYTE* repMatch = repBase + repIndex; + size_t matchLength; + U32 offset; hashTable[h] = current; /* update hash table */ if ( ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) - &&(MEM_read32(repMatch) == MEM_read32(ip)) ) - match = repMatch, matchIndex = repIndex, matchBase = repBase, - lowMatchPtr = repIndex < dictLimit ? 
dictBase + lowLimit : base + dictLimit ; - if ( (matchIndex < lowLimit) || - (MEM_read32(match) != MEM_read32(ip)) ) - { ip += ((ip-anchor) >> g_searchStrength) + 1; offset_2 = offset_1; continue; } - /* found a match */ - while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; } /* catch up */ - + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - size_t litLength = ip-anchor; - const BYTE* matchEnd = matchIndex < dictLimit ? dictBase + dictLimit : iend; - const BYTE* iEndCount = (matchEnd - match < iend - ip) ? ip + (matchEnd - match) : iend; - size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); - size_t offsetCode = current-matchIndex; - if (match + matchLength + MINMATCH == matchEnd) - matchLength += ZSTD_count(ip + matchLength + MINMATCH, base + dictLimit, iend); - if (offsetCode == offset_2) offsetCode = 0; - offset_2 = offset_1; - offset_1 = current-matchIndex; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength); + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + const BYTE* iEndCount = (repMatchEnd - repMatch < iend - ip - 1) ? ip + 1 + (repMatchEnd - repMatch) : iend; + matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, iEndCount); + if (match + matchLength + MINMATCH == dictEnd) + matchLength += ZSTD_count(ip + matchLength + MINMATCH, lowPrefixPtr, iend); + ip++; + offset = 0; + } + else + { + if ( (matchIndex < lowLimit) || + (MEM_read32(match) != MEM_read32(ip)) ) + { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } + { + const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* iEndCount = (matchEnd - match < iend - ip) ? 
ip + (matchEnd - match) : iend; + matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); + if (match + matchLength + MINMATCH == dictEnd) + matchLength += ZSTD_count(ip + matchLength + MINMATCH, lowPrefixPtr, iend); + while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + } + } + + /* found a match */ + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength); + ip += matchLength + MINMATCH; + anchor = ip; - /* Fill Table */ - hashTable[ZSTD_hashPtr(ip+1, hBits, mls)] = current+1; - ip += matchLength + MINMATCH; - anchor = ip; - if (ip < ilimit) /* same test as main loop, helps speed */ - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - } + if (ip <= ilimit) + { + /* Fill Table */ + hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base); + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) + { + U32 current2 = (U32)(ip-base); + U32 repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) + { + size_t maxIlength = iend - ip; + size_t maxMlength = repIndex2 < dictLimit ? 
dictLimit - repIndex2 : iend - repMatch2; + size_t maxML = MIN(maxMlength, maxIlength); + size_t ml = ZSTD_count(ip+MINMATCH, repMatch2+MINMATCH, ip + maxML); + U32 tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + if (ml+MINMATCH == maxMlength) /* reached end of extDict */ + ml += ZSTD_count(ip+MINMATCH+ml, lowPrefixPtr, iend); + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml); + ip += ml+MINMATCH; + anchor = ip; + continue; + } + break; + } + } } /* Last Literals */ @@ -1248,22 +1311,19 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, size_t offset=999999; const BYTE* start; - /* try to find a first match */ - if (MEM_read32(ip) == MEM_read32(ip - offset_2)) + /* check repCode */ + if (MEM_read32(ip) == MEM_read32(ip - offset_1)) { - /* repcode : we take it*/ - size_t offtmp = offset_2; + /* repcode : we take it */ size_t litLength = ip - anchor; - matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - offset_2 = offset_1; - offset_1 = offtmp; + matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend); ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); ip += matchLength+MINMATCH; anchor = ip; continue; } - offset_2 = offset_1; + /* search first solution */ matchLength = searchMax(ctx, ip, iend, &offset, maxSearches, mls); if (matchLength < MINMATCH) { @@ -1329,16 +1389,30 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, { while ((start>anchor) && (start>ctx->base+offset) && (start[-1] == start[-1-offset])) { start--; matchLength++; } + offset_2 = offset_1; offset_1 = offset; } /* store sequence */ { size_t litLength = start - anchor; - if (offset) offset_1 = offset; ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) + { + /* store sequence */ 
+ matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + offset = offset_2; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength); + ip += matchLength+MINMATCH; + anchor = ip; + continue; /* faster when present ... (?) */ + } } /* Last Literals */ @@ -1412,13 +1486,12 @@ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, c } /* match found */ while ((ip>anchor) && (ip-offset>ctx->base) && (ip[-1] == ip[-1-offset])) { ip--; matchLength++; } /* catch up */ + offset_2 = offset_1, offset_1 = offset; } /* store found sequence */ { size_t litLength = ip-anchor; - offset_2 = offset_1; - if (offset) offset_1 = offset; ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); ip += matchLength; anchor = ip; @@ -1429,15 +1502,14 @@ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, c && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { /* store sequence */ - size_t litLength = ip-anchor; matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); offset = offset_2; offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength); ip += matchLength+MINMATCH; anchor = ip; - continue; + continue; /* faster when present ... (?) 
*/ } } @@ -1646,14 +1718,6 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, BYTE* op = ostart; size_t oSize; - /* correct params, to use less memory */ - { - U32 srcLog = ZSTD_highbit((U32)srcSize-1) + 1; - U32 contentBtPlus = (ctx->params.strategy == ZSTD_btlazy2); - if (params.windowLog > srcLog) params.windowLog = srcLog; - if (params.contentLog > srcLog+contentBtPlus) params.contentLog = srcLog+contentBtPlus; - } - /* Header */ oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, params, srcSize); if(ZSTD_isError(oSize)) return oSize; From 55aa7f94e3dfbc35f3ad2f427674ef529db5840c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Nov 2015 12:04:52 +0100 Subject: [PATCH 25/79] modified format --- lib/zstd_decompress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index c6938d48b82..430dc7acad5 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -404,8 +404,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) /* Literal length */ litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); - prevOffset = litLength ? seq->offset : seqState->prevOffset; - seqState->prevOffset = seq->offset; + prevOffset = litLength ? 
seq->offset : seqState->prevOffset; if (litLength == MaxLL) { U32 add = *dumps++; @@ -432,6 +431,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); if (offsetCode==0) offset = prevOffset; /* cmove */ + if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */ } /* MatchLength */ From 402fdcf1a3d855f3c99f0c18dfeb692c35c74353 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Nov 2015 12:46:08 +0100 Subject: [PATCH 26/79] fixed asan --- lib/zstd_compress.c | 228 ++++++++++++++++++++++---------------------- programs/datagen.c | 13 ++- 2 files changed, 120 insertions(+), 121 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index b2227b6275d..e313c40ee3f 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -751,49 +751,49 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { - matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); - ip++; - offset = 0; - } - else - { - if ( (match < lowest) || - (MEM_read32(match) != MEM_read32(ip)) ) - { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } - matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend); - while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; } /* catch up */ - offset = ip-match; - offset_2 = offset_1; - offset_1 = offset; - } - - /* match found */ - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength); - ip += matchLength + MINMATCH; - anchor = ip; - - if (ip <= ilimit) - { - /* Fill Table */ - hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base); - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) - { - /* 
store sequence */ - size_t ml = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - size_t tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml); - ip += ml+MINMATCH; - anchor = ip; - continue; /* faster when present ... (?) */ - } - } + matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); + ip++; + offset = 0; + } + else + { + if ( (match < lowest) || + (MEM_read32(match) != MEM_read32(ip)) ) + { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend); + while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; } /* catch up */ + offset = ip-match; + offset_2 = offset_1; + offset_1 = offset; + } + + /* match found */ + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength); + ip += matchLength + MINMATCH; + anchor = ip; + + if (ip <= ilimit) + { + /* Fill Table */ + hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base); + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) + { + /* store sequence */ + size_t ml = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + size_t tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml); + ip += ml+MINMATCH; + anchor = ip; + continue; /* faster when present ... (?) */ + } + } } /* Last Literals */ @@ -874,7 +874,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* lowMatchPtr = matchIndex < dictLimit ? 
dictBase + lowLimit : lowPrefixPtr; const U32 current = (U32)(ip-base); const U32 repIndex = current + 1 - offset_1; - const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; const BYTE* repMatch = repBase + repIndex; size_t matchLength; U32 offset; @@ -883,67 +883,67 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, if ( ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - const BYTE* iEndCount = (repMatchEnd - repMatch < iend - ip - 1) ? ip + 1 + (repMatchEnd - repMatch) : iend; - matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, iEndCount); + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + const BYTE* iEndCount = (repMatchEnd - repMatch < iend - ip - 1) ? ip + 1 + (repMatchEnd - repMatch) : iend; + matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, iEndCount); if (match + matchLength + MINMATCH == dictEnd) matchLength += ZSTD_count(ip + matchLength + MINMATCH, lowPrefixPtr, iend); ip++; - offset = 0; - } - else - { - if ( (matchIndex < lowLimit) || - (MEM_read32(match) != MEM_read32(ip)) ) - { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } - { - const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE* iEndCount = (matchEnd - match < iend - ip) ? 
ip + (matchEnd - match) : iend; - matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); - if (match + matchLength + MINMATCH == dictEnd) - matchLength += ZSTD_count(ip + matchLength + MINMATCH, lowPrefixPtr, iend); - while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; } /* catch up */ - offset = current - matchIndex; - offset_2 = offset_1; - offset_1 = offset; - } - } - + offset = 0; + } + else + { + if ( (matchIndex < lowLimit) || + (MEM_read32(match) != MEM_read32(ip)) ) + { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } + { + const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* iEndCount = (matchEnd - match < iend - ip) ? ip + (matchEnd - match) : iend; + matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); + if (match + matchLength + MINMATCH == dictEnd) + matchLength += ZSTD_count(ip + matchLength + MINMATCH, lowPrefixPtr, iend); + while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + } + } + /* found a match */ - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength); - ip += matchLength + MINMATCH; - anchor = ip; - - if (ip <= ilimit) - { - /* Fill Table */ - hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base); - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) - { - U32 current2 = (U32)(ip-base); - U32 repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < dictLimit ? 
dictBase + repIndex2 : base + repIndex2; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength); + ip += matchLength + MINMATCH; + anchor = ip; + + if (ip <= ilimit) + { + /* Fill Table */ + hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base); + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) + { + U32 current2 = (U32)(ip-base); + const U32 repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit)) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) - { - size_t maxIlength = iend - ip; - size_t maxMlength = repIndex2 < dictLimit ? dictLimit - repIndex2 : iend - repMatch2; - size_t maxML = MIN(maxMlength, maxIlength); + { + size_t maxIlength = iend - ip; + size_t maxMlength = repIndex2 < dictLimit ? (size_t)(dictLimit - repIndex2) : (size_t)(iend - repMatch2); + size_t maxML = MIN(maxMlength, maxIlength); size_t ml = ZSTD_count(ip+MINMATCH, repMatch2+MINMATCH, ip + maxML); - U32 tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + U32 tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ if (ml+MINMATCH == maxMlength) /* reached end of extDict */ - ml += ZSTD_count(ip+MINMATCH+ml, lowPrefixPtr, iend); - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml); - ip += ml+MINMATCH; - anchor = ip; - continue; - } + ml += ZSTD_count(ip+MINMATCH+ml, lowPrefixPtr, iend); + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml); + ip += ml+MINMATCH; + anchor = ip; + continue; + } break; - } - } + } + } } /* Last Literals */ @@ -1323,7 +1323,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, continue; } - /* search first solution */ + /* search first 
solution */ matchLength = searchMax(ctx, ip, iend, &offset, maxSearches, mls); if (matchLength < MINMATCH) { @@ -1463,24 +1463,24 @@ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, c /* Match Loop */ while (ip < ilimit) { - size_t matchLength; - size_t offset; - + size_t matchLength; + size_t offset; + /* priority to repcode at ip+1 */ if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1)) { matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH; - ip ++; - offset = 0; + ip ++; + offset = 0; } - else + else { - /* search */ + /* search */ offset = 99999999; /* init to high value */ matchLength = ZSTD_HcFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); if (matchLength < MINMATCH) { - /* not found */ + /* not found */ ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ continue; } @@ -1488,14 +1488,14 @@ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, c while ((ip>anchor) && (ip-offset>ctx->base) && (ip[-1] == ip[-1-offset])) { ip--; matchLength++; } /* catch up */ offset_2 = offset_1, offset_1 = offset; } - - /* store found sequence */ - { - size_t litLength = ip-anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - } + + /* store found sequence */ + { + size_t litLength = ip-anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + } /* check immediate repcode */ while ( (ip <= ilimit) @@ -1511,7 +1511,7 @@ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, c anchor = ip; continue; /* faster when present ... (?) 
*/ } - + } /* Last Literals */ diff --git a/programs/datagen.c b/programs/datagen.c index 7fb98e83722..f5aa9c5dae8 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -81,7 +81,6 @@ typedef BYTE litDistribTable[LTSIZE]; - /********************************************************* * Local Functions *********************************************************/ @@ -130,7 +129,7 @@ static BYTE RDG_genChar(U32* seed, const litDistribTable lt) } -#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) +#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 0x7FFF) #define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr) { @@ -168,16 +167,16 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match { /* Copy (within 32K) */ size_t match; - size_t length = RDG_RANDLENGTH + 4; + size_t d; + int length = RDG_RANDLENGTH + 4; U32 offset = RDG_RAND15BITS + 1; U32 repeatOffset = (RDG_rand(seed) & 15) == 2; if (repeatOffset) offset = prevOffset; if (offset > pos) offset = (U32)pos; match = pos - offset; - if (length > buffSize-pos) length = buffSize-pos; - memcpy(buffPtr+pos, buffPtr+match, length); - pos += length; - prevOffset = offset; + d = pos + length; + if (d > buffSize) d = buffSize; + while (pos < d) buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ } else { From 7a231797796eba0eac9d0430bf3f143640e3877e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 21 Nov 2015 15:27:35 +0100 Subject: [PATCH 27/79] small fast compression improvement --- lib/zstd_compress.c | 135 +++++++++----------------------------------- 1 file changed, 28 insertions(+), 107 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index e313c40ee3f..7a63254dea2 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1305,39 +1305,38 @@ size_t 
ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; /* Match Loop */ - while (ip <= ilimit) + while (ip < ilimit) { - size_t matchLength; - size_t offset=999999; - const BYTE* start; + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; /* check repCode */ - if (MEM_read32(ip) == MEM_read32(ip - offset_1)) + if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1)) { /* repcode : we take it */ - size_t litLength = ip - anchor; - matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); - ip += matchLength+MINMATCH; - anchor = ip; - continue; + matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH; + if (deep==0) goto _storeSequence; } - /* search first solution */ - matchLength = searchMax(ctx, ip, iend, &offset, maxSearches, mls); - if (matchLength < MINMATCH) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } + /* search first solution */ + size_t offsetFound = 99999999; + size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + if (matchLength < MINMATCH) + { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + } /* let's try to find a better solution */ - start = ip; - - while (ip=1) && (ipseqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; - const U32 maxSearches = 1 << ctx->params.searchLog; - const U32 mls = ctx->params.searchLength; - - /* init */ - ZSTD_resetSeqStore(seqStorePtr); - if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip 
+= REPCODE_STARTVALUE; - - /* Match Loop */ - while (ip < ilimit) - { - size_t matchLength; - size_t offset; - - /* priority to repcode at ip+1 */ - if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1)) - { - matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH; - ip ++; - offset = 0; - } - else - { - /* search */ - offset = 99999999; /* init to high value */ - matchLength = ZSTD_HcFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); - if (matchLength < MINMATCH) - { - /* not found */ - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - /* match found */ - while ((ip>anchor) && (ip-offset>ctx->base) && (ip[-1] == ip[-1-offset])) { ip--; matchLength++; } /* catch up */ - offset_2 = offset_1, offset_1 = offset; - } - - /* store found sequence */ - { - size_t litLength = ip-anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - } - - /* check immediate repcode */ - while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) - { - /* store sequence */ - matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - offset = offset_2; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength); - ip += matchLength+MINMATCH; - anchor = ip; - continue; /* faster when present ... (?) 
*/ - } - - } - - /* Last Literals */ - { - size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } - - /* Final compression stage */ - return ZSTD_compressSequences((BYTE*)dst, maxDstSize, - - seqStorePtr, srcSize); + return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0); } + typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) From 007c1c6ff08acac83c7448175a45fe7f2093ded7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Nov 2015 02:42:28 +0100 Subject: [PATCH 28/79] minor cr improvement --- lib/zstd_compress.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 7a63254dea2..0a44244344d 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1282,7 +1282,7 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( FORCE_INLINE size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, - const U32 searchMethod, const U32 deep) /* 0 : hc; 1 : bt */ + const U32 searchMethod, const U32 depth) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -1316,7 +1316,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, { /* repcode : we take it */ matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH; - if (deep==0) goto _storeSequence; + if (depth==0) goto _storeSequence; } { @@ -1333,22 +1333,23 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, } /* let's try to find a better solution */ - while ((deep>=1) && (ip=1) + while (ip= MINMATCH) && (gain2 > gain1)) - matchLength = ml2, offset = 0, start = ip; + if ((mlRep >= MINMATCH) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; } { 
size_t offset2=999999; size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*(3+deep) - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*(3+deep) - ZSTD_highbit((U32)offset+1) + (3+deep)); + int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { matchLength = ml2, offset = offset2, start = ip; @@ -1357,7 +1358,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, } /* let's find an even better one */ - if ((deep==2) && (ip Date: Sun, 22 Nov 2015 02:53:43 +0100 Subject: [PATCH 29/79] fixed roll buffer in fast mode --- lib/zstd_compress.c | 264 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 245 insertions(+), 19 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 0a44244344d..e1a81dae183 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -884,10 +884,11 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - const BYTE* iEndCount = (repMatchEnd - repMatch < iend - ip - 1) ? 
ip + 1 + (repMatchEnd - repMatch) : iend; - matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, iEndCount); - if (match + matchLength + MINMATCH == dictEnd) - matchLength += ZSTD_count(ip + matchLength + MINMATCH, lowPrefixPtr, iend); + const BYTE* vEnd = ip+1 + (repMatchEnd-repMatch); + if (vEnd > iend) vEnd = iend; + matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, vEnd); + if (repMatch + matchLength + MINMATCH == dictEnd) + matchLength += ZSTD_count(ip+1 + matchLength + MINMATCH, lowPrefixPtr, iend); ip++; offset = 0; } @@ -1128,7 +1129,6 @@ static const BYTE* ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BY for( ; idx < target ; ) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares); - //ZSTD_insertBtAndFindBestMatch(zc, base+idx, iend, &dummy, nbCompares, mls); zc->nextToUpdate = idx; return base + idx; @@ -1214,6 +1214,8 @@ size_t ZSTD_HcFindBestMatch ( const BYTE* const dictBase = zc->dictBase; const U32 dictLimit = zc->dictLimit; const U32 lowLimit = zc->lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? 
current - chainSize : 0; U32 matchIndex; const BYTE* match; int nbAttempts=maxNbAttempts; @@ -1242,19 +1244,19 @@ size_t ZSTD_HcFindBestMatch ( else { match = dictBase + matchIndex; - if (MEM_read32(match) == MEM_read32(ip)) /* beware of end of dict */ + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 */ { size_t mlt; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iLimit) vLimit = iLimit; mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; - if ((ip+mlt == vLimit) && (vLimit < iLimit)) + if (match+mlt == dictBase+dictLimit) mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit); - if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; } + if (mlt > ml) { ml = mlt; *offsetPtr = current - matchIndex; } } } - if (base + matchIndex <= ip - chainSize) break; + if (matchIndex <= minChain) break; matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); } @@ -1278,7 +1280,9 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( } -/* common lazy function, to be inlined */ +/* ****************************** +* Common parser - lazy strategy +********************************/ FORCE_INLINE size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, @@ -1320,16 +1324,17 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, } { - /* search first solution */ + /* first search (depth 0) */ size_t offsetFound = 99999999; size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - if (matchLength < MINMATCH) - { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < MINMATCH) + { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; } /* let's try to find a better solution */ @@ -1387,7 
+1392,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, /* catch up */ if (offset) { - while ((start>anchor) && (start>ctx->base+offset) && (start[-1] == start[-1-offset])) + while ((start>anchor) && (start>ctx->base+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */ { start--; matchLength++; } offset_2 = offset_1; offset_1 = offset; } @@ -1449,6 +1454,227 @@ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, c } +FORCE_INLINE +size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, + void* dst, size_t maxDstSize, const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + + size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + const U32 maxSearches = 1 << ctx->params.searchLog; + const U32 mls = ctx->params.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + + /* init */ + ZSTD_resetSeqStore(seqStorePtr); + if (((ip-base) - dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + + /* Match Loop */ + while (ip < ilimit) + { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { + const U32 repIndex = (U32)(current+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (MEM_read32(ip+1) == MEM_read32(repMatch)) + { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + const BYTE* vEnd = ip+1 + (repEnd - repMatch); + if (vEnd > iend) vEnd = iend; + matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH; + if (repMatch + matchLength == dictEnd) + matchLength += ZSTD_count(ip+1+matchLength, prefixStart, iend); + if (depth==0) goto _storeSequence; + } + } + + { + /* first search (depth 0) */ + size_t offsetFound = 99999999; + size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < MINMATCH) + { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip iend) vEnd = iend; + repLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH; + if (repMatch + repLength == dictEnd) + repLength += ZSTD_count(ip+repLength, prefixStart, iend); + { + int gain2 = (int)(repLength * 3); + int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= MINMATCH) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } + } + } + + /* search match, depth 1 */ + { + size_t offset2=999999; + size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + if ((ml2 >= MINMATCH) && (gain2 > gain1)) + { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } + } + + /* let's find an even better one */ + if ((depth==2) && (ip iend) vEnd = iend; + repLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH; + if (repMatch + 
repLength == dictEnd) + repLength += ZSTD_count(ip+repLength, prefixStart, iend); + { + int gain2 = (int)(repLength * 4); + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= MINMATCH) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } + } + } + + /* search match, depth 2 */ + { + size_t offset2=999999; + size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + if ((ml2 >= MINMATCH) && (gain2 > gain1)) + { + matchLength = ml2, offset = offset2, start = ip; + continue; + } + } + } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) + { + while ((start>anchor) && (start>prefixStart+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */ + { start--; matchLength++; } + offset_2 = offset_1; offset_1 = offset; + } + + /* store sequence */ +_storeSequence: + { + size_t litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) + { + const U32 repIndex = (U32)((ip-base) - offset_2); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (MEM_read32(ip) == MEM_read32(repMatch)) + { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + const BYTE* vEnd = ip + (repEnd - repMatch); + if (vEnd > iend) vEnd = iend; + matchLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH; + if (repMatch + matchLength == dictEnd) + matchLength += ZSTD_count(ip+matchLength, prefixStart, iend); + offset = offset_2; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } + } + + /* Last Literals */ + { + size_t lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } + + /* Final compression stage */ + return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + seqStorePtr, srcSize); +} + +size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0); +} + + typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); @@ -1462,7 +1688,7 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int case ZSTD_fast: return ZSTD_compressBlock_fast_extDict; case ZSTD_greedy: - return ZSTD_compressBlock_greedy; + return ZSTD_compressBlock_greedy_extDict; case ZSTD_lazy: return ZSTD_compressBlock_lazy; case ZSTD_lazy2: From 734aa92ed5404a0de76af0ea8c9393800b14b01b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Nov 2015 03:01:33 +0100 Subject: [PATCH 30/79] fixed roll buffer greedy mode --- lib/zstd_compress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index e1a81dae183..a599712bc24 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1497,6 +1497,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)(current+1 - offset_1); const BYTE* const repBase = 
repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; + if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ From b7fc88eec763d6fb2eb645420e50f6d710ecfffc Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Nov 2015 03:12:28 +0100 Subject: [PATCH 31/79] fixed roll buffer lazy mode --- lib/zstd_compress.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index a599712bc24..ebe060e74be 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1537,6 +1537,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; + if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ @@ -1675,6 +1676,10 @@ size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDs return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0); } +size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1); +} typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); @@ -1691,7 +1696,7 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int case ZSTD_greedy: return ZSTD_compressBlock_greedy_extDict; case ZSTD_lazy: - return ZSTD_compressBlock_lazy; + return ZSTD_compressBlock_lazy_extDict; case ZSTD_lazy2: return ZSTD_compressBlock_lazy2; case ZSTD_btlazy2: From a85c77b9d3b8b8056f629ae93aa37b4e182258ba Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Nov 2015 12:22:04 
+0100 Subject: [PATCH 32/79] fixed roll buffer lazy2 mode --- lib/zstd_compress.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index ebe060e74be..a45fbab3173 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1581,6 +1581,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; + if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ @@ -1681,6 +1682,11 @@ size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstS return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1); } +size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2); +} + typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); @@ -1698,7 +1704,7 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int case ZSTD_lazy: return ZSTD_compressBlock_lazy_extDict; case ZSTD_lazy2: - return ZSTD_compressBlock_lazy2; + return ZSTD_compressBlock_lazy2_extDict; case ZSTD_btlazy2: return ZSTD_compressBlock_btlazy2; } From 428619ccd799375d7a8d4c9fcb227e09299f77d7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Nov 2015 12:46:30 +0100 Subject: [PATCH 33/79] small cLevel update --- lib/zstd_static.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/zstd_static.h b/lib/zstd_static.h index fc4dbe30e56..8c56c13e413 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -154,16 +154,16 @@ static const ZSTD_parameters ZSTD_defaultParameters[2][ZSTD_MAX_CLEVEL+1] 
= { { /* for > 128 KB */ /* W, C, H, S, L, strat */ { 18, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */ - { 19, 13, 14, 1, 7, ZSTD_fast }, /* level 1 - in fact redirected towards zstd fast */ + { 19, 13, 14, 1, 7, ZSTD_fast }, /* level 1 */ { 19, 15, 16, 1, 6, ZSTD_fast }, /* level 2 */ { 20, 18, 20, 1, 6, ZSTD_fast }, /* level 3 */ { 21, 19, 21, 1, 6, ZSTD_fast }, /* level 4 */ - { 20, 13, 18, 5, 5, ZSTD_greedy }, /* level 5 */ - { 20, 17, 19, 3, 5, ZSTD_greedy }, /* level 6 */ + { 20, 14, 18, 3, 5, ZSTD_greedy }, /* level 5 */ + { 20, 18, 19, 3, 5, ZSTD_greedy }, /* level 6 */ { 21, 17, 20, 3, 5, ZSTD_lazy }, /* level 7 */ { 21, 19, 20, 3, 5, ZSTD_lazy }, /* level 8 */ { 21, 20, 20, 3, 5, ZSTD_lazy2 }, /* level 9 */ - { 21, 19, 20, 4, 5, ZSTD_lazy2 }, /* level 10 */ + { 21, 19, 21, 4, 5, ZSTD_lazy2 }, /* level 10 */ { 22, 20, 22, 4, 5, ZSTD_lazy2 }, /* level 11 */ { 22, 20, 22, 5, 5, ZSTD_lazy2 }, /* level 12 */ { 22, 21, 22, 5, 5, ZSTD_lazy2 }, /* level 13 */ @@ -173,7 +173,7 @@ static const ZSTD_parameters ZSTD_defaultParameters[2][ZSTD_MAX_CLEVEL+1] = { { 23, 24, 23, 4, 5, ZSTD_btlazy2 }, /* level 17 */ { 25, 24, 23, 5, 5, ZSTD_btlazy2 }, /* level 18 */ { 25, 26, 23, 5, 5, ZSTD_btlazy2 }, /* level 19 */ - { 26, 27, 24, 6, 5, ZSTD_btlazy2 }, /* level 20 */ + { 25, 26, 25, 6, 5, ZSTD_btlazy2 }, /* level 20 */ } }; From 287b7d9607fc3a346c6534b61ebe05e3ed3b80e3 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Nov 2015 13:24:05 +0100 Subject: [PATCH 34/79] small hc speed improvement --- lib/zstd_compress.c | 74 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 7 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index a45fbab3173..7e81dc1d227 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1206,6 +1206,66 @@ size_t ZSTD_HcFindBestMatch ( const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + const BYTE* const base = 
zc->base; + const U32 current = (U32)(ip-base); + U32* const chainTable = zc->contentTable; + const U32 chainSize = (1 << zc->params.contentLog); + const U32 minChain = current > chainSize ? current - chainSize : 0; + const U32 chainMask = chainSize-1; + const U32 lowLimit = zc->lowLimit; + U32 matchIndex; + const BYTE* match; + int nbAttempts=maxNbAttempts; + size_t ml=0; + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, matchLengthSearch); + + while ((matchIndex>lowLimit) && (nbAttempts)) + { + nbAttempts--; + match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + { + const size_t mlt = ZSTD_count(ip, match, iLimit); + if (mlt > ml) + //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1)))) + { + ml = mlt; *offsetPtr = ip-match; + if (ip+mlt >= iLimit) break; + } + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + return ml; +} + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_HcFindBestMatch_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) { U32* const chainTable = zc->contentTable; const U32 chainSize = (1 << zc->params.contentLog); @@ -1264,7 +1324,7 @@ size_t 
ZSTD_HcFindBestMatch ( } -FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( +FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, size_t* offsetPtr, @@ -1273,16 +1333,16 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( switch(matchLengthSearch) { default : - case 4 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + case 4 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); } } -/* ****************************** +/* ******************************* * Common parser - lazy strategy -********************************/ +*********************************/ FORCE_INLINE size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, @@ -1478,7 +1538,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + searchMax_f searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; /* init */ ZSTD_resetSeqStore(seqStorePtr); From a5c737411ad8033c68c63956a58ae0ad778f072e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Nov 2015 14:16:56 +0100 Subject: [PATCH 35/79] complex level selector --- programs/paramgrill.c | 2 -- programs/zstdcli.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/programs/paramgrill.c b/programs/paramgrill.c index 02fd240f003..99ac909fe41 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -1007,8 +1007,6 @@ int main(int argc, char** argv) argument++; while ((*argument>= '0') && (*argument<='9')) cLevel *= 10, cLevel += *argument++ - '0'; - if (cLevel < 1) cLevel = 1; - if (cLevel > ZSTD_MAX_CLEVEL) cLevel = ZSTD_MAX_CLEVEL; g_params = g_seedParams[cLevel]; continue; } diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 5b74df62c02..418fec70bd7 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -70,7 +70,7 @@ **************************************/ #define COMPRESSOR_NAME "zstd command line interface" #ifndef ZSTD_VERSION -# define ZSTD_VERSION "v0.3.6" +# define ZSTD_VERSION "v0.4.0" #endif #define AUTHOR "Yann Collet" #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__ From d7233d6e14945317de5ec4cf58e728bd3f5e81af Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Nov 2015 14:40:51 +0100 Subject: [PATCH 36/79] more compression tests --- lib/zstd_compress.c | 9 ++++---- programs/Makefile | 56 ++++++++++++++++++++++++++++++++++----------- programs/fileio.c | 2 +- 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 7e81dc1d227..cfc590a1bcc 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1558,7 +1558,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) - if (MEM_read32(ip+1) == MEM_read32(repMatch)) + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1598,7 +1598,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) - if (MEM_read32(ip) == MEM_read32(repMatch)) + if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ size_t repLength; @@ -1642,7 +1642,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) - if (MEM_read32(ip) == MEM_read32(repMatch)) + if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ size_t repLength; @@ -1699,7 +1699,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)((ip-base) - offset_2); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (MEM_read32(ip) == MEM_read32(repMatch)) + if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; @@ -1874,6 +1874,7 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* ctxPtr, ctxPtr->dictLimit = (U32)(ctxPtr->nextSrc - ctxPtr->base); ctxPtr->dictBase = ctxPtr->base; ctxPtr->base += ip - ctxPtr->nextSrc; + ctxPtr->nextToUpdate = ctxPtr->dictLimit; } /* input-dictionary overlap */ diff --git a/programs/Makefile b/programs/Makefile index f46afab8f55..fe2d3413ec4 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -153,19 +153,49 @@ test-zstd: zstd datagen echo foo | ./zstd > /dev/full; if [ $$? -eq 0 ] ; then echo "write error not detected!"; false; fi echo foo | ./zstd | ./zstd -d > /dev/full; if [ $$? -eq 0 ] ; then echo "write error not detected!"; false; fi @echo "**** zstd round-trip tests **** " - ./datagen | md5sum > tmp1 - ./datagen | ./zstd -v | ./zstd -d | md5sum > tmp2 - diff tmp1 tmp2 - ./datagen | ./zstd -6 -v | ./zstd -d | md5sum > tmp2 - diff tmp1 tmp2 - ./datagen -g256MB | md5sum > tmp1 - ./datagen -g256MB | ./zstd -v | ./zstd -d | md5sum > tmp2 - diff tmp1 tmp2 - ./datagen -g256MB | ./zstd -3 -v | ./zstd -d | md5sum > tmp2 - diff tmp1 tmp2 - ./datagen -g6GB -P99 | md5sum > tmp1 - ./datagen -g6GB -P99 | ./zstd -vq | ./zstd -d | md5sum > tmp2 - diff tmp1 tmp2 + @./datagen | md5sum > tmp1 + ./datagen | ./zstd -v | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen | ./zstd -6 -v | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + @./datagen -g256MB | md5sum > tmp1 + ./datagen -g256MB | ./zstd -v | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g256MB | ./zstd -v2 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g256MB | ./zstd -v3 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + @./datagen -g128MB -P60| md5sum > tmp1 + ./datagen -g128MB -P60 | ./zstd -v4 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g128MB -P60 | ./zstd -v5 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g128MB -P60 | ./zstd -v6 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + @./datagen -g64MB -P70 | md5sum > tmp1 + 
./datagen -g64MB -P70 | ./zstd -v7 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g64MB -P70 | ./zstd -v8 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g64MB -P70 | ./zstd -v9 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + @./datagen -g32MB -P75 | md5sum > tmp1 + ./datagen -g32MB -P75 | ./zstd -v10 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g32MB -P75 | ./zstd -v11 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g32MB -P75 | ./zstd -v12 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + @./datagen -g16MB -P80 | md5sum > tmp1 + ./datagen -g16MB -P80 | ./zstd -v13 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g16MB -P80 | ./zstd -v14 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g16MB -P80 | ./zstd -v15 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g6GB -P99 | md5sum > tmp1 + ./datagen -g6GB -P99 | ./zstd -vq | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 test-zstd32: zstd32 datagen ./datagen | ./zstd32 -v | ./zstd32 -d > $(VOID) diff --git a/programs/fileio.c b/programs/fileio.c index 04a609fed63..c599c476a26 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -283,7 +283,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* inSize = fread(inSlot, (size_t)1, blockSize, finput); if (inSize==0) break; filesize += inSize; - DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20)); + DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20)); /* Compress Block */ cSize = ZSTD_compressContinue(ctx, outBuff, outBuffSize, inSlot, inSize); From 5054ee0cc04064f562704c8fe67d08bf71277976 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Nov 2015 13:34:21 +0100 Subject: [PATCH 37/79] rollbuffer refactor --- lib/error.h | 1 + lib/legacy/zstd_v02.c | 2 +- lib/zstd_compress.c | 218 +++++++++++++++++++----------------------- 3 files changed, 101 insertions(+), 120 deletions(-) diff --git a/lib/error.h b/lib/error.h index 6afa87f185e..68e8d46c3ca 100644 
--- a/lib/error.h +++ b/lib/error.h @@ -68,6 +68,7 @@ extern "C" { #define ERROR_LIST(ITEM) \ ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ + ITEM(PREFIX(mode_unsupported)) \ ITEM(PREFIX(memory_allocation)) \ ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index d98dd6f7b59..5c0e7aff325 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -475,7 +475,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); MEM_STATIC unsigned BIT_highbit32 (register U32 val) { # if defined(_MSC_VER) /* Visual */ - unsigned long r; + unsigned long r=0; _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index cfc590a1bcc..a379c4b8471 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -303,7 +303,7 @@ static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, #define LITERAL_NOENTROPY 63 /* cheap heuristic */ -size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, +size_t ZSTD_compressSequences(void* dst, size_t maxDstSize, const seqStore_t* seqStorePtr, size_t srcSize) { @@ -322,8 +322,9 @@ size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, const BYTE* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; BYTE* const offCodeTable = seqStorePtr->offCodeStart; - BYTE* op = dst; - BYTE* const oend = dst + maxDstSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + maxDstSize; const size_t nbSeq = llPtr - llTable; const size_t minGain = ZSTD_minGain(srcSize); const size_t maxCSize = srcSize - minGain; @@ -500,9 +501,9 @@ size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, } /* check compressibility */ - if ((size_t)(op-dst) >= maxCSize) return 0; + if ((size_t)(op-ostart) 
>= maxCSize) return 0; - return op - dst; + return op - ostart; } @@ -585,7 +586,7 @@ static unsigned ZSTD_highbit(U32 val) # endif } -MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val) +static unsigned ZSTD_NbCommonBytes (register size_t val) { if (MEM_isLittleEndian()) { @@ -654,7 +655,7 @@ MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val) } -MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) { const BYTE* const pStart = pIn; @@ -672,6 +673,21 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pI return (size_t)(pIn - pStart); } +/** ZSTD_count_2segments +* can count match length with ip & match in potentially 2 different segments. +* convention : on reaching mEnd, match count continue starting from iStart +*/ +static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart) +{ + size_t matchLength; + const BYTE* vEnd = ip + (mEnd - match); + if (vEnd > iEnd) vEnd = iEnd; + matchLength = ZSTD_count(ip, match, vEnd); + if (match + matchLength == mEnd) + matchLength += ZSTD_count(ip+matchLength, iStart, iEnd); + return matchLength; +} + /* ************************************* @@ -723,7 +739,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const BYTE* const lowest = base + ctx->lowLimit; + const BYTE* const lowest = base + ctx->dictLimit; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; @@ -743,7 +759,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, /* Main Search Loop */ while (ip < ilimit) /* < instead of <=, because repcode check at (ip+1) */ { - size_t matchLength; + size_t mlCode; size_t offset; const size_t h = ZSTD_hashPtr(ip, hBits, mls); const BYTE* match = base + 
hashTable[h]; @@ -751,7 +767,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { - matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); + mlCode = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); ip++; offset = 0; } @@ -763,22 +779,22 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } - matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend); - while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; } /* catch up */ + mlCode = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend); offset = ip-match; + while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; } /* match found */ - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength); - ip += matchLength + MINMATCH; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode); + hashTable[ZSTD_hashPtr(ip+2, hBits, mls)] = (U32)(ip+2-base); /* can't use current : ip may have changed */ + ip += mlCode + MINMATCH; anchor = ip; if (ip <= ilimit) { /* Fill Table */ - hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base); hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) @@ -804,7 +820,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, } /* Finale compression stage */ - return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + return ZSTD_compressSequences(dst, maxDstSize, seqStorePtr, srcSize); } @@ -844,6 +860,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* ip = istart; const BYTE* anchor = istart; const U32 lowLimit = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowLimit; const U32 dictLimit = ctx->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; const BYTE* const dictEnd = 
dictBase + dictLimit; @@ -870,13 +887,12 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const size_t h = ZSTD_hashPtr(ip, hBits, mls); const U32 matchIndex = hashTable[h]; const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictBase + lowLimit : lowPrefixPtr; + const BYTE* match = matchBase + matchIndex; const U32 current = (U32)(ip-base); const U32 repIndex = current + 1 - offset_1; const BYTE* repBase = repIndex < dictLimit ? dictBase : base; const BYTE* repMatch = repBase + repIndex; - size_t matchLength; + size_t mlCode; U32 offset; hashTable[h] = current; /* update hash table */ @@ -884,11 +900,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - const BYTE* vEnd = ip+1 + (repMatchEnd-repMatch); - if (vEnd > iend) vEnd = iend; - matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, vEnd); - if (repMatch + matchLength + MINMATCH == dictEnd) - matchLength += ZSTD_count(ip+1 + matchLength + MINMATCH, lowPrefixPtr, iend); + mlCode = ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repMatchEnd, lowPrefixPtr); ip++; offset = 0; } @@ -899,26 +911,24 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE* iEndCount = (matchEnd - match < iend - ip) ? ip + (matchEnd - match) : iend; - matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount); - if (match + matchLength + MINMATCH == dictEnd) - matchLength += ZSTD_count(ip + matchLength + MINMATCH, lowPrefixPtr, iend); - while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; } /* catch up */ + const BYTE* lowMatchPtr = matchIndex < dictLimit ? 
dictStart : lowPrefixPtr; + mlCode = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iend, matchEnd, lowPrefixPtr); + while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ offset = current - matchIndex; offset_2 = offset_1; offset_1 = offset; } } - /* found a match */ - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength); - ip += matchLength + MINMATCH; + /* found a match : store it */ + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode); + hashTable[ZSTD_hashPtr(ip+2, hBits, mls)] = (U32)(ip+2-base); /* can't use current : ip may have changed */ + ip += mlCode + MINMATCH; anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base); + /* Fill Table */ hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while (ip <= ilimit) @@ -929,16 +939,12 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit)) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - size_t maxIlength = iend - ip; - size_t maxMlength = repIndex2 < dictLimit ? (size_t)(dictLimit - repIndex2) : (size_t)(iend - repMatch2); - size_t maxML = MIN(maxMlength, maxIlength); - size_t ml = ZSTD_count(ip+MINMATCH, repMatch2+MINMATCH, ip + maxML); - U32 tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - if (ml+MINMATCH == maxMlength) /* reached end of extDict */ - ml += ZSTD_count(ip+MINMATCH+ml, lowPrefixPtr, iend); - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml); - ip += ml+MINMATCH; + const BYTE* const repEnd2 = repIndex2 < dictLimit ? 
dictEnd : iend; + size_t repLength2 = ZSTD_count_2segments(ip+MINMATCH, repMatch2+MINMATCH, iend, repEnd2, lowPrefixPtr); + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2); + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; + ip += repLength2+MINMATCH; anchor = ip; continue; } @@ -955,7 +961,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, } /* Finale compression stage */ - return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + return ZSTD_compressSequences(dst, maxDstSize, seqStorePtr, srcSize); } @@ -1020,7 +1026,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ match = base + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ break; /* just drop , to guarantee consistency (miss a bit of compression; if someone knows better, please tell) */ @@ -1083,14 +1090,15 @@ size_t ZSTD_insertBtAndFindBestMatch ( const BYTE* match = base + matchIndex; size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; if (matchLength > bestLength) { if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) ) bestLength = matchLength, *offsetPtr = current - matchIndex; if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* just drop, to guarantee consistency (miss 
a little bit of compression) */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ } if (match[matchLength] < ip[matchLength]) @@ -1098,18 +1106,18 @@ size_t ZSTD_insertBtAndFindBestMatch ( /* match is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - if (matchIndex <= btLow) smallerPtr=&dummy32; /* beyond tree size, stop the search */ - matchIndex = (matchIndex <= btLow) ? windowLow : nextPtr[1]; + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ } else { /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ largerPtr = nextPtr; - if (matchIndex <= btLow) largerPtr=&dummy32; /* beyond tree size, stop the search */ - matchIndex = (matchIndex <= btLow) ? windowLow : nextPtr[0]; + matchIndex = nextPtr[0]; } } @@ -1213,7 +1221,7 @@ size_t ZSTD_HcFindBestMatch ( const U32 chainSize = (1 << zc->params.contentLog); const U32 minChain = current > chainSize ? 
current - chainSize : 0; const U32 chainMask = chainSize-1; - const U32 lowLimit = zc->lowLimit; + const U32 lowLimit = zc->dictLimit; /* should be equal to zc->lowLimit, but safer use this one if they are not since dictLimit >= lowLimit */ U32 matchIndex; const BYTE* match; int nbAttempts=maxNbAttempts; @@ -1230,10 +1238,10 @@ size_t ZSTD_HcFindBestMatch ( { const size_t mlt = ZSTD_count(ip, match, iLimit); if (mlt > ml) - //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1)))) + //if ( (4*(int)(mlt-ml)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)(*offsetPtr) + 1)) ) /* stronger but slower */ { ml = mlt; *offsetPtr = ip-match; - if (ip+mlt >= iLimit) break; + if (ip+mlt >= iLimit) break; /* max ml possible; avoid risk of reading too far when testing ip[ml] */ } } @@ -1273,49 +1281,39 @@ size_t ZSTD_HcFindBestMatch_extDict ( const BYTE* const base = zc->base; const BYTE* const dictBase = zc->dictBase; const U32 dictLimit = zc->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; const U32 lowLimit = zc->lowLimit; const U32 current = (U32)(ip-base); const U32 minChain = current > chainSize ? 
current - chainSize : 0; U32 matchIndex; const BYTE* match; int nbAttempts=maxNbAttempts; - size_t ml=0; + size_t ml=MINMATCH-1; /* HC4 match finder */ matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, matchLengthSearch); while ((matchIndex>lowLimit) && (nbAttempts)) { + size_t currentMl=0; nbAttempts--; if (matchIndex >= dictLimit) { match = base + matchIndex; if (match[ml] == ip[ml]) /* potentially better */ - { - const size_t mlt = ZSTD_count(ip, match, iLimit); - if (mlt > ml) - //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1)))) - { - ml = mlt; *offsetPtr = ip-match; - if (ip+mlt >= iLimit) break; - } - } + currentMl = ZSTD_count(ip, match, iLimit); } else { match = dictBase + matchIndex; - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 */ - { - size_t mlt; - const BYTE* vLimit = ip + (dictLimit - matchIndex); - if (vLimit > iLimit) vLimit = iLimit; - mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; - if (match+mlt == dictBase+dictLimit) - mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit); - if (mlt > ml) { ml = mlt; *offsetPtr = current - matchIndex; } - } + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; } + /* save best solution */ + if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex; } + if (matchIndex <= minChain) break; matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); } @@ -1489,7 +1487,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, } /* Final compression stage */ - return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + return ZSTD_compressSequences(dst, maxDstSize, seqStorePtr, srcSize); } @@ -1557,16 +1555,12 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)(current+1 - offset_1); const 
BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - const BYTE* vEnd = ip+1 + (repEnd - repMatch); - if (vEnd > iend) vEnd = iend; - matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH; - if (repMatch + matchLength == dictEnd) - matchLength += ZSTD_count(ip+1+matchLength, prefixStart, iend); + matchLength = ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; if (depth==0) goto _storeSequence; } } @@ -1597,23 +1591,16 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ - size_t repLength; const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - const BYTE* vEnd = ip + (repEnd - repMatch); - if (vEnd > iend) vEnd = iend; - repLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH; - if (repMatch + repLength == dictEnd) - repLength += ZSTD_count(ip+repLength, prefixStart, iend); - { - int gain2 = (int)(repLength * 3); - int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); - if ((repLength >= MINMATCH) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } + size_t repLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; + int gain2 = (int)(repLength * 3); + int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= MINMATCH) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; } } @@ -1641,23 +1628,16 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit)) + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ - size_t repLength; const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - const BYTE* vEnd = ip + (repEnd - repMatch); - if (vEnd > iend) vEnd = iend; - repLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH; - if (repMatch + repLength == dictEnd) - repLength += ZSTD_count(ip+repLength, prefixStart, iend); - { - int gain2 = (int)(repLength * 4); - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); - if ((repLength >= MINMATCH) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } + size_t repLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; + int gain2 = (int)(repLength * 4); + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= MINMATCH) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; } } @@ -1703,14 +1683,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - const BYTE* vEnd = ip + (repEnd - repMatch); - if (vEnd > iend) vEnd = iend; - matchLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH; - if (repMatch + matchLength == dictEnd) - matchLength += ZSTD_count(ip+matchLength, prefixStart, iend); - offset = offset_2; - offset_2 = offset_1; - offset_1 = offset; + matchLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; + offset = offset_2; offset_2 = offset_1; offset_1 = offset; /* swap offset history */ ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; @@ -1728,7 +1702,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, } /* Final compression stage */ - return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + return ZSTD_compressSequences(dst, maxDstSize, seqStorePtr, srcSize); } @@ -1747,6 +1721,12 @@ size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDst return 
ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2); } +static size_t ZSTD_compressError(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + (void)ctx; (void)dst; (void)maxDstSize; (void)src; (void)srcSize; + return ERROR(mode_unsupported); +} + typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); @@ -1766,7 +1746,7 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int case ZSTD_lazy2: return ZSTD_compressBlock_lazy2_extDict; case ZSTD_btlazy2: - return ZSTD_compressBlock_btlazy2; + return ZSTD_compressError; } } else @@ -1818,7 +1798,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr, if ((U32)(ip+blockSize - (ctxPtr->base + ctxPtr->lowLimit)) > maxDist) /* respect windowLog contract */ - ctxPtr->lowLimit = (U32)(ip+blockSize - ctxPtr->base) - maxDist; + ctxPtr->dictLimit = ctxPtr->lowLimit = (U32)(ip+blockSize - ctxPtr->base) - maxDist; //cSize = blockCompressor(ctxPtr, op+3, maxDstSize-3, ip, blockSize); cSize = ZSTD_compressBlock(ctxPtr, op+3, maxDstSize-3, ip, blockSize); From 06eade51020b57f8b44e7167cc963ab6f8cad5d0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Nov 2015 14:23:47 +0100 Subject: [PATCH 38/79] comments --- lib/zstd_compress.c | 46 ++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index a379c4b8471..295a7e6560e 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -765,7 +765,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, const BYTE* match = base + hashTable[h]; hashTable[h] = (U32)(ip-base); - if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) + if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) /* note : by construction, offset_1 <= (ip-base) */ { mlCode = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); ip++; @@ -773,7 +773,7 @@ size_t 
ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, } else { - if ( (match < lowest) || + if ( (match <= lowest) || (MEM_read32(match) != MEM_read32(ip)) ) { ip += ((ip-anchor) >> g_searchStrength) + 1; @@ -801,11 +801,11 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { /* store sequence */ - size_t ml = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + size_t rlCode = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); size_t tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml); - ip += ml+MINMATCH; + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode); + ip += rlCode+MINMATCH; anchor = ip; continue; /* faster when present ... (?) */ } @@ -989,8 +989,9 @@ size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, /* ************************************* * Binary Tree search ***************************************/ -/** ZSTD_insertBt1 : add one ptr to tree - @ip : assumed <= iend-8 */ +/** ZSTD_insertBt1 : add one or multiple positions to tree +* @ip : assumed <= iend-8 +* @return : nb of positions added */ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares) { U32* const hashTable = zc->hashTable; @@ -1011,10 +1012,10 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co const U32 windowSize = 1 << zc->params.windowLog; const U32 windowLow = windowSize >= current ? 
0 : current - windowSize; - if ((current-matchIndex == 1) /* RLE */ - && MEM_read64(match) == MEM_read64(ip)) + if ( (current-matchIndex == 1) /* RLE */ + && (MEM_read64(match) == MEM_read64(ip)) ) { - size_t rleLength = ZSTD_count(ip+sizeof(size_t), match+sizeof(size_t), iend) + sizeof(size_t); + size_t rleLength = ZSTD_count(ip+8, match+8, iend) + 8; return (U32)(rleLength - mls); } @@ -1030,7 +1031,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* just drop , to guarantee consistency (miss a bit of compression; if someone knows better, please tell) */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ if (match[matchLength] < ip[matchLength]) { @@ -1133,7 +1134,6 @@ static const BYTE* ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BY const BYTE* const base = zc->base; const U32 target = (U32)(ip - base); U32 idx = zc->nextToUpdate; - //size_t dummy; for( ; idx < target ; ) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares); @@ -1152,12 +1152,8 @@ size_t ZSTD_BtFindBestMatch ( const U32 maxNbAttempts, const U32 mls) { const BYTE* nextToUpdate = ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - if (nextToUpdate > ip) - { - /* RLE data */ - *offsetPtr = 1; - return ZSTD_count(ip, ip-1, iLimit); - } + if (nextToUpdate > ip) /* RLE data */ + { *offsetPtr = 1; return ZSTD_count(ip, ip-1, iLimit); } return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls); } @@ -1184,8 +1180,9 @@ FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS ( #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] -/* Update chains up to ip (excluded) */ -static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +/* Update chains up to ip (excluded) + Assumption : 
always within prefix (ie. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) { U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; @@ -1221,7 +1218,7 @@ size_t ZSTD_HcFindBestMatch ( const U32 chainSize = (1 << zc->params.contentLog); const U32 minChain = current > chainSize ? current - chainSize : 0; const U32 chainMask = chainSize-1; - const U32 lowLimit = zc->dictLimit; /* should be equal to zc->lowLimit, but safer use this one if they are not since dictLimit >= lowLimit */ + const U32 lowLimit = zc->dictLimit; /* should be == zc->lowLimit, but safer to use this one if they are not, since dictLimit >= lowLimit */ U32 matchIndex; const BYTE* match; int nbAttempts=maxNbAttempts; @@ -1252,6 +1249,13 @@ size_t ZSTD_HcFindBestMatch ( return ml; } +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_HcFindBestMatch_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch); + FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, From c1e52f0f05945c992fb607ca3da216f47157abfa Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Nov 2015 14:37:59 +0100 Subject: [PATCH 39/79] hc simplication --- lib/zstd_compress.c | 99 +++++++++++---------------------------------- 1 file changed, 24 insertions(+), 75 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 295a7e6560e..902b1d4130b 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1206,78 +1206,11 @@ static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HcFindBestMatch ( +size_t ZSTD_HcFindBestMatch_generic ( ZSTD_CCtx* zc, 
/* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - const BYTE* const base = zc->base; - const U32 current = (U32)(ip-base); - U32* const chainTable = zc->contentTable; - const U32 chainSize = (1 << zc->params.contentLog); - const U32 minChain = current > chainSize ? current - chainSize : 0; - const U32 chainMask = chainSize-1; - const U32 lowLimit = zc->dictLimit; /* should be == zc->lowLimit, but safer to use this one if they are not, since dictLimit >= lowLimit */ - U32 matchIndex; - const BYTE* match; - int nbAttempts=maxNbAttempts; - size_t ml=0; - - /* HC4 match finder */ - matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, matchLengthSearch); - - while ((matchIndex>lowLimit) && (nbAttempts)) - { - nbAttempts--; - match = base + matchIndex; - if (match[ml] == ip[ml]) /* potentially better */ - { - const size_t mlt = ZSTD_count(ip, match, iLimit); - if (mlt > ml) - //if ( (4*(int)(mlt-ml)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)(*offsetPtr) + 1)) ) /* stronger but slower */ - { - ml = mlt; *offsetPtr = ip-match; - if (ip+mlt >= iLimit) break; /* max ml possible; avoid risk of reading too far when testing ip[ml] */ - } - } - - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - return ml; -} - -FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HcFindBestMatch_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch); - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : - case 4 : return 
ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - - -FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HcFindBestMatch_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) + const U32 maxNbAttempts, const U32 mls, const U32 extDict) { U32* const chainTable = zc->contentTable; const U32 chainSize = (1 << zc->params.contentLog); @@ -1296,13 +1229,13 @@ size_t ZSTD_HcFindBestMatch_extDict ( size_t ml=MINMATCH-1; /* HC4 match finder */ - matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, matchLengthSearch); + matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); while ((matchIndex>lowLimit) && (nbAttempts)) { size_t currentMl=0; nbAttempts--; - if (matchIndex >= dictLimit) + if ((!extDict) || matchIndex >= dictLimit) { match = base + matchIndex; if (match[ml] == ip[ml]) /* potentially better */ @@ -1326,8 +1259,24 @@ size_t ZSTD_HcFindBestMatch_extDict ( } +FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); + } +} + + FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_CCtx* zc, /* Index table will be updated */ + ZSTD_CCtx* zc, const BYTE* ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 
maxNbAttempts, const U32 matchLengthSearch) @@ -1335,9 +1284,9 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( switch(matchLengthSearch) { default : - case 4 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); } } From 03526e18fa8db568311d7233b82ea38302eb1f8d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Nov 2015 15:29:15 +0100 Subject: [PATCH 40/79] preliminary rollbuffer support for bt mode --- lib/zstd_compress.c | 228 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 216 insertions(+), 12 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 902b1d4130b..ea7dd6974a0 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1009,8 +1009,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; U32 dummy32; /* to be nullified at the end */ - const U32 windowSize = 1 << zc->params.windowLog; - const U32 windowLow = windowSize >= current ? 0 : current - windowSize; + const U32 windowLow = zc->lowLimit; if ( (current-matchIndex == 1) /* RLE */ && (MEM_read64(match) == MEM_read64(ip)) ) @@ -1076,8 +1075,7 @@ size_t ZSTD_insertBtAndFindBestMatch ( const BYTE* const base = zc->base; const U32 current = (U32)(ip-base); const U32 btLow = btMask >= current ? 0 : current - btMask; - const U32 windowSize = 1 << zc->params.windowLog; - const U32 windowLow = windowSize >= current ? 
0 : current - windowSize; + const U32 windowLow = zc->lowLimit; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; size_t bestLength = 0; @@ -1174,6 +1172,213 @@ FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS ( } +/** ZSTD_insertBt1_extDict : add one or multiple positions to tree +* @ip : assumed <= iend-8 +* @return : nb of positions added */ +static U32 ZSTD_insertBt1_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->contentTable; + const U32 btLog = zc->params.contentLog - 1; + const U32 btMask= (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match = base + matchIndex; + U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 
0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 dummy32; /* to be nullified at the end */ + const U32 windowLow = zc->lowLimit; + + if ( (current-matchIndex == 1) /* RLE */ + && (MEM_read64(match) == MEM_read64(ip)) ) + { + size_t rleLength = ZSTD_count(ip+8, match+8, iend) + 8; + return (U32)(rleLength - mls); + } + + hashTable[h] = (U32)(ip - base); /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) + { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + + if (matchIndex+matchLength >= dictLimit) + { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } + else + { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + } + + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ + + if (match[matchLength] < ip[matchLength]) /* necessarily within correct buffer */ + { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } + else + { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + 
matchIndex = nextPtr[0]; + } + } + + *smallerPtr = *largerPtr = 0; + return 1; +} + + +static const BYTE* ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + for( ; idx < target ; ) + idx += ZSTD_insertBt1_extDict(zc, base+idx, mls, iend, nbCompares); + + zc->nextToUpdate = idx; + return base + idx; +} + + +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_insertBtAndFindBestMatch_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 nbCompares, const U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->contentTable; + const U32 btLog = zc->params.contentLog - 1; + const U32 btMask= (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 
0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + size_t bestLength = 0; + U32 dummy32; /* to be nullified at the end */ + + hashTable[h] = (U32)(ip-base); /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) + { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if (matchIndex+matchLength >= dictLimit) + { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } + else + { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + } + + if (matchLength > bestLength) + { + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = current - matchIndex; + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) + { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } + else + { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + 
matchIndex = nextPtr[0]; + } + } + + *smallerPtr = *largerPtr = 0; + + zc->nextToUpdate = current+1; /* current has been inserted */ + return bestLength; +} + +/** Tree updater, providing best match */ +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_BtFindBestMatch_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + const BYTE* nextToUpdate = ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + if (nextToUpdate > ip) /* RLE data */ + { *offsetPtr = 1; return ZSTD_count(ip, ip-1, iLimit); } + return ZSTD_insertBtAndFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls); +} + + +FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + /* *********************** * Hash Chain *************************/ @@ -1489,7 +1694,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + searchMax_f searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; /* init */ ZSTD_resetSeqStore(seqStorePtr); @@ -1674,10 +1879,9 @@ size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDst return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2); } -static size_t ZSTD_compressError(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - (void)ctx; (void)dst; (void)maxDstSize; (void)src; (void)srcSize; - return ERROR(mode_unsupported); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 1, 2); } @@ -1699,7 +1903,7 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int case ZSTD_lazy2: return ZSTD_compressBlock_lazy2_extDict; case ZSTD_btlazy2: - return ZSTD_compressError; + return ZSTD_compressBlock_btlazy2_extDict; } } else @@ -1722,11 +1926,11 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int } -size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctx->params.strategy, ctx->lowLimit < ctx->dictLimit); + ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.strategy, zc->lowLimit < zc->dictLimit); if (srcSize < MIN_CBLOCK_SIZE+3) return 0; /* don't even attempt compression below a certain srcSize */ - return blockCompressor(ctx, dst, maxDstSize, src, srcSize); + return blockCompressor(zc, dst, maxDstSize, src, srcSize); } From 239cc289d323e65f691fd440fab716e3f0632fc3 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Nov 2015 16:17:21 +0100 Subject: [PATCH 41/79] fixed asan bugs --- 
lib/zstd_compress.c | 58 +++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index ea7dd6974a0..8c08a8de5eb 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -787,14 +787,14 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, } /* match found */ - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode); - hashTable[ZSTD_hashPtr(ip+2, hBits, mls)] = (U32)(ip+2-base); /* can't use current : ip may have changed */ + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode); ip += mlCode + MINMATCH; anchor = ip; if (ip <= ilimit) { /* Fill Table */ + hashTable[ZSTD_hashPtr(ip-(mlCode+MINMATCH)+2, hBits, mls)] = (U32)(ip-(mlCode+MINMATCH)+2-base); /* here because ip-(mlCode+MINMATCH)+2 could be > iend-8 without ip <= ilimit check*/ hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) @@ -922,13 +922,13 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, /* found a match : store it */ ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode); - hashTable[ZSTD_hashPtr(ip+2, hBits, mls)] = (U32)(ip+2-base); /* can't use current : ip may have changed */ ip += mlCode + MINMATCH; anchor = ip; if (ip <= ilimit) { /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while (ip <= ilimit) @@ -1442,6 +1442,7 @@ size_t ZSTD_HcFindBestMatch_generic ( nbAttempts--; if ((!extDict) || matchIndex >= dictLimit) { + //printf("current : %6u ; matchIndex : %6u ; dictLimit : %6u ; ml : %3u \n", current, matchIndex, dictLimit, (U32)ml); match = base + matchIndex; if (match[ml] == ip[ml]) /* potentially better */ currentMl = ZSTD_count(ip, match, iLimit); @@ -1454,7 +1455,7 @@ size_t ZSTD_HcFindBestMatch_generic ( } /* save best solution */ - if (currentMl > ml) { ml = 
currentMl; *offsetPtr = current - matchIndex; } + if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ } if (matchIndex <= minChain) break; matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); @@ -1540,18 +1541,18 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, } { - /* first search (depth 0) */ - size_t offsetFound = 99999999; - size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < MINMATCH) - { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } + /* first search (depth 0) */ + size_t offsetFound = 99999999; + size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < MINMATCH) + { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } /* let's try to find a better solution */ if (depth>=1) @@ -1724,18 +1725,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, } { - /* first search (depth 0) */ - size_t offsetFound = 99999999; - size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < MINMATCH) - { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } + /* first search (depth 0) */ + size_t offsetFound = 99999999; + size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < MINMATCH) + { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } /* let's try to 
find a better solution */ if (depth>=1) @@ -1837,6 +1838,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const U32 repIndex = (U32)((ip-base) - offset_2); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ From 225179dd0568cd519e97cfa78d6ebf038954b4f0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Nov 2015 16:52:22 +0100 Subject: [PATCH 42/79] more tests (bt modes) --- lib/zstd_compress.c | 4 ++++ programs/Makefile | 54 +++++++++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 8c08a8de5eb..cae9c2471e0 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1222,6 +1222,8 @@ static U32 ZSTD_insertBt1_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const U32 { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ @@ -1312,6 +1314,8 @@ size_t ZSTD_insertBtAndFindBestMatch_extDict ( { match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } if (matchLength > bestLength) diff --git a/programs/Makefile b/programs/Makefile index fe2d3413ec4..eddd3258371 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -30,7 +30,7 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # 
########################################################################## -VERSION?= 0.3.6 +VERSION?= 0.4.0 DESTDIR?= PREFIX ?= /usr/local @@ -158,40 +158,52 @@ test-zstd: zstd datagen @diff tmp1 tmp2 ./datagen | ./zstd -6 -v | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - @./datagen -g256MB | md5sum > tmp1 - ./datagen -g256MB | ./zstd -v | ./zstd -d | md5sum > tmp2 + @./datagen -g257MB | md5sum > tmp1 + ./datagen -g257MB | ./zstd -v | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g256MB | ./zstd -v2 | ./zstd -d | md5sum > tmp2 + ./datagen -g257MB | ./zstd -v2 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g256MB | ./zstd -v3 | ./zstd -d | md5sum > tmp2 + ./datagen -g257MB | ./zstd -v3 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - @./datagen -g128MB -P60| md5sum > tmp1 - ./datagen -g128MB -P60 | ./zstd -v4 | ./zstd -d | md5sum > tmp2 + @./datagen -g129MB -P60| md5sum > tmp1 + ./datagen -g129MB -P60 | ./zstd -v4 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g128MB -P60 | ./zstd -v5 | ./zstd -d | md5sum > tmp2 + ./datagen -g129MB -P60 | ./zstd -v5 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g128MB -P60 | ./zstd -v6 | ./zstd -d | md5sum > tmp2 + ./datagen -g129MB -P60 | ./zstd -v6 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - @./datagen -g64MB -P70 | md5sum > tmp1 - ./datagen -g64MB -P70 | ./zstd -v7 | ./zstd -d | md5sum > tmp2 + @./datagen -g65MB -P70 | md5sum > tmp1 + ./datagen -g65MB -P70 | ./zstd -v7 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g64MB -P70 | ./zstd -v8 | ./zstd -d | md5sum > tmp2 + ./datagen -g65MB -P70 | ./zstd -v8 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g64MB -P70 | ./zstd -v9 | ./zstd -d | md5sum > tmp2 + ./datagen -g65MB -P70 | ./zstd -v9 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - @./datagen -g32MB -P75 | md5sum > tmp1 - ./datagen -g32MB -P75 | ./zstd -v10 | ./zstd -d | md5sum > tmp2 + @./datagen -g33MB -P75 | md5sum > tmp1 + ./datagen -g33MB -P75 | ./zstd -v10 | ./zstd 
-d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g32MB -P75 | ./zstd -v11 | ./zstd -d | md5sum > tmp2 + ./datagen -g33MB -P75 | ./zstd -v11 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g32MB -P75 | ./zstd -v12 | ./zstd -d | md5sum > tmp2 + ./datagen -g33MB -P75 | ./zstd -v12 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - @./datagen -g16MB -P80 | md5sum > tmp1 - ./datagen -g16MB -P80 | ./zstd -v13 | ./zstd -d | md5sum > tmp2 + @./datagen -g17MB -P80 | md5sum > tmp1 + ./datagen -g17MB -P80 | ./zstd -v13 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g16MB -P80 | ./zstd -v14 | ./zstd -d | md5sum > tmp2 + ./datagen -g17MB -P80 | ./zstd -v14 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g16MB -P80 | ./zstd -v15 | ./zstd -d | md5sum > tmp2 + ./datagen -g17MB -P80 | ./zstd -v15 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + @./datagen -g9MB -P85 | md5sum > tmp1 + ./datagen -g9MB -P85 | ./zstd -v16 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g9MB -P85 | ./zstd -v17 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g9MB -P85 | ./zstd -v18 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + @./datagen -g5MB -P89 | md5sum > tmp1 + ./datagen -g5MB -P89 | ./zstd -v19 | ./zstd -d | md5sum > tmp2 + @diff tmp1 tmp2 + ./datagen -g5MB -P89 | ./zstd -v20 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 ./datagen -g6GB -P99 | md5sum > tmp1 ./datagen -g6GB -P99 | ./zstd -vq | ./zstd -d | md5sum > tmp2 From b5d2a0c8279a4e916969bc787a7f82048459bb9e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Nov 2015 17:10:19 +0100 Subject: [PATCH 43/79] visual compatibility --- programs/paramgrill.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/programs/paramgrill.c b/programs/paramgrill.c index 99ac909fe41..47df78ab998 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -43,6 +43,10 @@ # define BMK_LEGACY_TIMER 1 #endif +#if defined(_MSC_VER) +# define snprintf _snprintf /* snprintf unsupported by 
Visual <= 2012 */ +#endif + /************************************** * Includes @@ -873,6 +877,7 @@ int usage(char* exename) DISPLAY( "Usage :\n"); DISPLAY( " %s [arg] file\n", exename); DISPLAY( "Arguments :\n"); + DISPLAY( " file : path to the file used as reference (if none, generates a compressible sample)\n"); DISPLAY( " -H/-h : Help (this text + advanced options)\n"); return 0; } @@ -881,7 +886,8 @@ int usage_advanced(void) { DISPLAY( "\nAdvanced options :\n"); DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS); - DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100); + DISPLAY( " -B# : cut input into blocks of size # (default : single block)\n"); + DISPLAY( " -P# : generated sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100); return 0; } From c36521571e0d7a5fb8966b9fe3092451fc7dbd3b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 24 Nov 2015 14:06:07 +0100 Subject: [PATCH 44/79] fixed large buffers bug --- lib/zstd_compress.c | 67 ++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index cae9c2471e0..0c9b0e87313 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -727,19 +727,19 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) ***************************************/ FORCE_INLINE -size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx, +size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, void* dst, size_t maxDstSize, const void* src, size_t srcSize, const U32 mls) { - U32* hashTable = ctx->hashTable; - const U32 hBits = ctx->params.hashLog; - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const base = ctx->base; + U32* hashTable = zc->hashTable; + const U32 hBits = zc->params.hashLog; + seqStore_t* seqStorePtr = &(zc->seqStore); + const BYTE* const base = zc->base; const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const 
BYTE* anchor = istart; - const BYTE* const lowest = base + ctx->dictLimit; + const BYTE* const lowest = base + zc->dictLimit; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; @@ -1446,7 +1446,6 @@ size_t ZSTD_HcFindBestMatch_generic ( nbAttempts--; if ((!extDict) || matchIndex >= dictLimit) { - //printf("current : %6u ; matchIndex : %6u ; dictLimit : %6u ; ml : %3u \n", current, matchIndex, dictLimit, (U32)ml); match = base + matchIndex; if (match[ml] == ip[ml]) /* potentially better */ currentMl = ZSTD_count(ip, match, iLimit); @@ -1691,6 +1690,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictBase = ctx->dictBase; const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ctx->lowLimit; size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; const U32 maxSearches = 1 << ctx->params.searchLog; @@ -1823,8 +1823,10 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, /* catch up */ if (offset) { - while ((start>anchor) && (start>prefixStart+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */ - { start--; matchLength++; } + U32 matchIndex = (U32)((start-base) - offset); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? 
dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; } @@ -1960,8 +1962,11 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr, if (remaining < blockSize) blockSize = remaining; if ((U32)(ip+blockSize - (ctxPtr->base + ctxPtr->lowLimit)) > maxDist) + { /* respect windowLog contract */ - ctxPtr->dictLimit = ctxPtr->lowLimit = (U32)(ip+blockSize - ctxPtr->base) - maxDist; + ctxPtr->lowLimit = (U32)(ip+blockSize - ctxPtr->base) - maxDist; + if (ctxPtr->dictLimit < ctxPtr->lowLimit) ctxPtr->dictLimit = ctxPtr->lowLimit; + } //cSize = blockCompressor(ctxPtr, op+3, maxDstSize-3, ip, blockSize); cSize = ZSTD_compressBlock(ctxPtr, op+3, maxDstSize-3, ip, blockSize); @@ -1990,46 +1995,46 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr, } -size_t ZSTD_compressContinue (ZSTD_CCtx* ctxPtr, +size_t ZSTD_compressContinue (ZSTD_CCtx* zc, void* dst, size_t dstSize, const void* src, size_t srcSize) { const BYTE* const ip = (const BYTE*) src; /* preemptive overflow correction */ - if (ctxPtr->lowLimit > (1<<30) ) + if (zc->lowLimit > (1<<30) ) { - U32 correction = ctxPtr->lowLimit; - ZSTD_reduceIndex(ctxPtr, correction); - ctxPtr->base += correction; - ctxPtr->dictBase += correction; - ctxPtr->lowLimit -= correction; - ctxPtr->dictLimit -= correction; - if (ctxPtr->nextToUpdate < correction) ctxPtr->nextToUpdate = 0; - else ctxPtr->nextToUpdate -= correction; + U32 correction = zc->lowLimit; + ZSTD_reduceIndex(zc, correction); + zc->base += correction; + zc->dictBase += correction; + zc->lowLimit -= correction; + zc->dictLimit -= correction; + if (zc->nextToUpdate < correction) zc->nextToUpdate = 0; + else zc->nextToUpdate -= correction; } /* Check if blocks follow each other */ - if (src != ctxPtr->nextSrc) + if (src != zc->nextSrc) { /* not contiguous */ - ctxPtr->lowLimit = ctxPtr->dictLimit; - ctxPtr->dictLimit = 
(U32)(ctxPtr->nextSrc - ctxPtr->base); - ctxPtr->dictBase = ctxPtr->base; - ctxPtr->base += ip - ctxPtr->nextSrc; - ctxPtr->nextToUpdate = ctxPtr->dictLimit; + zc->lowLimit = zc->dictLimit; + zc->dictLimit = (U32)(zc->nextSrc - zc->base); + zc->dictBase = zc->base; + zc->base += ip - zc->nextSrc; + zc->nextToUpdate = zc->dictLimit; } /* input-dictionary overlap */ - if ((ip+srcSize > ctxPtr->dictBase + ctxPtr->lowLimit) && (ip < ctxPtr->dictBase + ctxPtr->dictLimit)) + if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit)) { - ctxPtr->lowLimit = (U32)(ip + srcSize - ctxPtr->dictBase); - if (ctxPtr->lowLimit > ctxPtr->dictLimit) ctxPtr->lowLimit = ctxPtr->dictLimit; + zc->lowLimit = (U32)(ip + srcSize - zc->dictBase); + if (zc->lowLimit > zc->dictLimit) zc->lowLimit = zc->dictLimit; } - ctxPtr->nextSrc = ip + srcSize; + zc->nextSrc = ip + srcSize; - return ZSTD_compress_generic (ctxPtr, dst, dstSize, src, srcSize); + return ZSTD_compress_generic (zc, dst, dstSize, src, srcSize); } From 88fcd2916e7e1f37a468df8a64b2388f8eb41aa9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Nov 2015 14:42:45 +0100 Subject: [PATCH 45/79] Added : zstd buffered API --- Makefile | 13 +- NEWS | 2 + lib/error.h | 2 +- lib/zstd_buffered.c | 531 +++++++++++++++++++++++++++++++++++++ lib/zstd_buffered.h | 146 ++++++++++ lib/zstd_buffered_static.h | 62 +++++ lib/zstd_compress.c | 68 +++-- lib/zstd_decompress.c | 196 +++++++++----- lib/zstd_internal.h | 5 +- lib/zstd_static.h | 185 +++++++------ programs/Makefile | 4 +- programs/fileio.c | 282 ++++++-------------- programs/fullbench.c | 67 +---- programs/paramgrill.c | 50 +--- 14 files changed, 1145 insertions(+), 468 deletions(-) create mode 100644 lib/zstd_buffered.c create mode 100644 lib/zstd_buffered.h create mode 100644 lib/zstd_buffered_static.h diff --git a/Makefile b/Makefile index 7caaa466112..4ec4a795400 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,14 @@ export VERSION := 0.4.0 PRGDIR = 
programs ZSTDDIR = lib -.PHONY: clean +# Define nul output +ifneq (,$(filter Windows%,$(OS))) +VOID = nul +else +VOID = /dev/null +endif + +.PHONY: default all zstdprogram clean install uninstall travis-install test clangtest gpptest armtest usan asan uasan default: zstdprogram @@ -49,8 +56,8 @@ zstdprogram: $(MAKE) -C $(PRGDIR) clean: - $(MAKE) -C $(ZSTDDIR) $@ - $(MAKE) -C $(PRGDIR) $@ + @$(MAKE) -C $(ZSTDDIR) $@ > $(VOID) + @$(MAKE) -C $(PRGDIR) $@ > $(VOID) @echo Cleaning completed diff --git a/NEWS b/NEWS index c8177e8da89..75c9b87e3a9 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,7 @@ v0.4.0 +Command line utility is now compatible with high compression levels Removed zstdhc => merged into zstd +Added : ZBUFF API (see zstd_buffered.h) Rolling buffer support v0.3.6 diff --git a/lib/error.h b/lib/error.h index 68e8d46c3ca..86f3c384fc1 100644 --- a/lib/error.h +++ b/lib/error.h @@ -68,7 +68,7 @@ extern "C" { #define ERROR_LIST(ITEM) \ ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ - ITEM(PREFIX(mode_unsupported)) \ + ITEM(PREFIX(mode_unsupported)) ITEM(PREFIX(init_missing))\ ITEM(PREFIX(memory_allocation)) \ ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c new file mode 100644 index 00000000000..28bff9ec3a0 --- /dev/null +++ b/lib/zstd_buffered.c @@ -0,0 +1,531 @@ +/* + Buffered version of Zstd compression library + Copyright (C) 2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/* The objects defined into this file should be considered experimental. + * They are not labelled stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risk of future changes. + */ + +/* ************************************* +* Includes +***************************************/ +#include +#include "error.h" +#include "zstd_static.h" +#include "zstd_buffered_static.h" + + +/** ************************************************ +* Streaming compression +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* Use ZBUFF_compressInit() to start a new compression operation. +* ZBUFF_CCtx objects can be reused multiple times. 
+* +* Use ZBUFF_compressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer. +* Note that it will not output more than *maxDstSizePtr. +* Therefore, some content might still be left into its internal buffer if dst buffer is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *maxDstSizePtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) +* input : 128 KB block size is the internal unit, it improves latency to use this value. +* output : ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block at best speed. 
+* **************************************************/ + +typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush } ZBUFF_cStage; + +/* *** Ressources *** */ +struct ZBUFF_CCtx_s { + ZSTD_CCtx* zc; + char* inBuff; + size_t inBuffSize; + size_t inToCompress; + size_t inBuffPos; + size_t inBuffTarget; + size_t blockSize; + char* outBuff; + size_t outBuffSize; + size_t outBuffContentSize; + size_t outBuffFlushedSize; + ZBUFF_cStage stage; +}; /* typedef'd tp ZBUFF_CCtx within "zstd_buffered.h" */ + +ZBUFF_CCtx* ZBUFF_createCCtx(void) +{ + ZBUFF_CCtx* zbc = (ZBUFF_CCtx*)malloc(sizeof(ZBUFF_CCtx)); + memset(zbc, 0, sizeof(*zbc)); + zbc->zc = ZSTD_createCCtx(); + return zbc; +} + +size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) +{ + if (zbc==NULL) return 0; /* support free on NULL */ + ZSTD_freeCCtx(zbc->zc); + free(zbc); + return 0; +} + + +/* *** Initialization *** */ + +#define MIN(a,b) ( ((a)<(b)) ? (a) : (b) ) +#define BLOCKSIZE (128 * 1024) /* a bit too "magic", should come from reference */ +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params) +{ + size_t neededInBuffSize; + + ZSTD_validateParams(¶ms); + neededInBuffSize = (size_t)1 << params.windowLog; + + /* allocate buffers */ + if (zbc->inBuffSize < neededInBuffSize) + { + zbc->inBuffSize = neededInBuffSize; + free(zbc->inBuff); /* should not be necessary */ + zbc->inBuff = (char*)malloc(neededInBuffSize); + if (zbc->inBuff == NULL) return ERROR(memory_allocation); + } + zbc->blockSize = MIN(BLOCKSIZE, zbc->inBuffSize); + if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) + { + zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1; + free(zbc->outBuff); /* should not be necessary */ + zbc->outBuff = (char*)malloc(zbc->outBuffSize); + if (zbc->outBuff == NULL) return ERROR(memory_allocation); + } + + zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, zbc->outBuff, zbc->outBuffSize, params); + if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize; + + 
zbc->inToCompress = 0; + zbc->inBuffPos = 0; + zbc->inBuffTarget = zbc->blockSize; + zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_flush; /* starts by flushing the header */ + return 0; /* ready to go */ +} + +size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) +{ + return ZBUFF_compressInit_advanced(zbc, ZSTD_getParams(compressionLevel, 0)); +} + + + +/* *** Compression *** */ + +static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + size_t length = MIN(maxDstSize, srcSize); + memcpy(dst, src, length); + return length; +} + +static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc, + void* dst, size_t* maxDstSizePtr, + const void* src, size_t* srcSizePtr, + int flush) /* aggregate : wait for full block before compressing */ +{ + U32 notDone = 1; + const char* const istart = (const char*)src; + const char* ip = istart; + const char* const iend = istart + *srcSizePtr; + char* const ostart = (char*)dst; + char* op = ostart; + char* const oend = ostart + *maxDstSizePtr; + + while (notDone) + { + switch(zbc->stage) + { + case ZBUFFcs_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! 
*/ + + case ZBUFFcs_load: + /* complete inBuffer */ + { + size_t toLoad = zbc->inBuffTarget - zbc->inBuffPos; + size_t loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip); + zbc->inBuffPos += loaded; + ip += loaded; + if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) + { notDone = 0; break; } /* not enough input to get a full block : stop there, wait for more */ + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + { + void* cDst; + size_t cSize; + size_t iSize = zbc->inBuffPos - zbc->inToCompress; + if ((size_t)(oend-op) > ZSTD_compressBound(iSize)) + cDst = op; /* compress directly into output buffer (avoid flush stage) */ + else + cDst = zbc->outBuff; + cSize = ZSTD_compressContinue(zbc->zc, cDst, oend-op, zbc->inBuff + zbc->inToCompress, iSize); + if (ZSTD_isError(cSize)) return cSize; + /* prepare next block */ + zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize; + if (zbc->inBuffTarget > zbc->inBuffSize) + { zbc->inBuffPos = 0; zbc->inBuffTarget = zbc->blockSize; } + zbc->inToCompress = zbc->inBuffPos; + if (cDst == op) { op += cSize; break; } /* no need to flush */ + zbc->outBuffContentSize = cSize; + zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_flush; + // break; /* flush stage follows */ + } + + case ZBUFFcs_flush: + /* flush into dst */ + { + size_t toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize; + size_t flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush); + op += flushed; + zbc->outBuffFlushedSize += flushed; + if (toFlush!=flushed) + { notDone = 0; break; } /* not enough space within dst to store compressed block : stop there */ + zbc->outBuffContentSize = 0; + zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_load; + break; + } + } + } + + *srcSizePtr = ip - istart; + *maxDstSizePtr = op - ostart; + { + size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos; + if (hintInSize==0) hintInSize = zbc->blockSize; 
+ return hintInSize; + } +} + +size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc, + void* dst, size_t* maxDstSizePtr, + const void* src, size_t* srcSizePtr) +{ return ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, src, srcSizePtr, 0); } + + + +/* *** Finalize *** */ + +size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* maxDstSizePtr) +{ + size_t srcSize = 0; + ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, NULL, &srcSize, 1); + return zbc->outBuffContentSize - zbc->outBuffFlushedSize; +} + + +size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* maxDstSizePtr) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + *maxDstSizePtr; + size_t outSize = *maxDstSizePtr; + size_t epilogueSize, remaining; + ZBUFF_compressFlush(zbc, dst, &outSize); /* flush any remaining inBuff */ + op += outSize; + epilogueSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff + zbc->outBuffContentSize, zbc->outBuffSize - zbc->outBuffContentSize); /* epilogue into outBuff */ + zbc->outBuffContentSize += epilogueSize; + outSize = oend-op; + zbc->stage = ZBUFFcs_flush; + remaining = ZBUFF_compressFlush(zbc, op, &outSize); /* attempt to flush epilogue into dst */ + op += outSize; + if (!remaining) zbc->stage = ZBUFFcs_init; /* close only if nothing left to flush */ + *maxDstSizePtr = op-ostart; /* tells how many bytes were written */ + return remaining; +} + + + +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. 
+* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) +* output : 128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ + +typedef enum { ZBUFFds_init, ZBUFFds_readHeader, ZBUFFds_loadHeader, ZBUFFds_decodeHeader, + ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage; + +/* *** Resource management *** */ + +#define ZSTD_frameHeaderSize_max 5 /* too magical, should come from reference */ +struct ZBUFF_DCtx_s { + ZSTD_DCtx* zc; + ZSTD_parameters params; + char* inBuff; + size_t inBuffSize; + size_t inPos; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t hPos; + ZBUFF_dStage stage; + unsigned char headerBuffer[ZSTD_frameHeaderSize_max]; +}; /* typedef'd to ZBUFF_DCtx within "zstd_buffered.h" */ + + +ZBUFF_DCtx* ZBUFF_createDCtx(void) +{ + ZBUFF_DCtx* zbc = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx)); + memset(zbc, 0, sizeof(*zbc)); + zbc->zc = ZSTD_createDCtx(); + zbc->stage = ZBUFFds_init; + return zbc; +} + +size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbc) +{ + if (zbc==NULL) return 0; /* support free on null */ + ZSTD_freeDCtx(zbc->zc); + free(zbc); + return 0; +} + + + +/* *** Initialization *** */ + +size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbc) +{ + zbc->stage = ZBUFFds_readHeader; + zbc->hPos = zbc->inPos = 
zbc->outStart = zbc->outEnd = 0; + return ZSTD_resetDCtx(zbc->zc); +} + + + +/* *** Decompression *** */ + +size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr) +{ + const char* const istart = (const char*)src; + const char* ip = istart; + const char* const iend = istart + *srcSizePtr; + char* const ostart = (char*)dst; + char* op = ostart; + char* const oend = ostart + *maxDstSizePtr; + U32 notDone = 1; + + while (notDone) + { + switch(zbc->stage) + { + + case ZBUFFds_init : + return ERROR(init_missing); + + case ZBUFFds_readHeader : + /* read header from src */ + { + size_t headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr); + if (ZSTD_isError(headerSize)) return headerSize; + if (headerSize) + { + /* not enough input to decode header : tell how many bytes would be necessary */ + memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr); + zbc->hPos += *srcSizePtr; + *maxDstSizePtr = 0; + zbc->stage = ZBUFFds_loadHeader; + return headerSize - zbc->hPos; + } + zbc->stage = ZBUFFds_decodeHeader; + break; + } + + case ZBUFFds_loadHeader: + /* complete header from src */ + { + size_t headerSize = ZBUFF_limitCopy( + zbc->headerBuffer + zbc->hPos, ZSTD_frameHeaderSize_max - zbc->hPos, + src, *srcSizePtr); + zbc->hPos += headerSize; + ip += headerSize; + headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos); + if (ZSTD_isError(headerSize)) return headerSize; + if (headerSize) + { + /* not enough input to decode header : tell how many bytes would be necessary */ + *maxDstSizePtr = 0; + return headerSize - zbc->hPos; + } + // zbc->stage = ZBUFFds_decodeHeader; break; /* useless : stage follows */ + } + + case ZBUFFds_decodeHeader: + /* apply header to create / resize buffers */ + { + size_t neededOutSize = (size_t)1 << zbc->params.windowLog; + size_t neededInSize = BLOCKSIZE; /* a block is never > BLOCKSIZE */ + if (zbc->inBuffSize < neededInSize) + { + 
free(zbc->inBuff); + zbc->inBuffSize = neededInSize; + zbc->inBuff = (char*)malloc(neededInSize); + if (zbc->inBuff == NULL) return ERROR(memory_allocation); + } + if (zbc->outBuffSize < neededOutSize) + { + free(zbc->outBuff); + zbc->outBuffSize = neededOutSize; + zbc->outBuff = (char*)malloc(neededOutSize); + if (zbc->outBuff == NULL) return ERROR(memory_allocation); + } + } + memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos); + zbc->inPos = zbc->hPos; + zbc->hPos = 0; + zbc->stage = ZBUFFds_load; + break; /* useless : stage follows */ + + case ZBUFFds_read: + { + size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc); + if (neededInSize==0) /* end of frame */ + { + zbc->stage = ZBUFFds_init; + notDone = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) + { + /* directly decode from src */ + size_t decodedSize = ZSTD_decompressContinue(zbc->zc, + zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart, + ip, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + ip += neededInSize; + if (!decodedSize) break; /* this was just a header */ + zbc->outEnd = zbc->outStart + decodedSize; + zbc->stage = ZBUFFds_flush; + break; + } + if (ip==iend) { notDone = 0; break; } /* no more input */ + zbc->stage = ZBUFFds_load; + } + + case ZBUFFds_load: + { + size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc); + size_t toLoad = neededInSize - zbc->inPos; + size_t loadedSize; + if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected); /* should never happen */ + loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, zbc->inBuffSize - zbc->inPos, ip, iend-ip); + ip += loadedSize; + zbc->inPos += loadedSize; + if (loadedSize < toLoad) { notDone = 0; break; } /* not enough input, wait for more */ + { + size_t decodedSize = ZSTD_decompressContinue(zbc->zc, + zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart, + zbc->inBuff, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + zbc->inPos = 0; /* 
input is consumed */ + if (!decodedSize) { zbc->stage = ZBUFFds_read; break; } /* this was just a header */ + zbc->outEnd = zbc->outStart + decodedSize; + zbc->stage = ZBUFFds_flush; + // break; /* ZBUFFds_flush follows */ + } + } + case ZBUFFds_flush: + { + size_t toFlushSize = zbc->outEnd - zbc->outStart; + size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize); + op += flushedSize; + zbc->outStart += flushedSize; + if (flushedSize == toFlushSize) + { + zbc->stage = ZBUFFds_read; + if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize) + zbc->outStart = zbc->outEnd = 0; + } + break; + } + } + } + + *srcSizePtr = ip-istart; + *maxDstSizePtr = op-ostart; + + return ZSTD_nextSrcSizeToDecompress(zbc->zc) - zbc->inPos; +} + + + + + + + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } +const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } + +size_t ZBUFF_recommendedCInSize() { return BLOCKSIZE; } +size_t ZBUFF_recommendedCOutSize() { return ZSTD_compressBound(BLOCKSIZE) + 6; } +size_t ZBUFF_recommendedDInSize() { return BLOCKSIZE + 3; } +size_t ZBUFF_recommendedDOutSize() { return BLOCKSIZE; } diff --git a/lib/zstd_buffered.h b/lib/zstd_buffered.h new file mode 100644 index 00000000000..f79982a1325 --- /dev/null +++ b/lib/zstd_buffered.h @@ -0,0 +1,146 @@ +/* + Buffered version of Zstd compression library + Copyright (C) 2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#ifndef ZSTD_BUFFERED_H +#define ZSTD_BUFFERED_H + +/* The objects defined into this file should be considered experimental. + * They are not labelled stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risk of future changes. 
+ */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFF_CCtx_s ZBUFF_CCtx; +ZBUFF_CCtx* ZBUFF_createCCtx(void); +size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx); + +size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel); +size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); +size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr); +size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr); + +/** ************************************************ +* Streaming compression +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* Use ZBUFF_compressInit() to start a new compression operation. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Use ZBUFF_compressContinue() repetitively to consume input stream. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written within *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or move dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer. +* Note that it will not output more than *maxDstSizePtr. 
+* Therefore, some content might still be left into its internal buffer if dst buffer is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *maxDstSizePtr is too small. +* In which case, call again ZBUFF_compressFlush() to complete the flush. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize +* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value. +* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering. +* By using both, you ensure that input will be entirely consumed, and output will always contain the result. +* **************************************************/ + + +typedef struct ZBUFF_DCtx_s ZBUFF_DCtx; +ZBUFF_DCtx* ZBUFF_createDCtx(void); +size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx); + +size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx); +size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); + +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. 
+* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize +* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFF_isError(size_t errorCode); +const char* ZBUFF_getErrorName(size_t errorCode); + +/** The below functions provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are not compulsory, they just tend to offer better latency */ +size_t ZBUFF_recommendedCInSize(void); +size_t ZBUFF_recommendedCOutSize(void); +size_t ZBUFF_recommendedDInSize(void); +size_t ZBUFF_recommendedDOutSize(void); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_BUFFERED_H */ diff --git a/lib/zstd_buffered_static.h b/lib/zstd_buffered_static.h new file mode 100644 index 00000000000..cba1d6fca49 --- /dev/null +++ b/lib/zstd_buffered_static.h @@ -0,0 +1,62 @@ +/* + zstd - buffered version of compression library + experimental complementary API, for static linking only + Copyright (C) 2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#ifndef ZSTD_BUFFERED_STATIC_H +#define ZSTD_BUFFERED_STATIC_H + +/* The objects defined into this file should be considered experimental. + * They are not labelled stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risk of future changes. + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include "zstd_static.h" +#include "zstd_buffered.h" + + +/* ************************************* +* Advanced Streaming functions +***************************************/ +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, ZSTD_parameters params); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_BUFFERED_STATIC_H */ diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 0c9b0e87313..21865d337f5 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -138,7 +138,7 @@ static unsigned ZSTD_highbit(U32 val); /** ZSTD_validateParams correct params value to remain within authorized range optimize for srcSize if srcSize > 0 */ -void ZSTD_validateParams(ZSTD_parameters* params, U64 srcSizeHint) +void ZSTD_validateParams(ZSTD_parameters* params) { const U32 btPlus = (params->strategy == ZSTD_btlazy2); @@ -147,12 +147,13 @@ void ZSTD_validateParams(ZSTD_parameters* params, U64 srcSizeHint) if (params->windowLog < ZSTD_WINDOWLOG_MIN) params->windowLog = ZSTD_WINDOWLOG_MIN; /* correct params, to use less memory */ - if ((srcSizeHint > 0) && (srcSizeHint < (1<srcSize > 0) && (params->srcSize < (1<srcSize)-1) + 1; if (params->windowLog > srcLog) params->windowLog = srcLog; } + if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ if 
(params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_CONTENTLOG_MAX */ if (params->contentLog < ZSTD_CONTENTLOG_MIN) params->contentLog = ZSTD_CONTENTLOG_MIN; if (params->hashLog > ZSTD_HASHLOG_MAX) params->hashLog = ZSTD_HASHLOG_MAX; @@ -166,11 +167,8 @@ void ZSTD_validateParams(ZSTD_parameters* params, U64 srcSizeHint) static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, - ZSTD_parameters params, - U64 srcSizeHint) + ZSTD_parameters params) { - ZSTD_validateParams(¶ms, srcSizeHint); - /* reserve table memory */ { const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; @@ -207,10 +205,12 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, } +/** ZSTD_reduceIndex +* rescale indexes to avoid future overflow (indexes are U32) */ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) { - const U32 contentLog = zc->params.strategy == ZSTD_fast ? 1 : zc->params.contentLog; + const U32 contentLog = (zc->params.strategy == ZSTD_fast) ? 
1 : zc->params.contentLog; const U32 tableSpaceU32 = (1 << contentLog) + (1 << zc->params.hashLog); U32* table32 = zc->hashTable; U32 index; @@ -2038,30 +2038,51 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc, } +/** ZSTD_compressBegin_advanced +* Write frame header, according to params +* @return : nb of bytes written */ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, - const ZSTD_parameters params, - const U64 srcSizeHint) + ZSTD_parameters params) { size_t errorCode; - if (maxDstSize < 4) return ERROR(dstSize_tooSmall); - errorCode = ZSTD_resetCCtx_advanced(ctx, params, srcSizeHint); + + ZSTD_validateParams(¶ms); + + if (maxDstSize < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); + errorCode = ZSTD_resetCCtx_advanced(ctx, params); if (ZSTD_isError(errorCode)) return errorCode; - MEM_writeLE32(dst, ZSTD_magicNumber); /* Write Header */ - return 4; + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); /* Write Header */ + ((BYTE*)dst)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN); + return ZSTD_frameHeaderSize_min; } -size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint) +/** ZSTD_getParams +* return ZSTD_parameters structure for a selected compression level and srcSize. 
+* srcSizeHint value is optional, select 0 if not known */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint) { - int tableID = ((srcSizeHint-1) > 128 KB); /* intentional underflow for srcSizeHint == 0 */ + ZSTD_parameters result; + int tableID = ((srcSizeHint-1) <= 128 KB); /* intentional underflow for srcSizeHint == 0 */ if (compressionLevel<=0) compressionLevel = 1; if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; - return ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_defaultParameters[tableID][compressionLevel], srcSizeHint); + result = ZSTD_defaultParameters[tableID][compressionLevel]; + result.srcSize = srcSizeHint; + return result; +} + + +size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint) +{ + return ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_getParams(compressionLevel, srcSizeHint)); } +/** ZSTD_compressEnd +* Write frame epilogue +* @return : nb of bytes written into dst (or an error code) */ size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize) { BYTE* op = (BYTE*)dst; @@ -2079,16 +2100,16 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize) } size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - ZSTD_parameters params) + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + ZSTD_parameters params) { BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; size_t oSize; /* Header */ - oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, params, srcSize); + oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, params); if(ZSTD_isError(oSize)) return oSize; op += oSize; maxDstSize -= oSize; @@ -2110,10 +2131,7 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) { - const int tableID = (srcSize > 
128 KB); - if (compressionLevel < 1) compressionLevel = 1; - if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; - return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_defaultParameters[tableID][compressionLevel]); + return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_getParams(compressionLevel, srcSize)); } size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 430dc7acad5..54429628d6b 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -120,6 +120,9 @@ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } /* ************************************************************* * Context management ***************************************************************/ +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage; + struct ZSTD_DCtx_s { U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; @@ -130,18 +133,21 @@ struct ZSTD_DCtx_s void* vBase; void* dictEnd; size_t expected; + size_t headerSize; + ZSTD_parameters params; blockType_t bType; - U32 phase; + ZSTD_dStage stage; const BYTE* litPtr; size_t litBufSize; size_t litSize; BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */]; + BYTE headerBuffer[ZSTD_frameHeaderSize_max]; }; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */ size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx) { - dctx->expected = ZSTD_frameHeaderSize; - dctx->phase = 0; + dctx->expected = ZSTD_frameHeaderSize_min; + dctx->stage = ZSTDds_getFrameHeaderSize; dctx->previousDstEnd = NULL; dctx->base = NULL; dctx->vBase = NULL; @@ -167,6 +173,45 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) /* ************************************************************* * Decompression section ***************************************************************/ +/** ZSTD_decodeFrameHeader_Part1 +* decode the 
1st part of the Frame Header, which tells Frame Header size. +* srcSize must be == ZSTD_frameHeaderSize_min +* @return : the full size of the Frame Header */ +static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize) +{ + U32 magicNumber; + if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); + magicNumber = MEM_readLE32(src); + if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown); + zc->headerSize = ZSTD_frameHeaderSize_min; + return zc->headerSize; +} + +/** ZSTD_decodeFrameHeader_Part2 +* decode the full Frame Header +* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1 +* @return : 0, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + if (srcSize != zc->headerSize) return ERROR(srcSize_wrong); + memset(&(zc->params), 0, sizeof(zc->params)); + zc->params.windowLog = ip[4] + ZSTD_WINDOWLOG_ABSOLUTEMIN; + return 0; +} + + +size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize) +{ + U32 magicNumber; + if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max; + magicNumber = MEM_readLE32(src); + if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown); + memset(params, 0, sizeof(*params)); + params->windowLog = ((const BYTE*)src)[4] + ZSTD_WINDOWLOG_ABSOLUTEMIN; + return 0; +} + size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { @@ -655,7 +700,6 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v BYTE* op = ostart; BYTE* const oend = ostart + maxDstSize; size_t remainingSize = srcSize; - U32 magicNumber; blockProperties_t blockProperties; @@ -663,14 +707,23 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v ctx->base = ctx->vBase = ctx->dictEnd = dst; /* Frame Header */ - if (srcSize < 
ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - magicNumber = MEM_readLE32(src); + { + size_t frameHeaderSize; + if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) - if (ZSTD_isLegacy(magicNumber)) - return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber); + { + const U32 magicNumber = MEM_readLE32(src); + if (ZSTD_isLegacy(magicNumber)) + return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber); + } #endif - if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); - ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + ip += frameHeaderSize; remainingSize -= frameHeaderSize; + frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + } /* Loop on each block */ while (1) @@ -722,7 +775,6 @@ size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t src /* ****************************** * Streaming Decompression API ********************************/ - size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; @@ -732,7 +784,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con { /* Sanity check */ if (srcSize != ctx->expected) return ERROR(srcSize_wrong); - if (dst != ctx->previousDstEnd) /* not contiguous */ + if (dst != ctx->previousDstEnd) /* not contiguous */ { ctx->dictEnd = ctx->previousDstEnd; if ((dst > ctx->base) && (dst < ctx->previousDstEnd)) /* rolling buffer : new segment right into tracked memory */ @@ -741,65 +793,85 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con 
ctx->base = dst; } - /* Decompress : frame header */ - if (ctx->phase == 0) - { - /* Check frame magic header */ - U32 magicNumber = MEM_readLE32(src); - if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); - ctx->phase = 1; - ctx->expected = ZSTD_blockHeaderSize; - return 0; - } - - /* Decompress : block header */ - if (ctx->phase == 1) + /* Decompress : frame header; part 1 */ + switch (ctx->stage) { - blockProperties_t bp; - size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(blockSize)) return blockSize; - if (bp.blockType == bt_end) + case ZSTDds_getFrameHeaderSize : { - ctx->expected = 0; - ctx->phase = 0; + /* get frame header size */ + if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */ + ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min); + if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize; + memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min); + if (ctx->headerSize > ZSTD_frameHeaderSize_min) + { + ctx->expected = ctx->headerSize - ZSTD_frameHeaderSize_min; + ctx->stage = ZSTDds_decodeFrameHeader; + return 0; + } + ctx->expected = 0; /* not necessary to copy more */ } - else + case ZSTDds_decodeFrameHeader: { - ctx->expected = blockSize; - ctx->bType = bp.blockType; - ctx->phase = 2; + /* get frame header */ + size_t result; + memcpy(ctx->headerBuffer + ZSTD_frameHeaderSize_min, src, ctx->expected); + result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize); + if (ZSTD_isError(result)) return result; + ctx->expected = ZSTD_blockHeaderSize; + ctx->stage = ZSTDds_decodeBlockHeader; + return 0; } + case ZSTDds_decodeBlockHeader: + { + /* Decode block header */ + blockProperties_t bp; + size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(blockSize)) return blockSize; + if (bp.blockType == bt_end) + { + ctx->expected = 0; + ctx->stage = ZSTDds_getFrameHeaderSize; + } + 
else + { + ctx->expected = blockSize; + ctx->bType = bp.blockType; + ctx->stage = ZSTDds_decompressBlock; + } - ctx->previousDstEnd = dst; - return 0; - } - - /* Decompress : block content */ - { - size_t rSize; - switch(ctx->bType) + ctx->previousDstEnd = dst; + return 0; + } + case 3: { - case bt_compressed: - rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); - break; - case bt_raw : - rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize); - break; - case bt_rle : - return ERROR(GENERIC); /* not yet handled */ - break; - case bt_end : /* should never happen (filtered at phase 1) */ - rSize = 0; - break; - default: - return ERROR(GENERIC); + /* Decompress : block content */ + size_t rSize; + switch(ctx->bType) + { + case bt_compressed: + rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); + break; + case bt_raw : + rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize); + break; + case bt_rle : + return ERROR(GENERIC); /* not yet handled */ + break; + case bt_end : /* should never happen (filtered at phase 1) */ + rSize = 0; + break; + default: + return ERROR(GENERIC); + } + ctx->stage = ZSTDds_decodeBlockHeader; + ctx->expected = ZSTD_blockHeaderSize; + ctx->previousDstEnd = (char*)dst + rSize; + return rSize; } - ctx->phase = 1; - ctx->expected = ZSTD_blockHeaderSize; - ctx->previousDstEnd = (char*)dst + rSize; - return rSize; + default: + return ERROR(GENERIC); /* impossible */ } - } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 7236cf2aba1..bddfc929354 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -54,6 +54,8 @@ extern "C" { /* ************************************* * Common constants ***************************************/ +#define ZSTD_MAGICNUMBER 0xFD2FB524 /* v0.4 */ + #define KB *(1 <<10) #define MB *(1 <<20) #define GB *(1U<<30) @@ -61,7 +63,8 @@ extern "C" { #define BLOCKSIZE (128 KB) /* define, for static allocation */ static const size_t ZSTD_blockHeaderSize = 3; -static const size_t 
ZSTD_frameHeaderSize = 4; +static const size_t ZSTD_frameHeaderSize_min = 5; +#define ZSTD_frameHeaderSize_max 5 /* define, for static allocation */ #define BIT7 128 #define BIT6 64 diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 8c56c13e413..ed06d69dd54 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -52,16 +52,29 @@ extern "C" { /* ************************************* * Types ***************************************/ +#define ZSTD_WINDOWLOG_MAX 26 +#define ZSTD_WINDOWLOG_MIN 18 +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11 +#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1) +#define ZSTD_CONTENTLOG_MIN 4 +#define ZSTD_HASHLOG_MAX 28 +#define ZSTD_HASHLOG_MIN 4 +#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_SEARCHLENGTH_MAX 7 +#define ZSTD_SEARCHLENGTH_MIN 4 + /** from faster to stronger */ typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy; typedef struct { - U32 windowLog; /* largest match distance : impact decompression buffer size */ + U64 srcSize; /* optional : tells how much bytes are present in the frame. Use 0 if not known. 
*/ + U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */ U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */ - U32 hashLog; /* dispatch table : larger == more memory, faster*/ - U32 searchLog; /* nb of searches : larger == more compression, slower*/ - U32 searchLength; /* size of matches : larger == faster decompression */ + U32 hashLog; /* dispatch table : larger == more memory, faster */ + U32 searchLog; /* nb of searches : larger == more compression, slower */ + U32 searchLength; /* size of matches : larger == faster decompression, sometimes less compression */ ZSTD_strategy strategy; } ZSTD_parameters; @@ -69,23 +82,29 @@ typedef struct /* ************************************* * Advanced function ***************************************/ +/** ZSTD_getParams +* return ZSTD_parameters structure for a selected compression level and srcSize. +* srcSizeHint value is optional, select 0 if not known */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint); + +/** ZSTD_validateParams +* correct params value to remain within authorized range */ +void ZSTD_validateParams(ZSTD_parameters* params); + /** ZSTD_compress_advanced * Same as ZSTD_compressCCtx(), with fine-tune control of each compression parameter */ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - ZSTD_parameters params); - -/** ZSTD_validateParams - correct params value to remain within authorized range - srcSizeHint value is optional, select 0 if not known */ -void ZSTD_validateParams(ZSTD_parameters* params, U64 srcSizeHint); + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + ZSTD_parameters params); -/* ************************************* -* Streaming functions -***************************************/ +/* ************************************** +* Streaming functions (bufferless mode) 
+****************************************/ size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint); +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, ZSTD_parameters params); + size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize); @@ -95,86 +114,90 @@ ZSTD_DCtx* ZSTD_createDCtx(void); size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx); size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); +size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize); size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); -/* - Use above functions alternatively. +/** + Streaming decompression, bufferless mode + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. Use ZSTD_resetDCtx() to return to fresh status. + + First operation is to retrieve frame parameters, using ZSTD_getFrameParams(). + This function doesn't consume its input. It needs enough input data to properly decode the frame header. + The objective is to retrieve *params.windowlog, to know how much memory is required during decoding. + Result : 0 if successfull, it means the ZSTD_parameters structure has been filled. + >0 : means there is not enough data into src. Provides the expected size to successfully decode header. + errorCode, which can be tested using ZSTD_isError() (For example, if it's not a ZSTD header) + + Then it's possible to start decompression. + Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
- ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. - Result is the number of bytes regenerated within 'dst'. + ZSTD_decompressContinue() will use previous data blocks during decompress. + They should be located contiguously prior to current block. Alternatively, a round buffer is possible. + Just make sure that the combined of current and accessible past blocks is a minimum of (1 << windowlog). + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst'. It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. -*/ -/* ************************************* -* Prefix - version detection -***************************************/ -#define ZSTD_magicNumber 0xFD2FB523 /* v0.3 (current)*/ + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. +*/ /* ************************************* * Pre-defined compression levels ***************************************/ #define ZSTD_MAX_CLEVEL 20 -#define ZSTD_WINDOWLOG_MAX 26 -#define ZSTD_WINDOWLOG_MIN 18 -#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1) -#define ZSTD_CONTENTLOG_MIN 4 -#define ZSTD_HASHLOG_MAX 28 -#define ZSTD_HASHLOG_MIN 4 -#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_SEARCHLENGTH_MAX 7 -#define ZSTD_SEARCHLENGTH_MIN 4 - static const ZSTD_parameters ZSTD_defaultParameters[2][ZSTD_MAX_CLEVEL+1] = { -{ /* for <= 128 KB */ - /* W, C, H, S, L, strat */ - { 17, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */ - { 17, 12, 13, 1, 6, ZSTD_fast }, /* level 1 */ - { 17, 15, 16, 1, 5, ZSTD_fast }, /* level 2 */ - { 17, 16, 17, 1, 5, ZSTD_fast }, /* level 3 */ - { 17, 13, 15, 2, 4, ZSTD_greedy }, /* level 4 */ - { 17, 15, 17, 3, 4, ZSTD_greedy }, /* level 5 */ - { 17, 14, 17, 3, 4, ZSTD_lazy }, /* level 6 */ - { 17, 16, 17, 4, 4, ZSTD_lazy }, /* level 7 */ - { 17, 16, 17, 4, 4, ZSTD_lazy2 }, /* level 
8 */ - { 17, 17, 16, 5, 4, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 16, 6, 4, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 16, 7, 4, ZSTD_lazy2 }, /* level 11 */ - { 17, 17, 16, 8, 4, ZSTD_lazy2 }, /* level 12 */ - { 17, 18, 16, 4, 4, ZSTD_btlazy2 }, /* level 13 */ - { 17, 18, 16, 5, 4, ZSTD_btlazy2 }, /* level 14 */ - { 17, 18, 16, 6, 4, ZSTD_btlazy2 }, /* level 15 */ - { 17, 18, 16, 7, 4, ZSTD_btlazy2 }, /* level 16 */ - { 17, 18, 16, 8, 4, ZSTD_btlazy2 }, /* level 17 */ - { 17, 18, 16, 9, 4, ZSTD_btlazy2 }, /* level 18 */ - { 17, 18, 16, 10, 4, ZSTD_btlazy2 }, /* level 19 */ - { 17, 18, 18, 12, 4, ZSTD_btlazy2 }, /* level 20 */ +{ /* "default" */ + /* W, C, H, S, L, strat */ + { 0, 18, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */ + { 0, 19, 13, 14, 1, 7, ZSTD_fast }, /* level 1 */ + { 0, 19, 15, 16, 1, 6, ZSTD_fast }, /* level 2 */ + { 0, 20, 18, 20, 1, 6, ZSTD_fast }, /* level 3 */ + { 0, 21, 19, 21, 1, 6, ZSTD_fast }, /* level 4 */ + { 0, 20, 14, 18, 3, 5, ZSTD_greedy }, /* level 5 */ + { 0, 20, 18, 19, 3, 5, ZSTD_greedy }, /* level 6 */ + { 0, 21, 17, 20, 3, 5, ZSTD_lazy }, /* level 7 */ + { 0, 21, 19, 20, 3, 5, ZSTD_lazy }, /* level 8 */ + { 0, 21, 20, 20, 3, 5, ZSTD_lazy2 }, /* level 9 */ + { 0, 21, 19, 21, 4, 5, ZSTD_lazy2 }, /* level 10 */ + { 0, 22, 20, 22, 4, 5, ZSTD_lazy2 }, /* level 11 */ + { 0, 22, 20, 22, 5, 5, ZSTD_lazy2 }, /* level 12 */ + { 0, 22, 21, 22, 5, 5, ZSTD_lazy2 }, /* level 13 */ + { 0, 22, 22, 23, 5, 5, ZSTD_lazy2 }, /* level 14 */ + { 0, 23, 23, 23, 5, 5, ZSTD_lazy2 }, /* level 15 */ + { 0, 23, 21, 22, 5, 5, ZSTD_btlazy2 }, /* level 16 */ + { 0, 23, 24, 23, 4, 5, ZSTD_btlazy2 }, /* level 17 */ + { 0, 25, 24, 23, 5, 5, ZSTD_btlazy2 }, /* level 18 */ + { 0, 25, 26, 23, 5, 5, ZSTD_btlazy2 }, /* level 19 */ + { 0, 25, 26, 25, 6, 5, ZSTD_btlazy2 }, /* level 20 */ }, -{ /* for > 128 KB */ +{ /* for srcSize <= 128 KB */ /* W, C, H, S, L, strat */ - { 18, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */ - { 19, 13, 14, 1, 7, 
ZSTD_fast }, /* level 1 */ - { 19, 15, 16, 1, 6, ZSTD_fast }, /* level 2 */ - { 20, 18, 20, 1, 6, ZSTD_fast }, /* level 3 */ - { 21, 19, 21, 1, 6, ZSTD_fast }, /* level 4 */ - { 20, 14, 18, 3, 5, ZSTD_greedy }, /* level 5 */ - { 20, 18, 19, 3, 5, ZSTD_greedy }, /* level 6 */ - { 21, 17, 20, 3, 5, ZSTD_lazy }, /* level 7 */ - { 21, 19, 20, 3, 5, ZSTD_lazy }, /* level 8 */ - { 21, 20, 20, 3, 5, ZSTD_lazy2 }, /* level 9 */ - { 21, 19, 21, 4, 5, ZSTD_lazy2 }, /* level 10 */ - { 22, 20, 22, 4, 5, ZSTD_lazy2 }, /* level 11 */ - { 22, 20, 22, 5, 5, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, ZSTD_lazy2 }, /* level 13 */ - { 22, 22, 23, 5, 5, ZSTD_lazy2 }, /* level 14 */ - { 23, 23, 23, 5, 5, ZSTD_lazy2 }, /* level 15 */ - { 23, 21, 22, 5, 5, ZSTD_btlazy2 }, /* level 16 */ - { 23, 24, 23, 4, 5, ZSTD_btlazy2 }, /* level 17 */ - { 25, 24, 23, 5, 5, ZSTD_btlazy2 }, /* level 18 */ - { 25, 26, 23, 5, 5, ZSTD_btlazy2 }, /* level 19 */ - { 25, 26, 25, 6, 5, ZSTD_btlazy2 }, /* level 20 */ -} + { 0, 17, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */ + { 0, 17, 12, 13, 1, 6, ZSTD_fast }, /* level 1 */ + { 0, 17, 15, 16, 1, 5, ZSTD_fast }, /* level 2 */ + { 0, 17, 16, 17, 1, 5, ZSTD_fast }, /* level 3 */ + { 0, 17, 13, 15, 2, 4, ZSTD_greedy }, /* level 4 */ + { 0, 17, 15, 17, 3, 4, ZSTD_greedy }, /* level 5 */ + { 0, 17, 14, 17, 3, 4, ZSTD_lazy }, /* level 6 */ + { 0, 17, 16, 17, 4, 4, ZSTD_lazy }, /* level 7 */ + { 0, 17, 16, 17, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 0, 17, 17, 16, 5, 4, ZSTD_lazy2 }, /* level 9 */ + { 0, 17, 17, 16, 6, 4, ZSTD_lazy2 }, /* level 10 */ + { 0, 17, 17, 16, 7, 4, ZSTD_lazy2 }, /* level 11 */ + { 0, 17, 17, 16, 8, 4, ZSTD_lazy2 }, /* level 12 */ + { 0, 17, 18, 16, 4, 4, ZSTD_btlazy2 }, /* level 13 */ + { 0, 17, 18, 16, 5, 4, ZSTD_btlazy2 }, /* level 14 */ + { 0, 17, 18, 16, 6, 4, ZSTD_btlazy2 }, /* level 15 */ + { 0, 17, 18, 16, 7, 4, ZSTD_btlazy2 }, /* level 16 */ + { 0, 17, 18, 16, 8, 4, ZSTD_btlazy2 }, /* level 17 */ + { 0, 17, 18, 16, 
9, 4, ZSTD_btlazy2 }, /* level 18 */ + { 0, 17, 18, 16, 10, 4, ZSTD_btlazy2 }, /* level 19 */ + { 0, 17, 18, 18, 12, 4, ZSTD_btlazy2 }, /* level 20 */ +}, }; diff --git a/programs/Makefile b/programs/Makefile index eddd3258371..697bfe40a48 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -58,12 +58,12 @@ default: zstd all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 paramgrill datagen -zstd: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +zstd: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c $(ZSTDDIR)/zstd_buffered.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ xxhash.c bench.c fileio.c zstdcli.c legacy/fileio_legacy.c $(CC) $(FLAGS) $^ -o $@$(EXT) -zstd32: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ +zstd32: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c $(ZSTDDIR)/zstd_buffered.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ xxhash.c bench.c fileio.c zstdcli.c legacy/fileio_legacy.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) diff --git a/programs/fileio.c b/programs/fileio.c index c599c476a26..267ddb5620c 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -49,8 +49,6 @@ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - #define _FILE_OFFSET_BITS 64 /* Large file support on 32-bits unix */ #define _POSIX_SOURCE 1 /* enable fileno() within on unix */ @@ -67,10 +65,11 @@ #include /* stat64 */ #include "mem.h" #include "fileio.h" -#include "zstd_static.h" +#include "zstd_static.h" /* ZSTD_magicNumber */ +#include "zstd_buffered_static.h" #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) -# include "zstd_legacy.h" /* legacy */ +# include "zstd_legacy.h" /* legacy */ # include "fileio_legacy.h" /* legacy */ #endif @@ -82,7 
+81,7 @@ # include /* _O_BINARY */ # include /* _setmode, _isatty */ # ifdef __MINGW32__ - /* int _fileno(FILE *stream); // seems no longer useful // MINGW somehow forgets to include this windows declaration into */ + // int _fileno(FILE *stream); /* seems no longer useful /* MINGW somehow forgets to include this windows declaration into */ # endif # define SET_BINARY_MODE(file) { int unused = _setmode(_fileno(file), _O_BINARY); (void)unused; } # define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) @@ -114,8 +113,8 @@ #define BIT6 0x40 #define BIT7 0x80 -//static const unsigned FIO_maxBlockSizeID = 0xB; /* => 2MB block */ -static const unsigned FIO_blockHeaderSize = 3; +#define BLOCKSIZE (128 KB) +#define ROLLBUFFERSIZE (BLOCKSIZE*8*64) #define FIO_FRAMEHEADERSIZE 5 /* as a define, because needed to allocated table on stack */ #define FSE_CHECKSUM_SEED 0 @@ -241,36 +240,25 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* U64 filesize = 0; U64 compressedfilesize = 0; BYTE* inBuff; - BYTE* inSlot; - BYTE* inEnd; BYTE* outBuff; - size_t blockSize = 128 KB; - size_t inBuffSize = 4 * blockSize; - size_t outBuffSize = ZSTD_compressBound(blockSize); + size_t inBuffSize = ZBUFF_recommendedCInSize(); + size_t outBuffSize = ZBUFF_recommendedCOutSize(); FILE* finput; FILE* foutput; - size_t sizeCheck, cSize; - ZSTD_CCtx* ctx; - - /* init */ - FIO_getFileHandles(&finput, &foutput, input_filename, output_filename); - filesize = FIO_getFileSize(input_filename); + size_t sizeCheck, errorCode; + ZBUFF_CCtx* ctx; /* Allocate Memory */ - ctx = ZSTD_createCCtx(); + ctx = ZBUFF_createCCtx(); inBuff = (BYTE*)malloc(inBuffSize); outBuff = (BYTE*)malloc(outBuffSize); if (!inBuff || !outBuff || !ctx) EXM_THROW(21, "Allocation error : not enough memory"); - inSlot = inBuff; - inEnd = inBuff + inBuffSize; - - /* Write Frame Header */ - cSize = ZSTD_compressBegin(ctx, outBuff, outBuffSize, cLevel, filesize); - if (ZSTD_isError(cSize)) EXM_THROW(22, 
"Compression error : cannot create frame header"); - sizeCheck = fwrite(outBuff, 1, cSize, foutput); - if (sizeCheck!=cSize) EXM_THROW(23, "Write error : cannot write header into %s", output_filename); - compressedfilesize += cSize; + /* init */ + FIO_getFileHandles(&finput, &foutput, input_filename, output_filename); + filesize = FIO_getFileSize(input_filename); + errorCode = ZBUFF_compressInit_advanced(ctx, ZSTD_getParams(cLevel, filesize)); + if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing compression"); filesize = 0; /* Main compression loop */ @@ -279,33 +267,41 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* size_t inSize; /* Fill input Buffer */ - if (inSlot + blockSize > inEnd) inSlot = inBuff; - inSize = fread(inSlot, (size_t)1, blockSize, finput); + inSize = fread(inBuff, (size_t)1, inBuffSize, finput); if (inSize==0) break; filesize += inSize; DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20)); - /* Compress Block */ - cSize = ZSTD_compressContinue(ctx, outBuff, outBuffSize, inSlot, inSize); - if (ZSTD_isError(cSize)) - EXM_THROW(24, "Compression error : %s ", ZSTD_getErrorName(cSize)); - - /* Write cBlock */ - sizeCheck = fwrite(outBuff, 1, cSize, foutput); - if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", output_filename); - compressedfilesize += cSize; - inSlot += inSize; + { + /* Compress (buffered streaming ensures appropriate formatting) */ + size_t usedInSize = inSize; + size_t cSize = outBuffSize; + size_t result = ZBUFF_compressContinue(ctx, outBuff, &cSize, inBuff, &usedInSize); + if (ZBUFF_isError(result)) + EXM_THROW(23, "Compression error : %s ", ZBUFF_getErrorName(result)); + if (inSize != usedInSize) + /* inBuff should be entirely consumed since buffer sizes are recommended ones */ + EXM_THROW(24, "Compression error : input block not fully consumed"); + + /* Write cBlock */ + sizeCheck = fwrite(outBuff, 1, cSize, foutput); + if 
(sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", output_filename); + compressedfilesize += cSize; + } DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100); } /* End of Frame */ - cSize = ZSTD_compressEnd(ctx, outBuff, outBuffSize); - if (ZSTD_isError(cSize)) EXM_THROW(26, "Compression error : cannot create frame end"); + { + size_t cSize = outBuffSize; + size_t result = ZBUFF_compressEnd(ctx, outBuff, &cSize); + if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end"); - sizeCheck = fwrite(outBuff, 1, cSize, foutput); - if (sizeCheck!=cSize) EXM_THROW(27, "Write error : cannot write frame end into %s", output_filename); - compressedfilesize += cSize; + sizeCheck = fwrite(outBuff, 1, cSize, foutput); + if (sizeCheck!=cSize) EXM_THROW(27, "Write error : cannot write frame end into %s", output_filename); + compressedfilesize += cSize; + } /* Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); @@ -315,7 +311,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* /* clean */ free(inBuff); free(outBuff); - ZSTD_freeCCtx(ctx); + ZBUFF_freeCCtx(ctx); fclose(finput); if (fclose(foutput)) EXM_THROW(28, "Write error : cannot properly close %s", output_filename); @@ -324,124 +320,87 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput, - BYTE* inBuff, size_t inBuffSize, + BYTE* inBuff, size_t inBuffSize, size_t alreadyLoaded, BYTE* outBuff, size_t outBuffSize, - ZSTD_DCtx* dctx) + ZBUFF_DCtx* dctx) { - BYTE* op = outBuff; - BYTE* const oend = outBuff + outBuffSize; - U64 filesize = 0; - size_t toRead; - size_t sizeCheck; - + U64 frameSize = 0; + size_t readSize=alreadyLoaded; /* Main decompression Loop */ - toRead = ZSTD_nextSrcSizeToDecompress(dctx); - while (toRead) + ZBUFF_decompressInit(dctx); + while (1) { - size_t readSize, decodedSize; + /* 
Decode */ + size_t sizeCheck; + size_t inSize=readSize, decodedSize=outBuffSize; + size_t inStart=0; + size_t toRead = ZBUFF_decompressContinue(dctx, outBuff, &decodedSize, inBuff+inStart, &inSize); + if (ZBUFF_isError(toRead)) EXM_THROW(36, "Decoding error : %s", ZBUFF_getErrorName(toRead)); + if (toRead==0) break; /* end of Frame */ + readSize -= inSize; + inStart += inSize; + + /* Write block */ + sizeCheck = fwrite(outBuff, 1, decodedSize, foutput); + if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block to destination file"); + frameSize += decodedSize; + DISPLAYUPDATE(2, "\rDecoded : %u MB... ", (U32)(frameSize>>20) ); + + if (readSize) continue; /* still some data left within inBuff */ /* Fill input buffer */ - if (toRead > inBuffSize) - EXM_THROW(34, "too large block"); + if (toRead > inBuffSize) EXM_THROW(34, "too large block"); readSize = fread(inBuff, 1, toRead, finput); - if (readSize != toRead) - EXM_THROW(35, "Read error"); - - /* Decode block */ - decodedSize = ZSTD_decompressContinue(dctx, op, oend-op, inBuff, readSize); - if (ZSTD_isError(decodedSize)) EXM_THROW(36, "Decoding error : input corrupted"); - - if (decodedSize) /* not a header */ - { - /* Write block */ - sizeCheck = fwrite(op, 1, decodedSize, foutput); - if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block to destination file"); - filesize += decodedSize; - op += decodedSize; - if (op==oend) op = outBuff; - DISPLAYUPDATE(2, "\rDecoded : %u MB... 
", (U32)(filesize>>20) ); - } - - /* prepare for next Block */ - toRead = ZSTD_nextSrcSizeToDecompress(dctx); + if (readSize != toRead) EXM_THROW(35, "Read error"); } - return filesize; + return frameSize; } -#define MAXHEADERSIZE (FIO_FRAMEHEADERSIZE+3) unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename) { FILE* finput, *foutput; BYTE* inBuff=NULL; - size_t inBuffSize = 0; + size_t inBuffSize = ZBUFF_recommendedDInSize(); BYTE* outBuff=NULL; - size_t outBuffSize = 0; - U32 blockSize = 128 KB; - U32 wNbBlocks = 4; + size_t outBuffSize = ZBUFF_recommendedDOutSize(); U64 filesize = 0; - BYTE* header[MAXHEADERSIZE]; size_t toRead; size_t sizeCheck; /* Init */ - ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZBUFF_DCtx* dctx = ZBUFF_createDCtx(); FIO_getFileHandles(&finput, &foutput, input_filename, output_filename); + /* Allocate Memory (if needed) */ + inBuff = (BYTE*)malloc(inBuffSize); + outBuff = (BYTE*)malloc(outBuffSize); + if (!inBuff || !outBuff) EXM_THROW(33, "Allocation error : not enough memory"); + /* for each frame */ for ( ; ; ) { - /* check magic number -> version */ U32 magicNumber; - toRead = sizeof(ZSTD_magicNumber);; - sizeCheck = fread(header, (size_t)1, toRead, finput); - if (sizeCheck==0) break; /* no more input */ - if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header"); + toRead = 0; - magicNumber = MEM_readLE32(header); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) + /* check magic number -> version */ + toRead = 4; + sizeCheck = fread(inBuff, (size_t)1, toRead, finput); + if (sizeCheck==0) break; /* no more input */ + if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header"); + magicNumber = MEM_readLE32(inBuff); if (ZSTD_isLegacy(magicNumber)) { filesize += FIO_decompressLegacyFrame(foutput, finput, magicNumber); continue; } #endif /* ZSTD_LEGACY_SUPPORT */ - if (magicNumber != ZSTD_magicNumber) EXM_THROW(32, "Error : unknown frame prefix"); - - /* 
prepare frame decompression, by completing header */ - ZSTD_resetDCtx(dctx); - toRead = ZSTD_nextSrcSizeToDecompress(dctx) - sizeof(ZSTD_magicNumber); - if (toRead > MAXHEADERSIZE) EXM_THROW(30, "Not enough memory to read header"); - sizeCheck = fread(&header[sizeof(ZSTD_magicNumber)], 1, toRead, finput); - if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header"); - sizeCheck = ZSTD_decompressContinue(dctx, NULL, 0, header, sizeof(ZSTD_magicNumber)+toRead); // Decode frame header - if (ZSTD_isError(sizeCheck)) EXM_THROW(32, "Error decoding header"); - - /* Here later : blockSize determination */ - - /* Allocate Memory (if needed) */ - { - size_t newInBuffSize = blockSize + FIO_blockHeaderSize; - size_t newOutBuffSize = wNbBlocks * blockSize; - if (newInBuffSize > inBuffSize) - { - free(inBuff); - inBuffSize = newInBuffSize; - inBuff = (BYTE*)malloc(inBuffSize); - } - if (newOutBuffSize > outBuffSize) - { - free(outBuff); - outBuffSize = newOutBuffSize; - outBuff = (BYTE*)malloc(outBuffSize); - } - } - if (!inBuff || !outBuff) EXM_THROW(33, "Allocation error : not enough memory"); - filesize += FIO_decompressFrame(foutput, finput, inBuff, inBuffSize, outBuff, outBuffSize, dctx); + filesize += FIO_decompressFrame(foutput, finput, inBuff, inBuffSize, toRead, outBuff, outBuffSize, dctx); } DISPLAYLEVEL(2, "\r%79s\r", ""); @@ -450,7 +409,7 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha /* clean */ free(inBuff); free(outBuff); - ZSTD_freeDCtx(dctx); + ZBUFF_freeDCtx(dctx); fclose(finput); if (fclose(foutput)) EXM_THROW(38, "Write error : cannot properly close %s", output_filename); @@ -458,74 +417,3 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha } -#if 0 -unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename) -{ - FILE* finput, *foutput; - BYTE* inBuff=NULL; - size_t inBuffSize = 0; - BYTE* outBuff=NULL; - size_t outBuffSize = 0; - U32 
blockSize = 128 KB; - U32 wNbBlocks = 4; - U64 filesize = 0; - BYTE* header[MAXHEADERSIZE]; - ZSTD_Dctx* dctx; - size_t toRead; - size_t sizeCheck; - - - /* Init */ - FIO_getFileHandles(&finput, &foutput, input_filename, output_filename); - dctx = ZSTD_createDCtx(); - - /* for each frame */ - for ( ; ; ) - { - /* check header */ - ZSTD_resetDCtx(dctx); - toRead = ZSTD_nextSrcSizeToDecompress(dctx); - if (toRead > MAXHEADERSIZE) EXM_THROW(30, "Not enough memory to read header"); - sizeCheck = fread(header, (size_t)1, toRead, finput); - if (sizeCheck==0) break; /* no more input */ - if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header"); - sizeCheck = ZSTD_decompressContinue(dctx, NULL, 0, header, toRead); // Decode frame header - if (ZSTD_isError(sizeCheck)) EXM_THROW(32, "Error decoding header"); - - /* Here later : blockSize determination */ - - /* Allocate Memory (if needed) */ - { - size_t newInBuffSize = blockSize + FIO_blockHeaderSize; - size_t newOutBuffSize = wNbBlocks * blockSize; - if (newInBuffSize > inBuffSize) - { - free(inBuff); - inBuffSize = newInBuffSize; - inBuff = (BYTE*)malloc(inBuffSize); - } - if (newOutBuffSize > outBuffSize) - { - free(outBuff); - outBuffSize = newOutBuffSize; - outBuff = (BYTE*)malloc(outBuffSize); - } - } - if (!inBuff || !outBuff) EXM_THROW(33, "Allocation error : not enough memory"); - - filesize += FIO_decompressFrame(foutput, finput, inBuff, inBuffSize, outBuff, outBuffSize, dctx); - } - - DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "Decoded %llu bytes \n", (long long unsigned)filesize); - - /* clean */ - free(inBuff); - free(outBuff); - ZSTD_freeDCtx(dctx); - fclose(finput); - if (fclose(foutput)) EXM_THROW(38, "Write error : cannot properly close %s", output_filename); - - return filesize; -} -#endif diff --git a/programs/fullbench.c b/programs/fullbench.c index 5ee5d493153..cb4822022f1 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -243,31 +243,6 @@ size_t 
local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const return ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &length, DTableLL, DTableML, DTableOffb, buff2, g_cSize); } -size_t local_conditionalNull(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) -{ - U32 i; - size_t total = 0; - BYTE* data = (BYTE*)buff2; - - (void)dst; (void)dstSize; (void)src; - for (i=0; i < srcSize; i++) - { - U32 b = data[i]; - total += b; - if (b==0) total = 0; // 825 - //if (!b) total = 0; // 825 - //total = b ? total : 0; // 622 - //total &= -!b; // 622 - //total *= !!b; // 465 - } - return total; -} - -size_t local_decodeLiteralsForward(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) -{ - (void)src; (void)srcSize; - return FSE_decompress(dst, dstSize, buff2, g_cSize); -} @@ -300,12 +275,6 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) case 32: benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "ZSTD_decodeSeqHeaders"; break; - case 101: - benchFunction = local_conditionalNull; benchName = "conditionalNull"; - break; - case 102: - benchFunction = local_decodeLiteralsForward; benchName = "ZSTD_decodeLiteralsForward"; - break; default : return 0; } @@ -332,14 +301,14 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) { blockProperties_t bp; g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); - ZSTD_getcBlockSize(dstBuff+4, dstBuffSize, &bp); // Get first block type + ZSTD_getcBlockSize(dstBuff+4, dstBuffSize, &bp); /* Get 1st block type */ if (bp.blockType != bt_compressed) { DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n"); goto _cleanOut; } - memcpy(buff2, dstBuff+7, g_cSize-7); - srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block + memcpy(buff2, dstBuff+8, g_cSize-8); + srcSize = srcSize > 128 KB ? 
128 KB : srcSize; /* speed relative to block */ break; } case 32: /* ZSTD_decodeSeqHeaders */ @@ -348,9 +317,9 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) const BYTE* ip = dstBuff; const BYTE* iend; size_t blockSize; - ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); - ip += 4; // Jump magic Number - blockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); // Get first block type + ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, 1); /* it would be better to use direct block compression here */ + ip += 5; /* Skip frame Header */ + blockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp); /* Get 1st block type */ if (bp.blockType != bt_compressed) { DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n"); @@ -358,32 +327,16 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb) } iend = ip + 3 + blockSize; /* End of first block */ ip += 3; /* skip block header */ - ip += ZSTD_decodeLiteralsBlock(g_dctxPtr, ip, iend-ip); // jump literal sub block and its header + ip += ZSTD_decodeLiteralsBlock(g_dctxPtr, ip, iend-ip); /* skip literal segment */ g_cSize = iend-ip; - memcpy(buff2, ip, g_cSize); // copy rest of block (starting with SeqHeader) - srcSize = srcSize > 128 KB ? 128 KB : srcSize; // speed relative to block + memcpy(buff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */ + srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */ break; } /* test functions */ + /* by convention, test functions can be added > 100 */ - case 101: /* conditionalNull */ - { - size_t i; - for (i=0; i 128 KB ? 
128 KB : srcSize; // relative to block - break; - } default : ; } diff --git a/programs/paramgrill.c b/programs/paramgrill.c index 47df78ab998..56fb621ad24 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -46,7 +46,7 @@ #if defined(_MSC_VER) # define snprintf _snprintf /* snprintf unsupported by Visual <= 2012 */ #endif - + /************************************** * Includes @@ -125,8 +125,7 @@ static U32 g_rand = 1; static U32 g_singleRun = 0; static U32 g_target = 0; static U32 g_noSeed = 0; -static const ZSTD_parameters* g_seedParams = ZSTD_defaultParameters[0]; -static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, ZSTD_greedy }; +static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, ZSTD_greedy }; void BMK_SetNbIterations(int nbLoops) { @@ -139,28 +138,6 @@ void BMK_SetNbIterations(int nbLoops) * Private functions *********************************************************/ -static unsigned BMK_highbit(U32 val) -{ -# if defined(_MSC_VER) /* Visual */ - unsigned long r; - _BitScanReverse(&r, val); - return (unsigned)r; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return 31 - __builtin_clz(val); -# else /* Software version */ - static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - int r; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; - return r; -# endif -} - #if defined(BMK_LEGACY_TIMER) static int BMK_GetMilliStart(void) @@ -655,7 +632,7 @@ static void playAround(FILE* f, winnerInfo_t* winners, /* validate new conf */ { ZSTD_parameters saved = p; - ZSTD_validateParams(&p, g_blockSize ? 
g_blockSize : srcSize); + ZSTD_validateParams(&p); if (memcmp(&p, &saved, sizeof(p))) continue; /* p was invalid */ } @@ -707,12 +684,12 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) const char* rfName = "grillResults.txt"; FILE* f; const size_t blockSize = g_blockSize ? g_blockSize : srcSize; - const U32 srcLog = BMK_highbit((U32)(blockSize-1))+1; if (g_singleRun) { BMK_result_t testResult; - ZSTD_validateParams(&g_params, blockSize); + g_params.srcSize = blockSize; + ZSTD_validateParams(&g_params); BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, g_params); DISPLAY("\n"); return; @@ -735,9 +712,10 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) params.searchLog = 1; params.searchLength = 7; params.strategy = ZSTD_fast; - ZSTD_validateParams(¶ms, blockSize); + params.srcSize = blockSize; + ZSTD_validateParams(¶ms); BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); - g_cSpeedTarget[1] = (testResult.cSpeed * 15) >> 4; + g_cSpeedTarget[1] = (testResult.cSpeed * 31) >> 5; } /* establish speed objectives (relative to level 1) */ @@ -746,16 +724,10 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) /* populate initial solution */ { - const int tableID = (blockSize > 128 KB); const int maxSeeds = g_noSeed ? 
1 : ZSTD_MAX_CLEVEL; - g_seedParams = ZSTD_defaultParameters[tableID]; for (i=1; i<=maxSeeds; i++) { - const U32 btPlus = (params.strategy == ZSTD_btlazy2); - params = g_seedParams[i]; - params.windowLog = MIN(srcLog, params.windowLog); - params.contentLog = MIN(params.windowLog+btPlus, params.contentLog); - params.searchLog = MIN(params.contentLog, params.searchLog); + params = ZSTD_getParams(i, blockSize); BMK_seed(winners, params, srcBuffer, srcSize, ctx); } } @@ -963,7 +935,7 @@ int main(int argc, char** argv) case 'S': g_singleRun = 1; argument++; - g_params = g_seedParams[2]; + g_params = ZSTD_getParams(2, g_blockSize); for ( ; ; ) { switch(*argument) @@ -1013,7 +985,7 @@ int main(int argc, char** argv) argument++; while ((*argument>= '0') && (*argument<='9')) cLevel *= 10, cLevel += *argument++ - '0'; - g_params = g_seedParams[cLevel]; + g_params = ZSTD_getParams(cLevel, g_blockSize); continue; } default : ; From f86c0e7d10c1ae653f02b19d3dccd7750e60f4cf Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Nov 2015 15:00:12 +0100 Subject: [PATCH 46/79] Fixed Visual project --- visual/2012/zstd/zstd.vcxproj | 3 +++ visual/2012/zstd/zstd.vcxproj.filters | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/visual/2012/zstd/zstd.vcxproj b/visual/2012/zstd/zstd.vcxproj index 989ec2e405f..2baf8100d7a 100644 --- a/visual/2012/zstd/zstd.vcxproj +++ b/visual/2012/zstd/zstd.vcxproj @@ -23,6 +23,7 @@ + @@ -40,6 +41,8 @@ + + diff --git a/visual/2012/zstd/zstd.vcxproj.filters b/visual/2012/zstd/zstd.vcxproj.filters index a4679505c35..d841979307d 100644 --- a/visual/2012/zstd/zstd.vcxproj.filters +++ b/visual/2012/zstd/zstd.vcxproj.filters @@ -48,6 +48,9 @@ Fichiers sources + + Fichiers sources + @@ -92,5 +95,11 @@ Fichiers d%27en-tête + + Fichiers d%27en-tête + + + Fichiers d%27en-tête + \ No newline at end of file From 7b2c31043ca953e2669b6192be41c409e29d988a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Nov 2015 15:02:46 +0100 Subject: [PATCH 
47/79] fixed : potential malloc error --- lib/zstd_buffered.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index 28bff9ec3a0..eaa1d26fc89 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -331,6 +331,7 @@ struct ZBUFF_DCtx_s { ZBUFF_DCtx* ZBUFF_createDCtx(void) { ZBUFF_DCtx* zbc = (ZBUFF_DCtx*)malloc(sizeof(ZBUFF_DCtx)); + if (zbc==NULL) return NULL; memset(zbc, 0, sizeof(*zbc)); zbc->zc = ZSTD_createDCtx(); zbc->stage = ZBUFFds_init; From 377b9e8b94deb7d974e423d6908ec6cfee3423f0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Nov 2015 15:04:37 +0100 Subject: [PATCH 48/79] fixed : potential malloc error --- lib/zstd_buffered.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index eaa1d26fc89..19ce06ebb33 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -98,6 +98,7 @@ struct ZBUFF_CCtx_s { ZBUFF_CCtx* ZBUFF_createCCtx(void) { ZBUFF_CCtx* zbc = (ZBUFF_CCtx*)malloc(sizeof(ZBUFF_CCtx)); + if (zbc==NULL) return NULL; memset(zbc, 0, sizeof(*zbc)); zbc->zc = ZSTD_createCCtx(); return zbc; From 785b5ecc54722170c93d0dc20af20da6034d536f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Nov 2015 15:16:19 +0100 Subject: [PATCH 49/79] stronger tests --- programs/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index 697bfe40a48..dba59f9be01 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -193,17 +193,17 @@ test-zstd: zstd datagen @diff tmp1 tmp2 ./datagen -g17MB -P80 | ./zstd -v15 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - @./datagen -g9MB -P85 | md5sum > tmp1 - ./datagen -g9MB -P85 | ./zstd -v16 | ./zstd -d | md5sum > tmp2 + ./datagen -g17MB -P80 | ./zstd -v16 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g9MB -P85 | ./zstd -v17 | ./zstd -d | md5sum > tmp2 + ./datagen -g17MB -P80 | ./zstd -v17 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - 
./datagen -g9MB -P85 | ./zstd -v18 | ./zstd -d | md5sum > tmp2 + @./datagen -g49MB -P93 | md5sum > tmp1 + ./datagen -g49MB -P93 | ./zstd -v18 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - @./datagen -g5MB -P89 | md5sum > tmp1 - ./datagen -g5MB -P89 | ./zstd -v19 | ./zstd -d | md5sum > tmp2 + ./datagen -g49MB -P93 | ./zstd -v19 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 - ./datagen -g5MB -P89 | ./zstd -v20 | ./zstd -d | md5sum > tmp2 + @./datagen -g97MB -P99 | md5sum > tmp1 + ./datagen -g97MB -P99 | ./zstd -v20 | ./zstd -d | md5sum > tmp2 @diff tmp1 tmp2 ./datagen -g6GB -P99 | md5sum > tmp1 ./datagen -g6GB -P99 | ./zstd -vq | ./zstd -d | md5sum > tmp2 From d3cb6901560d5523b41c6de1fac9dc910608099e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Nov 2015 15:26:55 +0100 Subject: [PATCH 50/79] fixed leak issue --- lib/zstd_buffered.c | 2 ++ programs/Makefile | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index 19ce06ebb33..693943da10b 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -108,6 +108,8 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) { if (zbc==NULL) return 0; /* support free on NULL */ ZSTD_freeCCtx(zbc->zc); + free(zbc->inBuff); + free(zbc->outBuff); free(zbc); return 0; } diff --git a/programs/Makefile b/programs/Makefile index dba59f9be01..22c2cd67227 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -234,7 +234,7 @@ valgrindTest: zstd datagen fuzzer fullbench ./datagen -g16KB > tmp valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID) ./datagen -g2930KB > tmp - valgrind --leak-check=yes --error-exitcode=1 ./zstd -4 -vf tmp $(VOID) + valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp $(VOID) ./datagen -g64MB > tmp valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID) @rm tmp From e4fdad55dc4e68f0550de77002b7277a330dd143 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Nov 2015 21:09:17 +0100 Subject: [PATCH 51/79] fixed 
minor decompression bug in buffered mode --- lib/zstd_buffered.c | 15 ++++++++++----- lib/zstd_decompress.c | 7 +++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index 693943da10b..242ddf24d77 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -439,11 +439,16 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt if (zbc->outBuff == NULL) return ERROR(memory_allocation); } } - memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos); - zbc->inPos = zbc->hPos; - zbc->hPos = 0; - zbc->stage = ZBUFFds_load; - break; /* useless : stage follows */ + if (zbc->hPos) + { + /* some data already loaded into headerBuffer : transfer into inBuff */ + memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos); + zbc->inPos = zbc->hPos; + zbc->hPos = 0; + zbc->stage = ZBUFFds_load; + break; + } + zbc->stage = ZBUFFds_read; case ZBUFFds_read: { diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 54429628d6b..34d5d86aec1 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -68,7 +68,6 @@ # include "zstd_legacy.h" #endif - /* ******************************************************* * Compiler specifics *********************************************************/ @@ -120,7 +119,7 @@ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } /* ************************************************************* * Context management ***************************************************************/ -typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage; struct ZSTD_DCtx_s @@ -449,7 +448,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) /* Literal length */ litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); - prevOffset = litLength ? 
seq->offset : seqState->prevOffset; + prevOffset = litLength ? seq->offset : seqState->prevOffset; if (litLength == MaxLL) { U32 add = *dumps++; @@ -801,7 +800,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con /* get frame header size */ if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */ ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min); - if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize; + if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize; memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min); if (ctx->headerSize > ZSTD_frameHeaderSize_min) { From 91a97960f797ab20c53cd279cd91b09076c43e88 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Nov 2015 09:59:49 +0100 Subject: [PATCH 52/79] Added zbufftest --- lib/zstd_buffered.h | 4 +- programs/Makefile | 9 +- programs/zbufftest.c | 521 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 530 insertions(+), 4 deletions(-) create mode 100644 programs/zbufftest.c diff --git a/lib/zstd_buffered.h b/lib/zstd_buffered.h index f79982a1325..ecbc1df5671 100644 --- a/lib/zstd_buffered.h +++ b/lib/zstd_buffered.h @@ -113,8 +113,8 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t* maxDstSizeP * Use ZBUFF_decompressContinue() repetitively to consume your input. * *srcSizePtr and *maxDstSizePtr can be any size. * The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. -* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input. -* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst . +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. 
+* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst. * @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) * or 0 when a frame is completely decoded * or an error code, which can be tested using ZBUFF_isError(). diff --git a/programs/Makefile b/programs/Makefile index 22c2cd67227..4683a946893 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -35,7 +35,7 @@ VERSION?= 0.4.0 DESTDIR?= PREFIX ?= /usr/local CPPFLAGS= -I../lib -I../lib/legacy -I./legacy -DZSTD_VERSION=\"$(VERSION)\" -DZSTD_LEGACY_SUPPORT=1 -CFLAGS ?= -O3 # -falign-loops=32 # not always positive +CFLAGS ?= -O3 # -falign-loops=32 # not always beneficial CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) @@ -88,6 +88,11 @@ fuzzer32: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse datagen.c xxhash.c fuzzer.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) +zbufftest : $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/zstd_buffered.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ + $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ + datagen.c xxhash.c zbufftest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + paramgrill : $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ datagen.c xxhash.c paramgrill.c @@ -100,7 +105,7 @@ clean: @rm -f core *.o tmp* \ zstd$(EXT) zstd32$(EXT) \ fullbench$(EXT) fullbench32$(EXT) \ - fuzzer$(EXT) fuzzer32$(EXT) \ + fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) \ datagen$(EXT) paramgrill$(EXT) @echo Cleaning completed diff --git a/programs/zbufftest.c b/programs/zbufftest.c new file mode 100644 index 00000000000..0a10e4de5ee --- /dev/null +++ b/programs/zbufftest.c @@ -0,0 +1,521 @@ +/* + Fuzzer test tool for 
zstd_buffered + Copyright (C) Yann Collet 2105 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - ZSTD source repository : https://github.com/Cyan4973/zstd + - ZSTD public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** +* Compiler specific +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS /* fgets */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ +# pragma GCC diagnostic ignored "-Wmissing-field-initializers" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ +#endif + + +/************************************** +* Includes +**************************************/ +#include /* free */ +#include /* fgets, sscanf */ +#include /* timeb */ +#include /* strcmp */ +#include "mem.h" +#include "zstd_buffered.h" +#include "zstd.h" /* 
ZSTD_compressBound() */ +#include "datagen.h" /* RDG_genBuffer */ +#include "xxhash.h" /* XXH64 */ + + +/************************************** + Constants +**************************************/ +#ifndef ZSTD_VERSION +# define ZSTD_VERSION "" +#endif + +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +static const U32 nbTestsDefault = 30000; +#define COMPRESSIBLE_NOISE_LENGTH (10 MB) +#define FUZ_COMPRESSIBILITY_DEFAULT 50 +static const U32 prime1 = 2654435761U; +static const U32 prime2 = 2246822519U; + + + +/************************************** +* Display Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if ((FUZ_GetMilliSpan(g_time) > g_refreshRate) || (g_displayLevel>=4)) \ + { g_time = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stdout); } } +static const U32 g_refreshRate = 150; +static U32 g_time = 0; + + +/********************************************************* +* Fuzzer functions +*********************************************************/ +#define MAX(a,b) ((a)>(b)?(a):(b)) + +static U32 FUZ_GetMilliStart(void) +{ + struct timeb tb; + U32 nCount; + ftime( &tb ); + nCount = (U32) (((tb.time & 0xFFFFF) * 1000) + tb.millitm); + return nCount; +} + + +static U32 FUZ_GetMilliSpan(U32 nTimeStart) +{ + U32 nCurrent = FUZ_GetMilliStart(); + U32 nSpan = nCurrent - nTimeStart; + if (nTimeStart > nCurrent) + nSpan += 0x100000 * 1000; + return nSpan; +} + + +# define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +unsigned int FUZ_rand(unsigned int* src) +{ + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + + +/* +static unsigned FUZ_highbit32(U32 v32) +{ + unsigned nbBits = 0; + if (v32==0) return 0; + for ( 
; v32 ; v32>>=1) nbBits++; + return nbBits; +} +*/ + +static int basicUnitTests(U32 seed, double compressibility) +{ + int testResult = 0; + void* CNBuffer; + size_t CNBufferSize = COMPRESSIBLE_NOISE_LENGTH; + void* compressedBuffer; + size_t compressedBufferSize = ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH); + void* decodedBuffer; + size_t decodedBufferSize = CNBufferSize; + U32 randState = seed; + size_t result, cSize, readSize, genSize; + U32 testNb=0; + ZBUFF_CCtx* zc = ZBUFF_createCCtx(); + ZBUFF_DCtx* zd = ZBUFF_createDCtx(); + + /* Create compressible test buffer */ + CNBuffer = malloc(CNBufferSize); + compressedBuffer = malloc(compressedBufferSize); + decodedBuffer = malloc(decodedBufferSize); + if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) + { + DISPLAY("Not enough memory, aborting\n"); + goto _output_error; + } + RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., randState); + + /* Basic compression test */ + DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + ZBUFF_compressInit(zc, 1); + readSize = CNBufferSize; + genSize = compressedBufferSize; + result = ZBUFF_compressContinue(zc, compressedBuffer, &genSize, CNBuffer, &readSize); + if (ZBUFF_isError(result)) goto _output_error; + if (readSize != CNBufferSize) goto _output_error; /* entire input should be consumed */ + cSize = genSize; + genSize = compressedBufferSize - cSize; + result = ZBUFF_compressEnd(zc, compressedBuffer+cSize, &genSize); + if (result != 0) goto _output_error; /* error, or some data not flushed */ + cSize += genSize; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); + + /* Basic decompression test */ + DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + ZBUFF_decompressInit(zd); + readSize = cSize; + genSize = CNBufferSize; + result = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, compressedBuffer, &readSize); + 
if (result != 0) goto _output_error; /* should reach end of frame == 0; otherwise, some data left, or an error */ + if (genSize != CNBufferSize) goto _output_error; /* should regenerate the same amount */ + if (readSize != cSize) goto _output_error; /* should have read the entire frame */ + DISPLAYLEVEL(4, "OK \n"); + + /* check regenerated data is byte exact */ + { + size_t i; + DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + for (i=0; i "); DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } + +static const U32 maxSrcLog = 23; +static const U32 maxSampleLog = 19; + +int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility) +{ + BYTE* cNoiseBuffer[5]; + BYTE* srcBuffer; + size_t srcBufferSize = (size_t)1<>= 3; + if (buffNb & 7) + { + const U32 tnb[2] = { 1, 3 }; + buffNb = tnb[buffNb >> 3]; + } + else + { + const U32 tnb[2] = { 0, 4 }; + buffNb = tnb[buffNb >> 3]; + } + } + srcBuffer = cNoiseBuffer[buffNb]; + + /* Multi - segments compression test */ + XXH64_reset(&crc64, 0); + nbChunks = (FUZ_rand(&lseed) & 127) + 2; + sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog; + maxTestSize = (size_t)1 << sampleSizeLog; + maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1); + ZBUFF_compressInit(zc, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); + totalTestSize = 0; + cSize = 0; + for (n=0; n maxTestSize) break; + } + + genSize = cBufferSize - cSize; + errorCode = ZBUFF_compressEnd(zc, cBuffer+cSize, &genSize); + CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode)); + CHECK (errorCode != 0, "frame epilogue not fully consumed"); + cSize += genSize; + crcOrig = XXH64_digest(&crc64); + + /* multi - fragments decompression test */ + ZBUFF_decompressInit(zd); + genSize = dstBufferSize; + readSize = cBufferSize; + errorCode = ZBUFF_decompressContinue(zd, dstBuffer, &genSize, cBuffer, &readSize); + CHECK (ZBUFF_isError(errorCode), "decompression 
error : %s", ZBUFF_getErrorName(errorCode)); + CHECK (errorCode != 0, "frame not fully decoded"); + CHECK (genSize != totalTestSize, "decompressed data : wrong size") + crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); + + /* noisy/erroneous src decompression test */ + /* TBD later */ + } + DISPLAY("\rAll fuzzer tests completed \n"); + +_cleanup: + ZBUFF_freeCCtx(zc); + ZBUFF_freeDCtx(zd); + free(cNoiseBuffer[0]); + free(cNoiseBuffer[1]); + free(cNoiseBuffer[2]); + free(cNoiseBuffer[3]); + free(cNoiseBuffer[4]); + free(copyBuffer); + free(cBuffer); + free(dstBuffer); + return result; + +_output_error: + result = 1; + goto _cleanup; +} + + +/********************************************************* +* Command line +*********************************************************/ +int FUZ_usage(char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -i# : Nb of tests (default:%u) \n", nbTestsDefault); + DISPLAY( " -s# : Select seed (default:prompt user)\n"); + DISPLAY( " -t# : Select starting test number (default:0)\n"); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -p : pause at the end\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, char** argv) +{ + U32 seed=0; + int seedset=0; + int argNb; + int nbTests = nbTestsDefault; + int testNb = 0; + int proba = FUZ_COMPRESSIBILITY_DEFAULT; + int result=0; + U32 mainPause = 0; + char* programName; + + /* Check command line */ + programName = argv[0]; + for(argNb=1; argNb='0') && (*argument<='9')) + { + nbTests *= 10; + nbTests += *argument - '0'; + argument++; + } + break; + + case 's': + argument++; + seed=0; + seedset=1; + while ((*argument>='0') && (*argument<='9')) + { + seed 
*= 10; + seed += *argument - '0'; + argument++; + } + break; + + case 't': + argument++; + testNb=0; + while ((*argument>='0') && (*argument<='9')) + { + testNb *= 10; + testNb += *argument - '0'; + argument++; + } + break; + + case 'P': /* compressibility % */ + argument++; + proba=0; + while ((*argument>='0') && (*argument<='9')) + { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba<0) proba=0; + if (proba>100) proba=100; + break; + + default: + return FUZ_usage(programName); + } + } + } + } + + /* Get Seed */ + DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION); + + if (!seedset) seed = FUZ_GetMilliStart() % 10000; + DISPLAY("Seed = %u\n", seed); + if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba); + + if (nbTests<=0) nbTests=1; + + if (testNb==0) result = basicUnitTests(0, ((double)proba) / 100); /* constant seed for predictability */ + if (!result) + result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); + if (mainPause) + { + int unused; + DISPLAY("Press Enter \n"); + unused = getchar(); + (void)unused; + } + return result; +} From 5f2ec63852a6e7d91ba496aeda5230761bc64108 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Nov 2015 10:32:17 +0100 Subject: [PATCH 53/79] fixed decompression bug (buffered mode) --- lib/zstd_buffered.c | 4 ++-- programs/Makefile | 20 ++++++++++++++++---- programs/zbufftest.c | 28 +++++++++++++++++++--------- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index 242ddf24d77..aa858c7f167 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -479,10 +479,10 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt case ZBUFFds_load: { size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc); - size_t toLoad = neededInSize - zbc->inPos; + size_t toLoad = neededInSize - zbc->inPos; /* should always be <= remaining space within inBuff 
*/ size_t loadedSize; if (toLoad > zbc->inBuffSize - zbc->inPos) return ERROR(corruption_detected); /* should never happen */ - loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, zbc->inBuffSize - zbc->inPos, ip, iend-ip); + loadedSize = ZBUFF_limitCopy(zbc->inBuff + zbc->inPos, toLoad, ip, iend-ip); ip += loadedSize; zbc->inPos += loadedSize; if (loadedSize < toLoad) { notDone = 0; break; } /* not enough input, wait for more */ diff --git a/programs/Makefile b/programs/Makefile index 4683a946893..a0cad24c13e 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -53,10 +53,11 @@ EXT = VOID = /dev/null endif +.PHONY: default all clean install uninstall test test32 test-all default: zstd -all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 paramgrill datagen +all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 zbufftest zbufftest32 paramgrill datagen zstd: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c $(ZSTDDIR)/zstd_buffered.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ @@ -93,6 +94,11 @@ zbufftest : $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/ datagen.c xxhash.c zbufftest.c $(CC) $(FLAGS) $^ -o $@$(EXT) +zbufftest32: $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/zstd_buffered.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ + $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ + datagen.c xxhash.c zbufftest.c + $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) + paramgrill : $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c \ $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c \ datagen.c xxhash.c paramgrill.c @@ -105,7 +111,7 @@ clean: @rm -f core *.o tmp* \ zstd$(EXT) zstd32$(EXT) \ fullbench$(EXT) fullbench32$(EXT) \ - fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) \ + fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) zbufftest32$(EXT) \ datagen$(EXT) paramgrill$(EXT) @echo Cleaning completed @@ -135,9 +141,9 @@ 
uninstall: [ -f $(DESTDIR)$(MANDIR)/zstd.1 ] && rm -f $(DESTDIR)$(MANDIR)/zstd.1 @echo zstd programs successfully uninstalled -test: test-zstd test-fullbench test-fuzzer +test: test-zstd test-fullbench test-fuzzer test-zbuff -test32: test-zstd32 test-fullbench32 test-fuzzer32 +test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zbuff32 test-all: test test32 valgrindTest @@ -233,6 +239,12 @@ test-fuzzer: fuzzer test-fuzzer32: fuzzer32 ./fuzzer32 +test-zbuff: zbufftest + ./zbufftest + +test-zbuff32: zbufftest32 + ./zbufftest32 + valgrindTest: zstd datagen fuzzer fullbench @echo "\n ---- valgrind tests : memory analyzer ----" valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID) diff --git a/programs/zbufftest.c b/programs/zbufftest.c index 0a10e4de5ee..717c4bce30a 100644 --- a/programs/zbufftest.c +++ b/programs/zbufftest.c @@ -64,7 +64,7 @@ #define MB *(1U<<20) #define GB *(1U<<30) -static const U32 nbTestsDefault = 30000; +static const U32 nbTestsDefault = 10000; #define COMPRESSIBLE_NOISE_LENGTH (10 MB) #define FUZ_COMPRESSIBILITY_DEFAULT 50 static const U32 prime1 = 2654435761U; @@ -280,7 +280,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit { size_t sampleSize, sampleStart; size_t cSize; - size_t maxTestSize, totalTestSize, readSize, genSize; + size_t maxTestSize, totalTestSize, readSize, totalCSize, genSize, totalGenSize; size_t errorCode; U32 sampleSizeLog, buffNb, n, nbChunks; U64 crcOrig, crcDest; @@ -330,13 +330,12 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit genSize = cBufferSize - cSize; errorCode = ZBUFF_compressContinue(zc, cBuffer+cSize, &genSize, srcBuffer+sampleStart, &readSize); CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode)); - CHECK (readSize != sampleSize, "test condition not respected : input should be fully consumed") + CHECK (readSize != sampleSize, "compression test condition not respected : input should 
be fully consumed") cSize += genSize; totalTestSize += sampleSize; if (totalTestSize > maxTestSize) break; } - genSize = cBufferSize - cSize; errorCode = ZBUFF_compressEnd(zc, cBuffer+cSize, &genSize); CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode)); @@ -346,12 +345,23 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit /* multi - fragments decompression test */ ZBUFF_decompressInit(zd); - genSize = dstBufferSize; - readSize = cBufferSize; - errorCode = ZBUFF_decompressContinue(zd, dstBuffer, &genSize, cBuffer, &readSize); - CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode)); + totalCSize = 0; + totalGenSize = 0; + while (totalCSize < cSize) + { + sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog; + sampleSize = (size_t)1 << sampleSizeLog; + sampleSize += FUZ_rand(&lseed) & (sampleSize-1); + readSize = sampleSize; + genSize = dstBufferSize - totalGenSize; + errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &genSize, cBuffer+totalCSize, &readSize); + CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode)); + totalGenSize += genSize; + totalCSize += readSize; + } CHECK (errorCode != 0, "frame not fully decoded"); - CHECK (genSize != totalTestSize, "decompressed data : wrong size") + CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size") + CHECK (totalCSize != cSize, "compressed data should be fully read") crcDest = XXH64(dstBuffer, totalTestSize, 0); if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); CHECK (crcDest!=crcOrig, "decompressed data corrupted"); From 7447ee96f6b39b0f58de225ea8db50486f7995c2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Nov 2015 10:52:30 +0100 Subject: [PATCH 54/79] fixed g++ warning --- programs/Makefile | 6 ++++-- programs/zbufftest.c | 31 ++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git 
a/programs/Makefile b/programs/Makefile index a0cad24c13e..a8ca605cd42 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -53,6 +53,8 @@ EXT = VOID = /dev/null endif +ZBUFFTEST = -T2mn + .PHONY: default all clean install uninstall test test32 test-all default: zstd @@ -240,10 +242,10 @@ test-fuzzer32: fuzzer32 ./fuzzer32 test-zbuff: zbufftest - ./zbufftest + ./zbufftest $(ZBUFFTEST) test-zbuff32: zbufftest32 - ./zbufftest32 + ./zbufftest32 $(ZBUFFTEST) valgrindTest: zstd datagen fuzzer fullbench @echo "\n ---- valgrind tests : memory analyzer ----" diff --git a/programs/zbufftest.c b/programs/zbufftest.c index 717c4bce30a..b6fd7530bbb 100644 --- a/programs/zbufftest.c +++ b/programs/zbufftest.c @@ -80,11 +80,13 @@ static const U32 prime2 = 2246822519U; static U32 g_displayLevel = 2; #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ - if ((FUZ_GetMilliSpan(g_time) > g_refreshRate) || (g_displayLevel>=4)) \ - { g_time = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \ + if ((FUZ_GetMilliSpan(g_displayTime) > g_refreshRate) || (g_displayLevel>=4)) \ + { g_displayTime = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \ if (g_displayLevel>=4) fflush(stdout); } } static const U32 g_refreshRate = 150; -static U32 g_time = 0; +static U32 g_displayTime = 0; + +static U32 g_testTime = 0; /********************************************************* @@ -170,7 +172,7 @@ static int basicUnitTests(U32 seed, double compressibility) if (readSize != CNBufferSize) goto _output_error; /* entire input should be consumed */ cSize = genSize; genSize = compressedBufferSize - cSize; - result = ZBUFF_compressEnd(zc, compressedBuffer+cSize, &genSize); + result = ZBUFF_compressEnd(zc, ((char*)compressedBuffer)+cSize, &genSize); if (result != 0) goto _output_error; /* error, or some data not flushed */ cSize += genSize; DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); @@ -247,6 +249,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned 
startTest, double compressibilit ZBUFF_CCtx* zc; ZBUFF_DCtx* zd; XXH64_state_t crc64; + U32 startTime = FUZ_GetMilliStart(); /* allocation */ zc = ZBUFF_createCCtx(); @@ -276,7 +279,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit FUZ_rand(&coreSeed); /* test loop */ - for ( ; testNb <= nbTests; testNb++ ) + for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < g_testTime); testNb++ ) { size_t sampleSize, sampleStart; size_t cSize; @@ -369,7 +372,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit /* noisy/erroneous src decompression test */ /* TBD later */ } - DISPLAY("\rAll fuzzer tests completed \n"); + DISPLAY("\r%u fuzzer tests completed \n", testNb); _cleanup: ZBUFF_freeCCtx(zc); @@ -456,7 +459,7 @@ int main(int argc, char** argv) case 'i': argument++; - nbTests=0; + nbTests=0; g_testTime=0; while ((*argument>='0') && (*argument<='9')) { nbTests *= 10; @@ -465,6 +468,20 @@ int main(int argc, char** argv) } break; + case 'T': + argument++; + nbTests=0; g_testTime=0; + while ((*argument>='0') && (*argument<='9')) + { + g_testTime *= 10; + g_testTime += *argument - '0'; + argument++; + } + if (*argument=='m') g_testTime *=60, argument++; + if (*argument=='n') argument++; + g_testTime *= 1000; + break; + case 's': argument++; seed=0; From 6bcdeac9545062192e14b3fb4b17d75405b17f61 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Nov 2015 11:43:00 +0100 Subject: [PATCH 55/79] fixed asan warning --- lib/zstd_buffered.c | 2 +- lib/zstd_compress.c | 16 ++++++++-------- programs/zbufftest.c | 17 +++++++++++++---- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index aa858c7f167..37b5bbcd3c3 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -213,7 +213,7 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc, /* prepare next block */ zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize; if (zbc->inBuffTarget 
> zbc->inBuffSize) - { zbc->inBuffPos = 0; zbc->inBuffTarget = zbc->blockSize; } + { zbc->inBuffPos = 0; zbc->inBuffTarget = zbc->blockSize; } /* note : inBuffSize >= blockSize */ zbc->inToCompress = zbc->inBuffPos; if (cDst == op) { op += cSize; break; } /* no need to flush */ zbc->outBuffContentSize = cSize; diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 21865d337f5..f7a83de8abf 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -748,7 +748,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, /* init */ ZSTD_resetSeqStore(seqStorePtr); - if (ip == base) + if (ip < base+4) { hashTable[ZSTD_hashPtr(base+1, hBits, mls)] = 1; hashTable[ZSTD_hashPtr(base+2, hBits, mls)] = 2; @@ -787,14 +787,14 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, } /* match found */ - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode); + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, mlCode); ip += mlCode + MINMATCH; anchor = ip; if (ip <= ilimit) { /* Fill Table */ - hashTable[ZSTD_hashPtr(ip-(mlCode+MINMATCH)+2, hBits, mls)] = (U32)(ip-(mlCode+MINMATCH)+2-base); /* here because ip-(mlCode+MINMATCH)+2 could be > iend-8 without ip <= ilimit check*/ + hashTable[ZSTD_hashPtr(ip-(mlCode+MINMATCH)+2, hBits, mls)] = (U32)(ip-(mlCode+MINMATCH)+2-base); /* here because ip-(mlCode+MINMATCH)+2 could be > iend-8 without ip <= ilimit check*/ hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) @@ -887,7 +887,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const size_t h = ZSTD_hashPtr(ip, hBits, mls); const U32 matchIndex = hashTable[h]; const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE* match = matchBase + matchIndex; + const BYTE* match = matchBase + matchIndex; const U32 current = (U32)(ip-base); const U32 repIndex = current + 1 - offset_1; const BYTE* repBase = repIndex < dictLimit ? 
dictBase : base; @@ -927,7 +927,7 @@ size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, if (ip <= ilimit) { - /* Fill Table */ + /* Fill Table */ hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ @@ -990,7 +990,7 @@ size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, * Binary Tree search ***************************************/ /** ZSTD_insertBt1 : add one or multiple positions to tree -* @ip : assumed <= iend-8 +* @ip : assumed <= iend-8 * @return : nb of positions added */ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares) { @@ -1173,7 +1173,7 @@ FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS ( /** ZSTD_insertBt1_extDict : add one or multiple positions to tree -* @ip : assumed <= iend-8 +* @ip : assumed <= iend-8 * @return : nb of positions added */ static U32 ZSTD_insertBt1_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares) { @@ -1389,7 +1389,7 @@ FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] -/* Update chains up to ip (excluded) +/* Update chains up to ip (excluded) Assumption : always within prefix (ie. not within extDict) */ static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) { diff --git a/programs/zbufftest.c b/programs/zbufftest.c index b6fd7530bbb..c673908395c 100644 --- a/programs/zbufftest.c +++ b/programs/zbufftest.c @@ -229,7 +229,7 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max) # define CHECK(cond, ...) 
if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } -static const U32 maxSrcLog = 23; +static const U32 maxSrcLog = 24; static const U32 maxSampleLog = 19; int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility) @@ -313,8 +313,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit /* Multi - segments compression test */ XXH64_reset(&crc64, 0); nbChunks = (FUZ_rand(&lseed) & 127) + 2; - sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog; - maxTestSize = (size_t)1 << sampleSizeLog; + maxTestSize = FUZ_rand(&lseed) % maxSrcLog; + maxTestSize = (size_t)1 << maxTestSize; maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1); ZBUFF_compressInit(zc, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); totalTestSize = 0; @@ -334,9 +334,18 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit errorCode = ZBUFF_compressContinue(zc, cBuffer+cSize, &genSize, srcBuffer+sampleStart, &readSize); CHECK (ZBUFF_isError(errorCode), "compression error : %s", ZBUFF_getErrorName(errorCode)); CHECK (readSize != sampleSize, "compression test condition not respected : input should be fully consumed") - cSize += genSize; totalTestSize += sampleSize; + + if ((FUZ_rand(&lseed) & 15) == 0) + { + /* add a few random flushes operations, to mess around */ + genSize = cBufferSize - cSize; + errorCode = ZBUFF_compressFlush(zc, cBuffer+cSize, &genSize); + CHECK (ZBUFF_isError(errorCode), "flush error : %s", ZBUFF_getErrorName(errorCode)); + cSize += genSize; + } + if (totalTestSize > maxTestSize) break; } genSize = cBufferSize - cSize; From 26415d3ff277362351e644a436c8a38af1835874 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Nov 2015 12:43:28 +0100 Subject: [PATCH 56/79] fixed leak --- lib/zstd_buffered.c | 2 ++ lib/zstd_decompress.c | 25 +++++++++++-------------- programs/Makefile | 6 ++++-- programs/zbufftest.c | 38 
++++++++++++++++++++++++++++++++++---- 4 files changed, 51 insertions(+), 20 deletions(-) diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index 37b5bbcd3c3..bbd513d2b13 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -345,6 +345,8 @@ size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbc) { if (zbc==NULL) return 0; /* support free on null */ ZSTD_freeDCtx(zbc->zc); + free(zbc->inBuff); + free(zbc->outBuff); free(zbc); return 0; } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 34d5d86aec1..122ff2b06a4 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -186,19 +186,6 @@ static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_ return zc->headerSize; } -/** ZSTD_decodeFrameHeader_Part2 -* decode the full Frame Header -* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1 -* @return : 0, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; - if (srcSize != zc->headerSize) return ERROR(srcSize_wrong); - memset(&(zc->params), 0, sizeof(zc->params)); - zc->params.windowLog = ip[4] + ZSTD_WINDOWLOG_ABSOLUTEMIN; - return 0; -} - size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize) { @@ -207,10 +194,20 @@ size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcS magicNumber = MEM_readLE32(src); if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown); memset(params, 0, sizeof(*params)); - params->windowLog = ((const BYTE*)src)[4] + ZSTD_WINDOWLOG_ABSOLUTEMIN; + params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN; return 0; } +/** ZSTD_decodeFrameHeader_Part2 +* decode the full Frame Header +* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1 +* @return : 0, or an error code, which can be tested using ZSTD_isError() */ +static size_t 
ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize) +{ + if (srcSize != zc->headerSize) return ERROR(srcSize_wrong); + return ZSTD_getFrameParams(&(zc->params), src, srcSize); +} + size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { diff --git a/programs/Makefile b/programs/Makefile index a8ca605cd42..2fd7e8dc375 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -247,17 +247,19 @@ test-zbuff: zbufftest test-zbuff32: zbufftest32 ./zbufftest32 $(ZBUFFTEST) -valgrindTest: zstd datagen fuzzer fullbench +valgrindTest: zstd datagen fuzzer fullbench zbufftest @echo "\n ---- valgrind tests : memory analyzer ----" valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID) ./datagen -g16KB > tmp valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID) ./datagen -g2930KB > tmp - valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp $(VOID) + valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp tmp2 + valgrind --leak-check=yes --error-exitcode=1 ./zstd -vdf tmp2 $(VOID) ./datagen -g64MB > tmp valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID) @rm tmp valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i1000 -t1 valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1 + valgrind --leak-check=yes --error-exitcode=1 ./zbufftest -T1mn endif diff --git a/programs/zbufftest.c b/programs/zbufftest.c index c673908395c..81285fa29c8 100644 --- a/programs/zbufftest.c +++ b/programs/zbufftest.c @@ -289,7 +289,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit U64 crcOrig, crcDest; /* init */ - DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); + DISPLAYUPDATE(2, "\r%6u", testNb); + if (nbTests >= testNb) DISPLAYUPDATE(2, "/%6u ", nbTests); FUZ_rand(&coreSeed); lseed = coreSeed ^ prime1; buffNb = FUZ_rand(&lseed) & 127; @@ -313,8 +314,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit /* 
Multi - segments compression test */ XXH64_reset(&crc64, 0); nbChunks = (FUZ_rand(&lseed) & 127) + 2; - maxTestSize = FUZ_rand(&lseed) % maxSrcLog; - maxTestSize = (size_t)1 << maxTestSize; + sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog; + maxTestSize = (size_t)1 << sampleSizeLog; maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1); ZBUFF_compressInit(zc, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); totalTestSize = 0; @@ -379,7 +380,36 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit CHECK (crcDest!=crcOrig, "decompressed data corrupted"); /* noisy/erroneous src decompression test */ - /* TBD later */ + /* add some noise */ + nbChunks = (FUZ_rand(&lseed) & 7) + 2; + for (n=0; n cSize/3) sampleSize = cSize/3; + sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize); + cStart = FUZ_rand(&lseed) % (cSize - sampleSize); + + memcpy(cBuffer+cStart, srcBuffer+sampleStart, sampleSize); + } + + /* try decompression on noisy data */ + ZBUFF_decompressInit(zd); + totalCSize = 0; + totalGenSize = 0; + while (totalCSize < cSize) + { + sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog; + sampleSize = (size_t)1 << sampleSizeLog; + sampleSize += FUZ_rand(&lseed) & (sampleSize-1); + readSize = sampleSize; + genSize = dstBufferSize - totalGenSize; + errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &genSize, cBuffer+totalCSize, &readSize); + if (ZBUFF_isError(errorCode)) break; /* error correctly detected */ + } } DISPLAY("\r%u fuzzer tests completed \n", testNb); From c2ce890ac2b13c1103eaf6bee9aca631a78cbc07 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Nov 2015 14:12:23 +0100 Subject: [PATCH 57/79] fixed bug in legacy decoder v0.2, reported by Maciej Adamczyk --- lib/legacy/zstd_v02.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index 5c0e7aff325..67d726f642c 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -3155,6 +3155,7 @@ static size_t 
ZSTD_decodeLiteralsBlock(void* ctx, { if (litSize > srcSize-3) return ERROR(corruption_detected); memcpy(dctx->litBuffer, istart, litSize); + dctx->litPtr = dctx->litBuffer; dctx->litBufSize = BLOCKSIZE; dctx->litSize = litSize; return litSize+3; From 661f843637a5ddc7cdc26993ae3f49e4685ead2a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Nov 2015 15:39:44 +0100 Subject: [PATCH 58/79] fixed test in zbufftest --- programs/zbufftest.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/programs/zbufftest.c b/programs/zbufftest.c index 81285fa29c8..f7d2f9833f3 100644 --- a/programs/zbufftest.c +++ b/programs/zbufftest.c @@ -32,12 +32,6 @@ # pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ #endif -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -# pragma GCC diagnostic ignored "-Wmissing-field-initializers" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -#endif - /************************************** * Includes @@ -226,8 +220,10 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max) return i; } -# define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ - DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define CHECK(cond, ...) 
if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } static const U32 maxSrcLog = 24; static const U32 maxSampleLog = 19; @@ -273,6 +269,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */ RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */ srcBuffer = cNoiseBuffer[2]; + memset(copyBuffer, 0x65, copyBufferSize); + memcpy(copyBuffer, srcBuffer, MIN(copyBufferSize,srcBufferSize)); /* make copyBuffer considered initialized */ /* catch up testNb */ for (testNb=1; testNb < startTest; testNb++) @@ -409,6 +407,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit genSize = dstBufferSize - totalGenSize; errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &genSize, cBuffer+totalCSize, &readSize); if (ZBUFF_isError(errorCode)) break; /* error correctly detected */ + totalGenSize += genSize; + totalCSize += readSize; } } DISPLAY("\r%u fuzzer tests completed \n", testNb); @@ -565,7 +565,7 @@ int main(int argc, char** argv) } /* Get Seed */ - DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION); + DISPLAY("Starting zstd_buffered tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION); if (!seedset) seed = FUZ_GetMilliStart() % 10000; DISPLAY("Seed = %u\n", seed); From 29a2c838c5e85afc5a7094107882b6f98abc27b5 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Nov 2015 16:02:04 +0100 Subject: [PATCH 59/79] support for legacy 0.3 format --- lib/Makefile | 3 +- lib/legacy/zstd_legacy.h | 8 +- lib/legacy/zstd_v03.c | 3730 +++++++++++++++++++++++++++++++ lib/legacy/zstd_v03.h | 99 + programs/Makefile | 20 +- programs/legacy/fileio_legacy.c | 65 +- 6 files changed, 3910 insertions(+), 15 deletions(-) create mode 100644 lib/legacy/zstd_v03.c create mode 
100644 lib/legacy/zstd_v03.h diff --git a/lib/Makefile b/lib/Makefile index be82c34635e..e80ce784617 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -70,7 +70,8 @@ default: clean libzstd all: clean libzstd -libzstd: zstd_compress.c zstd_decompress.c huff0.c fse.c legacy/zstd_v01.c legacy/zstd_v02.c +libzstd: zstd_compress.c zstd_decompress.c huff0.c fse.c \ + legacy/zstd_v01.c legacy/zstd_v02.c legacy/zstd_v03.c @echo compiling static library @$(CC) $(FLAGS) -c $^ @$(AR) rcs libzstd.a *.o diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h index b9ca94199b5..308d1342f94 100644 --- a/lib/legacy/zstd_legacy.h +++ b/lib/legacy/zstd_legacy.h @@ -1,5 +1,5 @@ /* - zstd_v02 - decoder for 0.2 format + zstd_legacy - decoder for legacy format Header File Copyright (C) 2015, Yann Collet. @@ -44,13 +44,15 @@ extern "C" { #include "error.h" /* ERROR */ #include "zstd_v01.h" #include "zstd_v02.h" +#include "zstd_v03.h" MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE) { switch(magicNumberLE) { case ZSTDv01_magicNumberLE : - case ZSTDv02_magicNumber : return 1; + case ZSTDv02_magicNumber : + case ZSTDv03_magicNumber : return 1; default : return 0; } } @@ -67,6 +69,8 @@ MEM_STATIC size_t ZSTD_decompressLegacy( return ZSTDv01_decompress(dst, maxOriginalSize, src, compressedSize); case ZSTDv02_magicNumber : return ZSTDv02_decompress(dst, maxOriginalSize, src, compressedSize); + case ZSTDv03_magicNumber : + return ZSTDv03_decompress(dst, maxOriginalSize, src, compressedSize); default : return ERROR(prefix_unknown); } diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c new file mode 100644 index 00000000000..ae67941c9c9 --- /dev/null +++ b/lib/legacy/zstd_v03.c @@ -0,0 +1,3730 @@ +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without 
+ modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v03.h"
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#elif defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/******************************************
+* Error Management
+******************************************/
+#define PREFIX(name) ZSTD_error_##name
+
+#define ERROR(name) (size_t)-PREFIX(name)
+
+#define ERROR_LIST(ITEM) \
+ ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \
+ ITEM(PREFIX(memory_allocation)) \
+ ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \
+ ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \
+ ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \
+ ITEM(PREFIX(maxCode))
+
+#define ERROR_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+#define ERROR_CONVERTTOSTRING(STRING) #STRING,
+#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR)
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
+
+
+/* 
****************************************************************** + mem.h + low-level memory access routines + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+ You can contact the author at :
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+* Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+* Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+# define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+* Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef int16_t S16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef int64_t S64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef signed short S16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef signed long long S64;
+#endif
+
+
+/****************************************************************
+* Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard on structure alignment. 
+Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif // MEM_FORCE_MEMORY_ACCESS + + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) + { + MEM_write16(memPtr, val); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + { + MEM_write32(memPtr, val32); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val32; + p[1] = (BYTE)(val32>>8); + p[2] = (BYTE)(val32>>16); + p[3] = (BYTE)(val32>>24); + } +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + +MEM_STATIC void 
MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + { + MEM_write64(memPtr, val64); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val64; + p[1] = (BYTE)(val64>>8); + p[2] = (BYTE)(val64>>16); + p[3] = (BYTE)(val64>>24); + p[4] = (BYTE)(val64>>32); + p[5] = (BYTE)(val64>>40); + p[6] = (BYTE)(val64>>48); + p[7] = (BYTE)(val64>>56); + } +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ + + +/* ****************************************************************** + bitstream + Part of NewGen Entropy library + header file (to include) + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which highly benefit from being inlined. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + + +/********************************************** +* bitStream decompression API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... 
:( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* +* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is manually filled from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream() +*/ + + +/****************************************** +* unsafe API +******************************************/ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/**************************************************************** +* Helper functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (register U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + + 
+/********************************************************** +* bitStream decoding +**********************************************************/ + +/*!BIT_initDStream +* Initialize a BIT_DStream_t. +* @bitD : a pointer to an already allocated BIT_DStream_t structure +* @srcBuffer must point at the beginning of a bitStream +* @srcSize must be the exact size of the bitStream +* @result : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + if (srcSize >= sizeof(size_t)) /* normal case */ + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + bitD->bitsConsumed += 
(U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + +/*!BIT_lookBits + * Provides next n bits from local register + * local register is not modified (bits are still present for next read/look) + * On 32-bits, maxNbBits==25 + * On 64-bits, maxNbBits==57 + * @return : value extracted + */ +MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +/*! BIT_lookBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*!BIT_readBits + * Read next n bits from local register. + * pay attention to not read more than nbBits contained into local register. + * @return : extracted value. 
+ */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*!BIT_readBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBitsFast(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + +/*! 
BIT_endOfDStream +* @return Tells if DStream has reached its exact end +*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#elif defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/****************************************** +* Error Management +******************************************/ +#define PREFIX(name) ZSTD_error_##name + +#define ERROR(name) (size_t)-PREFIX(name) + +#define ERROR_LIST(ITEM) \ + ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ + ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ + ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ + ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \ + ITEM(PREFIX(maxCode)) + +#define ERROR_GENERATE_ENUM(ENUM) ENUM, +typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ + +#define ERROR_CONVERTTOSTRING(STRING) #STRING, +#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) +static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) }; + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + static const char* codeError = "Unspecified 
error code"; + if (ERR_isError(code)) return ERR_strings[-(int)(code)]; + return codeError; +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/* +Constructor and Destructor of type FSE_CTable + Note that its size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ + + +/* ****************************************************************** + FSE : Finite State Entropy coder + header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Static allocation +******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
+ FSE_endOfDState(&DState); +*/ + + +/****************************************** +* FSE unsafe API +******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/****************************************** +* Implementation of inline functions +******************************************/ + +/* decompression */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy 
library + header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/****************************************** +* Static allocation macros +******************************************/ +/* Huff0 buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of Huff0's DTable */ +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1< /* size_t */ + + +/* ************************************* +* Version +***************************************/ +#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ +#define ZSTD_VERSION_MINOR 2 /* for new (non-breaking) interface capabilities */ +#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */ + +#if defined (__cplusplus) +} +#endif +/* + zstd - standard compression library + Header File for static linking only + Copyright (C) 2014-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd source repository : https://github.com/Cyan4973/zstd + - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/* The objects defined into this file should be considered experimental. + * They are not labelled stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risk of future changes. + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Streaming functions +***************************************/ + +typedef struct ZSTD_DCtx_s ZSTD_DCtx; + +/* + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. 
+ It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTD_magicNumber 0xFD2FB523 /* v0.3 */ + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + FSE : Finite State Entropy coder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +#ifndef FSE_COMMONDEFS_ONLY + +/**************************************************************** +* Tuning parameters +****************************************************************/ +/* MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#define FSE_MAX_MEMORY_USAGE 14 +#define FSE_DEFAULT_MEMORY_USAGE 13 + +/* FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. +* Required for proper stack allocation */ +#define FSE_MAX_SYMBOL_VALUE 255 + + +/**************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION + + +/**************************************************************** +* Byte symbol type +****************************************************************/ +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# ifdef __GNUC__ +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else 
+# define FORCE_INLINE static inline +# endif +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ + +/**************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + + +/**************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/**************************************************************** +* Complex types +****************************************************************/ +typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + + +/**************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION) + +static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } + 
+static size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION) +(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt; + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1); /* because dt is unsigned, 32-bits aligned on 32-bits */ + const U32 tableSize = 1 << tableLog; + const U32 tableMask = tableSize-1; + const U32 step = FSE_tableStep(tableSize); + U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U32 position = 0; + U32 highThreshold = tableSize-1; + const S16 largeLimit= (S16)(1 << (tableLog-1)); + U32 noLarge = 1; + U32 s; + + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + DTableH[0].tableLog = (U16)tableLog; + for (s=0; s<=maxSymbolValue; s++) + { + if (normalizedCounter[s]==-1) + { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; + symbolNext[s] = 1; + } + else + { + if (normalizedCounter[s] >= largeLimit) noLarge=0; + symbolNext[s] = normalizedCounter[s]; + } + } + + /* Spread symbols */ + for (s=0; s<=maxSymbolValue; s++) + { + int i; + for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } + } + + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + + /* Build Decoding table */ + { + U32 i; + for (i=0; ifastMode = (U16)noLarge; + return 0; +} + + +#ifndef FSE_COMMONDEFS_ONLY +/****************************************** +* FSE helper functions +******************************************/ +static unsigned FSE_isError(size_t code) { return ERR_isError(code); } + + +/**************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +static short FSE_abs(short a) +{ 
+ return a<0 ? -a : a; +} + +static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + if (hbSize < 4) return ERROR(srcSize_wrong); + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<1) && (charnum<=*maxSVPtr)) + { + if (previous0) + { + unsigned n0 = charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) + { + n0+=24; + if (ip < iend-5) + { + ip+=2; + bitStream = MEM_readLE32(ip) >> bitCount; + } + else + { + bitStream >>= 16; + bitCount+=16; + } + } + while ((bitStream & 3) == 3) + { + n0+=3; + bitStream>>=2; + bitCount+=2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + bitStream = MEM_readLE32(ip) >> bitCount; + } + else + bitStream >>= 2; + } + { + const short max = (short)((2*threshold-1)-remaining); + short count; + + if ((bitStream & (threshold-1)) < (U32)max) + { + count = (short)(bitStream & (threshold-1)); + bitCount += nbBits-1; + } + else + { + count = (short)(bitStream & (2*threshold-1)); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + remaining -= FSE_abs(count); + normalizedCounter[charnum++] = count; + previous0 = !count; + while (remaining < threshold) + { + nbBits--; + threshold >>= 1; + } + + { + if ((ip <= iend-7) || (ip + (bitCount>>3) <= 
iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + } + else + { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> (bitCount & 31); + } + } + } + if (remaining != 1) return ERROR(GENERIC); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong); + return ip-istart; +} + + +/********************************************************* +* Decompression (Byte symbols) +*********************************************************/ +static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) +{ + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt; + FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1); /* because dt is unsigned */ + + DTableH->tableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt; + FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1); /* because dt is unsigned */ + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSymbolValue = tableMask; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s<=maxSymbolValue; s++) + { + dinfo[s].newState = 0; + dinfo[s].symbol = (BYTE)s; + dinfo[s].nbBits = (BYTE)nbBits; + } + + return 0; +} + +FORCE_INLINE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; + + BIT_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; + size_t errorCode; + + /* Init */ + errorCode = 
BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ + if (FSE_isError(errorCode)) return errorCode; + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + +#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + + /* 4 symbols per loop */ + for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) + { + if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) + break; + + *op++ = FSE_GETSYMBOL(&state1); + + if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) + break; + + *op++ = FSE_GETSYMBOL(&state2); + } + + /* end ? 
*/ + if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) + return op-ostart; + + if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */ + + return ERROR(corruption_detected); +} + + +static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + size_t errorCode; + + if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ + + /* normal FSE decoding mode */ + errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ + ip += errorCode; + cSrcSize -= errorCode; + + errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + + /* always return, even if it is an error code */ + return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); +} + + + +#endif /* FSE_COMMONDEFS_ONLY */ +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library + Copyright (C) 2013-2015, Yann Collet. 
+ + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +/* inline is defined */ +#elif defined(_MSC_VER) +# define inline __inline +#else +# define inline /* disable inline */ +#endif + + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ + +/**************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/****************************************** +* Helper functions +******************************************/ +static unsigned HUF_isError(size_t code) { return ERR_isError(code); } + +#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. 
Beyond that value, code does not work */ +#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */ +#define HUF_MAX_SYMBOL_VALUE 255 +#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) +# error "HUF_MAX_TABLELOG is too large !" +#endif + + + +/********************************************************* +* Huff0 : Huffman block decompression +*********************************************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ + +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; + +/*! HUF_readStats + Read compact Huffman tree, saved by HUF_writeCTable + @huffWeight : destination buffer + @return : size read from `src` +*/ +static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 weightTotal; + U32 tableLog; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + size_t oSize; + U32 n; + + //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) /* special header */ + { + if (iSize >= (242)) /* RLE */ + { + static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; + oSize = l[iSize-242]; + memset(huffWeight, 1, hwSize); + iSize = 0; + } + else /* Incompressible */ + { + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } + } + } + else /* header compressed with FSE (normal case) */ + { + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32)); + weightTotal = 0; + for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } + + /* get last non-null symbol weight (implied, total must be 2^n) */ + tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + { + U32 total = 1 << tableLog; + U32 rest = total - weightTotal; + U32 verif = 1 << BIT_highbit32(rest); + U32 lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + *tableLogPtr = tableLog; + return iSize+1; +} + + +/**************************/ +/* single-symbol decoding */ +/**************************/ + +static size_t HUF_readDTableX2 (U16* 
DTable, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + U32 nbSymbols = 0; + U32 n; + U32 nextRankStart; + HUF_DEltX2* const dt = (HUF_DEltX2*)(DTable + 1); + + HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ + //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */ + DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */ + + /* Prepare ranks */ + nextRankStart = 0; + for (n=1; n<=tableLog; n++) + { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } + + /* fill DTable */ + for (n=0; n> 1; + U32 i; + HUF_DEltX2 D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); + for (i = rankVal[w]; i < rankVal[w] + length; i++) + dt[i] = D; + rankVal[w] += length; + } + + return iSize; +} + +static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + const BYTE c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + 
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) + { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, hence no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + return pEnd-pStart; +} + + +static size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U16* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + + const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + 
BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) + { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, 
&bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const BYTE*) cSrc; + size_t errorCode; + + errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); + ip += errorCode; + cSrcSize -= errorCode; + + return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + + +/***************************/ +/* double-symbols decoding */ +/***************************/ + +static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX4 DElt; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 s; + + /* get pre-calculated rankVal */ + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) + { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + for (s=0; s= 1 */ + + rankVal[weight] += length; + } +} + +typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1]; + +static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, 
+ const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s= minBits) /* enough room for a second symbol */ + { + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } + else + { + U32 i; + const U32 end = start + length; + HUF_DEltX4 DElt; + + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + for (i = start; i < end; i++) + DTable[i] = DElt; + } + rankVal[weight] += length; + } +} + +static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) +{ + BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1]; + sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 }; + U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 }; + U32* const rankStart = rankStart0+1; + rankVal_t rankVal; + U32 tableLog, maxW, sizeOfSort, nbSymbols; + const U32 memLog = DTable[0]; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + HUF_DEltX4* const dt = ((HUF_DEltX4*)DTable) + 1; + + HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */ + if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); + //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { + U32 w, nextRankStart = 0; + for (w=1; w<=maxW; w++) + { + U32 current = nextRankStart; + nextRankStart += rankStats[w]; + rankStart[w] = current; + } + rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ + sizeOfSort = nextRankStart; + } + + /* sort symbols by weight */ + { + U32 s; + for (s=0; s> consumed; + } + } + } + + HUF_fillDTableX4(dt, memLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + return iSize; +} + + +static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else + { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) + { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + } + } + return 1; +} + + +#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7)) + { + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_1(p, bitDPtr); + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2)) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + + + +static size_t HUF_decompress4X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U32* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + + const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t 
length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) + { + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_1(op1, &bitD1); + HUF_DECODE_SYMBOLX4_1(op2, &bitD2); + HUF_DECODE_SYMBOLX4_1(op3, &bitD3); + HUF_DECODE_SYMBOLX4_1(op4, &bitD4); + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_0(op1, &bitD1); + HUF_DECODE_SYMBOLX4_0(op2, &bitD2); + HUF_DECODE_SYMBOLX4_0(op3, &bitD3); + HUF_DECODE_SYMBOLX4_0(op4, &bitD4); + + endSignal = 
BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + + +/**********************************/ +/* quad-symbol decoding */ +/**********************************/ +typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6; +typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6; + +/* recursive, up to level 3; may benefit from