diff --git a/.gitignore b/.gitignore
index 309594a..1ccbb8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 # Directories
+_lz5bench/
 _codelite/
 cmake_unofficial/
 contrib/
@@ -7,6 +8,9 @@ examples/
 versionsTest/
 visual/
 
+# Archives
+*.zip
+
 # Object files
 *.o
 *.ko
diff --git a/Makefile b/Makefile
index 0d69b15..d58710f 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@
 # ################################################################
 
 # Version number
-export VERSION=131
+export VERSION=132
 export RELEASE=r$(VERSION)
 
 DESTDIR?=
diff --git a/NEWS b/NEWS
index 22db047..3db9190 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,8 @@
+r132
+- improved compression ratio
+- added: new parsers: LZ5HC_fast, LZ5HC_price_fast, LZ5HC_lowest_price
+- added: a special 1-byte codeword for the last used offset
+- added: support for 3-byte long matches (MINMATCH = 3)
+
 r131
 The first release based on LZ4 r132 dev
-
diff --git a/README.md b/README.md
index f4e69c9..50a51d0 100644
--- a/README.md
+++ b/README.md
@@ -7,17 +7,18 @@ This is caused mainly because of 22-bit dictionary instead of 16-bit in LZ4.
 LZ5 uses different output codewords and is not compatible with LZ4. LZ4 output codewords are 3 byte long (24-bit) and look as follows:
 - LLLL_MMMM OOOOOOOO OOOOOOOO - 16-bit offset, 4-bit match length, 4-bit literal length
 
-LZ5 uses 3 types of codewords from 2 to 4 bytes long:
+LZ5 uses 4 types of codewords from 1 to 4 bytes long:
 - 1_OO_LL_MMM OOOOOOOO - 10-bit offset, 3-bit match length, 2-bit literal length
 - 00_LLL_MMM OOOOOOOO OOOOOOOO - 16-bit offset, 3-bit match length, 3-bit literal length
-- 01_LLL_MMM OOOOOOOO OOOOOOOO OOOOOOOO - 24-bit offset, 3-bit match length, 3-bit literal length
+- 010_LL_MMM OOOOOOOO OOOOOOOO OOOOOOOO - 24-bit offset, 3-bit match length, 2-bit literal length
+- 011_LL_MMM - last offset, 3-bit match length, 2-bit literal length
 
 [LZ4]: https://github.com/Cyan4973/lz4
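The four layouts above differ only in the top bits of the token byte, so a decoder can classify a codeword by inspecting those bits. The following is a minimal, illustrative sketch of that classification, not the library's actual decoder: the `lz5_token` and `lz5_decode_token` names are invented here, offset bytes are read little-endian as in `LZ5_readLE16`/`LZ5_readLE24`, and the extra length bytes emitted when a 2- or 3-bit length field saturates are ignored.

```c
#include <stdint.h>

/* Fields of one LZ5 token byte; lengths may still be extended by extra
 * bytes when a bit-field value saturates (not handled in this sketch). */
typedef struct { uint32_t offset, lit_len, match_len; int offset_bytes; } lz5_token;

static lz5_token lz5_decode_token(uint8_t token, const uint8_t *next, uint32_t last_off)
{
    lz5_token t;
    t.match_len = token & 0x07;                       /* low 3 bits: match length      */
    if (token >> 7) {                                 /* 1_OO_LL_MMM OOOOOOOO          */
        t.lit_len      = (token >> 3) & 0x03;         /* 2-bit literal length          */
        t.offset       = ((uint32_t)((token >> 5) & 0x03) << 8) | next[0];  /* 10 bits */
        t.offset_bytes = 1;
    } else if ((token >> 6) == 0) {                   /* 00_LLL_MMM + 2 offset bytes   */
        t.lit_len      = (token >> 3) & 0x07;         /* 3-bit literal length          */
        t.offset       = (uint32_t)next[0] | ((uint32_t)next[1] << 8);      /* 16 bits */
        t.offset_bytes = 2;
    } else if ((token >> 5) == 2) {                   /* 010_LL_MMM + 3 offset bytes   */
        t.lit_len      = (token >> 3) & 0x03;
        t.offset       = (uint32_t)next[0] | ((uint32_t)next[1] << 8) | ((uint32_t)next[2] << 16);
        t.offset_bytes = 3;
    } else {                                          /* 011_LL_MMM : repeat offset    */
        t.lit_len      = (token >> 3) & 0x03;
        t.offset       = last_off;                    /* reuse the previous offset     */
        t.offset_bytes = 0;
    }
    return t;
}
```

The `011` form is the r132 addition: it carries no offset bytes at all and simply reuses the previous match offset, which is what the `last_off` state threaded through both `LZ5_compress_generic` and `LZ5_decompress_generic` in this patch tracks.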
 Benchmarks
 -------------------------
 
-In our experiments decompression speed of LZ5 is from 650-950 MB/s. It's slower than LZ4 but much faster than zstd and brotli.
+In our experiments, the decompression speed of LZ5 ranges from 600 to 1600 MB/s. It's slower than LZ4 but much faster than zstd and brotli.
 With the compresion ratio is opposite: LZ5 is better than LZ4 but worse than zstd and brotli.
 
 | Compressor name | Compression| Decompress.| Compr. size | Ratio |
@@ -32,22 +33,28 @@ With the compresion ratio is opposite: LZ5 is better than LZ4 but worse than zst
 | lz4hc r131 -11 | 20 MB/s | 1969 MB/s | 54751363 | 52.21 |
 | lz4hc r131 -13 | 17 MB/s | 1969 MB/s | 54744790 | 52.21 |
 | lz4hc r131 -15 | 14 MB/s | 2007 MB/s | 54741827 | 52.21 |
-| lz5 r131 | 195 MB/s | 939 MB/s | 55884927 | 53.30 |
-| lz5hc r131 -1 | 32 MB/s | 742 MB/s | 52927122 | 50.48 |
-| lz5hc r131 -3 | 20 MB/s | 716 MB/s | 50970192 | 48.61 |
-| lz5hc r131 -5 | 10 MB/s | 701 MB/s | 49970285 | 47.66 |
-| lz5hc r131 -7 | 5.54 MB/s | 682 MB/s | 49541511 | 47.25 |
-| lz5hc r131 -9 | 2.69 MB/s | 673 MB/s | 49346894 | 47.06 |
-| lz5hc r131 -11 | 1.36 MB/s | 664 MB/s | 49266526 | 46.98 |
-| zstd v0.3 | 257 MB/s | 547 MB/s | 51231016 | 48.86 |
-| zstd_HC v0.3 -1 | 257 MB/s | 553 MB/s | 51231016 | 48.86 |
-| zstd_HC v0.3 -3 | 76 MB/s | 417 MB/s | 46774383 | 44.61 |
-| zstd_HC v0.3 -5 | 40 MB/s | 476 MB/s | 45628362 | 43.51 |
-| zstd_HC v0.3 -9 | 14 MB/s | 485 MB/s | 44840562 | 42.76 |
-| zstd_HC v0.3 -13 | 9.34 MB/s | 469 MB/s | 43114895 | 41.12 |
-| zstd_HC v0.3 -17 | 6.02 MB/s | 463 MB/s | 42989971 | 41.00 |
-| zstd_HC v0.3 -21 | 3.35 MB/s | 461 MB/s | 42956964 | 40.97 |
-| zstd_HC v0.3 -23 | 2.33 MB/s | 463 MB/s | 42934217 | 40.95 |
+| lz5 r132 | 180 MB/s | 877 MB/s | 56183327 | 53.58 |
+| lz5hc r132 level 1 | 453 MB/s | 1649 MB/s | 68770655 | 65.58 |
+| lz5hc r132 level 2 | 341 MB/s | 1533 MB/s | 65201626 | 62.18 |
+| lz5hc r132 level 3 | 222 MB/s | 1267 MB/s | 61423270 | 58.58 |
+| lz5hc r132 level 4 | 122 MB/s | 892 MB/s | 55011906 | 52.46 |
+| lz5hc r132 level 5 | 92 MB/s | 784 MB/s | 52790905 | 50.35 |
+| lz5hc r132 level 6 | 40 MB/s | 872 MB/s | 52561673 | 50.13 |
+| lz5hc r132 level 7 | 30 MB/s | 825 MB/s | 50947061 | 48.59 |
+| lz5hc r132 level 8 | 21 MB/s | 771 MB/s | 50049555 | 47.73 |
+| lz5hc r132 level 9 | 16 MB/s | 702 MB/s | 48718531 | 46.46 |
+| lz5hc r132 level 10 | 12 MB/s | 670 MB/s | 48109030 | 45.88 |
+| lz5hc r132 level 11 | 6.60 MB/s | 592 MB/s | 47639520 | 45.43 |
+| lz5hc r132 level 12 | 3.22 MB/s | 670 MB/s | 47461368 | 45.26 |
+| zstd_HC v0.3.6 level 1 | 250 MB/s | 529 MB/s | 51230550 | 48.86 |
+| zstd_HC v0.3.6 level 2 | 186 MB/s | 498 MB/s | 49678572 | 47.38 |
+| zstd_HC v0.3.6 level 3 | 90 MB/s | 484 MB/s | 48838293 | 46.58 |
+| zstd_HC v0.3.6 level 5 | 61 MB/s | 467 MB/s | 46480999 | 44.33 |
+| zstd_HC v0.3.6 level 7 | 28 MB/s | 480 MB/s | 44803941 | 42.73 |
+| zstd_HC v0.3.6 level 9 | 15 MB/s | 497 MB/s | 43899996 | 41.87 |
+| zstd_HC v0.3.6 level 12 | 11 MB/s | 505 MB/s | 42402232 | 40.44 |
+| zstd_HC v0.3.6 level 16 | 2.29 MB/s | 499 MB/s | 42122327 | 40.17 |
+| zstd_HC v0.3.6 level 20 | 1.65 MB/s | 454 MB/s | 41884658 | 39.94 |
 | brotli 2015-10-29 -1 | 86 MB/s | 208 MB/s | 47882059 | 45.66 |
 | brotli 2015-10-29 -3 | 60 MB/s | 214 MB/s | 47451223 | 45.25 |
 | brotli 2015-10-29 -5 | 17 MB/s | 217 MB/s | 43363897 | 41.36 |
diff --git a/lib/Makefile b/lib/Makefile
index e463931..51aa67a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -31,7 +31,7 @@
 # ################################################################
 
 # Version numbers
-VERSION?= 131
+VERSION?= 132
 LIBVER_MAJOR:=`sed -n '/define LZ5_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz5.h`
 LIBVER_MINOR:=`sed -n '/define LZ5_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz5.h`
 LIBVER_PATCH:=`sed -n '/define LZ5_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz5.h`
diff --git a/lib/lz5.c b/lib/lz5.c
index faf6cba..8676099 100644
--- a/lib/lz5.c
+++ b/lib/lz5.c
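The lz5.c hunks below thread a `last_off` state through compression and pick one of the four codeword classes for each match. As a reading aid, here is a hedged sketch of that selection. The `classify_offset` helper is invented for illustration, and the threshold values are assumptions: `LZ5_SHORT_OFFSET_DISTANCE` and `LZ5_MID_OFFSET_DISTANCE` are presumed to be 1<<10 and 1<<16, matching the 10-, 16- and 24-bit offset layouts in the README.

```c
#include <stdint.h>

/* Codeword classes, in the order the encoder tests them. */
typedef enum { OFF_LAST, OFF_10BIT, OFF_16BIT, OFF_24BIT } lz5_off_class;

/* distance = ip - match; last_off = offset of the previously emitted match.
 * Thresholds are assumptions derived from the README bit layouts. */
static lz5_off_class classify_offset(uint32_t distance, uint32_t last_off)
{
    if (distance == last_off)  return OFF_LAST;   /* 011_LL_MMM, 1-byte codeword, no offset bytes */
    if (distance < (1u << 10)) return OFF_10BIT;  /* 1_OO_LL_MMM + 1 offset byte                  */
    if (distance < (1u << 16)) return OFF_16BIT;  /* 00_LLL_MMM + 2 offset bytes                  */
    return OFF_24BIT;                             /* 010_LL_MMM + 3 offset bytes                  */
}
```

Only the 16-bit class keeps the wider 3-bit literal-length field (`RUN_MASK`); the other three classes use the 2-bit field (`RUN_MASK2`), which is why the literal-length encoding in `LZ5_compress_generic` below branches on the same distance and last-offset condition.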
@@ -1,6 +1,7 @@ /* LZ5 - Fast LZ compression algorithm Copyright (C) 2011-2015, Yann Collet. + Copyright (C) 2015, Przemyslaw Skibinski BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -33,392 +34,19 @@ */ -/************************************** -* Tuning parameters -**************************************/ -/* - * HEAPMODE : - * Select how default compression functions will allocate memory for their hash table, - * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). - */ -#define HEAPMODE 0 - -/* - * ACCELERATION_DEFAULT : - * Select "acceleration" for LZ5_compress_fast() when parameter value <= 0 - */ -#define ACCELERATION_DEFAULT 1 - - -/************************************** -* CPU Feature Detection -**************************************/ -/* LZ5_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets which generate assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef LZ5_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define LZ5_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) -# define LZ5_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -/* - * LZ5_FORCE_SW_BITCOUNT - * Define this parameter if your target system or compiler does not support hardware bit count - */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ -# define LZ5_FORCE_SW_BITCOUNT -#endif - /************************************** * Includes **************************************/ +#include "lz5common.h" #include "lz5.h" +#include -/************************************** -* Compiler Options -**************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ -#else -# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# if defined(__GNUC__) || defined(__clang__) -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif /* _MSC_VER */ - -/* LZ5_GCC_VERSION is 
defined into lz5.h */ -#if (LZ5_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -/************************************** -* Memory routines -**************************************/ -#include /* malloc, calloc, free */ -#define ALLOCATOR(n,s) calloc(n,s) -#define FREEMEM free -#include /* memset, memcpy */ -#define MEM_INIT memset - - -/************************************** -* Basic Types -**************************************/ -#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - - -/************************************** -* Reading and writing into memory -**************************************/ -#define STEPSIZE sizeof(size_t) - -static unsigned LZ5_64bits(void) { return sizeof(void*)==8; } - -static unsigned LZ5_isLittleEndian(void) -{ - const union { U32 i; BYTE c[4]; } one = { 1 }; // don't use static : performance detrimental - return one.c[0]; -} - - -#if defined(LZ5_FORCE_MEMORY_ACCESS) && (LZ5_FORCE_MEMORY_ACCESS==2) - -static U16 LZ5_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 LZ5_read32(const void* memPtr) { return *(const U32*) memPtr; } -static size_t LZ5_read_ARCH(const void* memPtr) { return *(const size_t*) memPtr; } - -static void LZ5_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(LZ5_FORCE_MEMORY_ACCESS) && (LZ5_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; size_t uArch; } __attribute__((packed)) unalign; - -static U16 LZ5_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -static U32 LZ5_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static size_t LZ5_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } - -static void LZ5_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } - -#else - -static U16 LZ5_read16(const void* memPtr) -{ - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static U32 LZ5_read32(const void* memPtr) -{ - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static size_t LZ5_read_ARCH(const void* memPtr) -{ - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static void LZ5_write16(void* memPtr, U16 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -#endif // LZ5_FORCE_MEMORY_ACCESS - - -static U16 LZ5_readLE16(const void* memPtr) -{ - if (LZ5_isLittleEndian()) - { - return LZ5_read16(memPtr); - } - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U16)((U16)p[0] + (p[1]<<8)); - } -} - -static U32 LZ5_readLE24(const void* memPtr) -{ - if (LZ5_isLittleEndian()) - { - U32 val32 = 0; - memcpy(&val32, memPtr, 3); - return val32; - } - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U32)(p[0] + (p[1]<<8) + (p[2]<<16)); - } -} - -static void LZ5_writeLE16(void* memPtr, U16 value) -{ - if (LZ5_isLittleEndian()) - { - LZ5_write16(memPtr, value); - } - else - { - BYTE* 
p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - } -} - -static void LZ5_writeLE24(void* memPtr, U32 value) -{ - if (LZ5_isLittleEndian()) - { - memcpy(memPtr, &value, 3); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - p[2] = (BYTE)(value>>16); - } -} - - -static void LZ5_copy8(void* dst, const void* src) -{ - memcpy(dst,src,8); -} - -/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */ -static void LZ5_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) -{ - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* const e = (BYTE*)dstEnd; - -#if 0 - const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1)); - LZ5_copy8(d,s); if (d>e-9) return; - d+=l2; s+=l2; -#endif /* join to align */ - - do { LZ5_copy8(d,s); d+=8; s+=8; } while (d>3); -# elif (defined(__clang__) || (LZ5_GCC_VERSION >= 304)) && !defined(LZ5_FORCE_SW_BITCOUNT) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ5_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, (U32)val ); - return (int)(r>>3); -# elif (defined(__clang__) || (LZ5_GCC_VERSION >= 304)) && !defined(LZ5_FORCE_SW_BITCOUNT) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } - else /* Big Endian CPU */ - { - if (LZ5_64bits()) - { -# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ5_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ5_GCC_VERSION >= 304)) && !defined(LZ5_FORCE_SW_BITCOUNT) - return (__builtin_clzll((U64)val) >> 3); -# else - unsigned r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ5_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ5_GCC_VERSION >= 304)) && !defined(LZ5_FORCE_SW_BITCOUNT) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } - } -} - -static unsigned LZ5_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - while (likely(pIn> ((MINMATCH*8)-(LZ5_HASHLOG+1))); + return (((sequence) * prime4bytes) >> ((32)-(LZ5_HASHLOG+1))); else - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ5_HASHLOG)); + return (((sequence) * prime4bytes) >> ((32)-LZ5_HASHLOG)); } -static const U64 prime5bytes = 889523592379ULL; static U32 LZ5_hashSequence64(size_t sequence, tableType_t const tableType) { const U32 hashLog = (tableType == byU16) ? 
LZ5_HASHLOG+1 : LZ5_HASHLOG; @@ -478,12 +105,12 @@ static U32 LZ5_hashSequence64(size_t sequence, tableType_t const tableType) static U32 LZ5_hashSequenceT(size_t sequence, tableType_t const tableType) { - if (LZ5_64bits()) + if (MEM_64bits()) return LZ5_hashSequence64(sequence, tableType); return LZ5_hashSequence((U32)sequence, tableType); } -static U32 LZ5_hashPosition(const void* p, tableType_t tableType) { return LZ5_hashSequenceT(LZ5_read_ARCH(p), tableType); } +static U32 LZ5_hashPosition(const void* p, tableType_t tableType) { return LZ5_hashSequenceT(MEM_read_ARCH(p), tableType); } static void LZ5_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) { @@ -543,7 +170,7 @@ FORCE_INLINE int LZ5_compress_generic( BYTE* op = (BYTE*) dest; BYTE* const olimit = op + maxOutputSize; - U32 forwardH; + U32 forwardH, last_off=1; size_t refDelta=0; /* Init conditions */ @@ -609,7 +236,7 @@ FORCE_INLINE int LZ5_compress_generic( } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0) || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) - || (LZ5_read32(match+refDelta) != LZ5_read32(ip)) ); + || (MEM_read32(match+refDelta) != MEM_read32(ip)) ); } /* Catch up */ @@ -622,12 +249,12 @@ FORCE_INLINE int LZ5_compress_generic( if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) return 0; /* Check output limit */ - if (ip-match < (1<<10)) + if (ip-match >= LZ5_SHORT_OFFSET_DISTANCE && ip-match < LZ5_MID_OFFSET_DISTANCE && (U32)(ip-match) != last_off) { - if (litLength>=RUN_MASK2) + if (litLength>=RUN_MASK) { - int len = (int)litLength-RUN_MASK2; - *token=(RUN_MASK2<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } @@ -635,10 +262,10 @@ FORCE_INLINE int LZ5_compress_generic( } else { - if (litLength>=RUN_MASK) + if (litLength>=RUN_MASK2) { - int len = (int)litLength-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } @@ -646,27 +273,35 @@ FORCE_INLINE int LZ5_compress_generic( } /* Copy Literals */ - LZ5_wildCopy(op, anchor, op+litLength); + MEM_wildCopy(op, anchor, op+litLength); op+=litLength; } _next_match: /* Encode Offset */ - if (ip-match < (1<<10)) + if ((U32)(ip-match) == last_off) + { + *token+=(3<>8))< matchlimit) limit = matchlimit; - matchLength = LZ5_count(ip+MINMATCH, match+MINMATCH, limit); + matchLength = MEM_count(ip+MINMATCH, match+MINMATCH, limit); ip += MINMATCH + matchLength; if (ip==limit) { - unsigned more = LZ5_count(ip, (const BYTE*)source, matchlimit); + unsigned more = MEM_count(ip, (const BYTE*)source, matchlimit); matchLength += more; ip += more; } } else { - matchLength = LZ5_count(ip+MINMATCH, match+MINMATCH, matchlimit); + matchLength = MEM_count(ip+MINMATCH, match+MINMATCH, matchlimit); ip += MINMATCH + matchLength; } @@ -732,7 +367,7 @@ FORCE_INLINE int LZ5_compress_generic( LZ5_putPosition(ip, ctx, tableType, base); if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1) && (match+MAX_DISTANCE>=ip) - && (LZ5_read32(match+refDelta)==LZ5_read32(ip)) ) + && (MEM_read32(match+refDelta)==MEM_read32(ip)) ) { token=op++; *token=0; goto _next_match; } /* Prepare next loop */ @@ -775,14 +410,14 @@ int LZ5_compress_fast_extState(void* state, const char* source, char* dest, int if (inputSize < LZ5_64Klimit) return LZ5_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, acceleration); else - return LZ5_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ5_64bits() ? 
byU32 : byPtr, noDict, noDictIssue, acceleration); + return LZ5_compress_generic(state, source, dest, inputSize, 0, notLimited, MEM_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); } else { if (inputSize < LZ5_64Klimit) return LZ5_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); else - return LZ5_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ5_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); + return LZ5_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, MEM_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); } } @@ -822,7 +457,7 @@ int LZ5_compress_fast_force(const char* source, char* dest, int inputSize, int m if (inputSize < LZ5_64Klimit) return LZ5_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); else - return LZ5_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ5_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); + return LZ5_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, MEM_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); } @@ -852,7 +487,7 @@ static int LZ5_compress_destSize_generic( BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */); BYTE* const oMaxSeq = oMaxLit - 1 /* token */; - U32 forwardH; + U32 forwardH, last_off=1; /* Init conditions */ @@ -891,7 +526,7 @@ static int LZ5_compress_destSize_generic( LZ5_putPositionOnHash(ip, h, ctx, tableType, base); } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) - || (LZ5_read32(match) != LZ5_read32(ip)) ); + || (MEM_read32(match) != MEM_read32(ip)) ); } /* Catch up */ @@ -907,13 +542,13 @@ static int LZ5_compress_destSize_generic( op--; goto _last_literals; } - - if (ip-match < (1<<10)) + + if ((U32)(ip-match) >= LZ5_SHORT_OFFSET_DISTANCE && (U32)(ip-match) < LZ5_MID_OFFSET_DISTANCE && (U32)(ip-match) != last_off) { - if (litLength>=RUN_MASK2) + if (litLength>=RUN_MASK) { - int len = (int)litLength-RUN_MASK2; - *token=(RUN_MASK2<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } @@ -921,10 +556,10 @@ static int LZ5_compress_destSize_generic( } else { - if (litLength>=RUN_MASK) + if (litLength>=RUN_MASK2) { - int len = (int)litLength-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } @@ -932,33 +567,39 @@ static int LZ5_compress_destSize_generic( } /* Copy Literals */ - LZ5_wildCopy(op, anchor, op+litLength); + MEM_wildCopy(op, anchor, op+litLength); op += litLength; } _next_match: /* Encode Offset */ - if (ip-match < (1<<10)) + if ((U32)(ip-match) == last_off) + { + *token+=(3<>8))< oMaxMatch) { @@ -990,7 +631,7 @@ static int LZ5_compress_destSize_generic( match = LZ5_getPosition(ip, ctx, tableType, base); LZ5_putPosition(ip, ctx, tableType, base); if ( (match+MAX_DISTANCE>=ip) - && (LZ5_read32(match)==LZ5_read32(ip)) ) + && (MEM_read32(match)==MEM_read32(ip)) ) { token=op++; *token=0; goto _next_match; } /* Prepare next loop */ @@ -1043,7 +684,7 @@ static int LZ5_compress_destSize_extState (void* state, const char* src, char* d if (*srcSizePtr < LZ5_64Klimit) return LZ5_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16); else - return LZ5_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ5_64bits() ? 
byU32 : byPtr); + return LZ5_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, MEM_64bits() ? byU32 : byPtr); } } @@ -1278,6 +919,7 @@ FORCE_INLINE int LZ5_decompress_generic( const int safeDecode = (endOnInput==endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(LZ5_DICT_SIZE))); + U32 last_off = 1; /* Special cases */ if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ @@ -1295,7 +937,7 @@ FORCE_INLINE int LZ5_decompress_generic( /* get literal length */ token = *ip++; - if (token>>7) + if (token>>6) { if ((length=(token>>ML_BITS)&RUN_MASK2) == RUN_MASK2) { @@ -1328,7 +970,7 @@ FORCE_INLINE int LZ5_decompress_generic( /* copy literals */ cpy = op+length; - if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(1+1+LASTLITERALS))) ) + if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(0+1+LASTLITERALS))) ) || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH))) { if (partialDecoding) @@ -1346,10 +988,23 @@ FORCE_INLINE int LZ5_decompress_generic( op += length; break; /* Necessarily EOF, due to parsing restrictions */ } - LZ5_wildCopy(op, ip, cpy); + MEM_wildCopy(op, ip, cpy); ip += length; op = cpy; /* get offset */ +#if 0 + switch (token>>6) + { + default: offset = *ip + (((token>>ML_RUN_BITS2)&3)<<8); ip++; break; + case 0: offset = MEM_readLE16(ip); ip+=2; break; + case 1: + if ((token>>5) == 3) + offset = last_off; + else // (token>>ML_RUN_BITS2) == 2 + { offset = MEM_readLE24(ip); ip+=3; } + break; + } +#else if (token>>7) { offset = *ip + (((token>>ML_RUN_BITS2)&3)<<8); ip++; @@ -1357,12 +1012,20 @@ FORCE_INLINE int LZ5_decompress_generic( else if ((token>>ML_RUN_BITS) == 0) { - offset = LZ5_readLE16(ip); ip+=2; + offset = MEM_readLE16(ip); ip+=2; } - else // length == 1 + else + if ((token>>ML_RUN_BITS2) == 2) { - offset = LZ5_readLE24(ip); ip+=3; + offset = MEM_readLE24(ip); ip+=3; } + else // (token>>ML_RUN_BITS2) == 3 + { + offset = last_off; + } +#endif + + last_off = offset; match = op - offset; if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside buffers */ @@ -1426,7 +1089,7 @@ FORCE_INLINE int LZ5_decompress_generic( match += dec32table[offset]; memcpy(op+4, match, 4); match -= dec64; - } else { LZ5_copy8(op, match); match+=8; } + } else { MEM_copy8(op, match); match+=8; } op += 8; if (unlikely(cpy>oend-12)) @@ -1435,14 +1098,14 @@ FORCE_INLINE int LZ5_decompress_generic( if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ if (op < oCopyLimit) { - LZ5_wildCopy(op, match, oCopyLimit); + MEM_wildCopy(op, match, oCopyLimit); match += oCopyLimit - op; op = oCopyLimit; } while (op +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ +#else +# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# if defined(__GNUC__) || defined(__clang__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif /* _MSC_VER */ + +#define LZ5_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#if (LZ5_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else 
+# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + + +/************************************** +* Memory routines +**************************************/ +#include /* malloc, calloc, free */ +#define ALLOCATOR(n,s) calloc(n,s) +#define FREEMEM free +#include /* memset, memcpy */ +#define MEM_INIT memset + + +/************************************** +* Common Constants +**************************************/ +#define MINMATCH 3 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (WILDCOPYLENGTH+MINMATCH) +static const int LZ5_minLength = (MFLIMIT+1); + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define MAXD_LOG 22 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) +#define LZ5_DICT_SIZE (1 << MAXD_LOG) + +#define ML_BITS 3 +#define ML_MASK ((1U<= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + + +/* ************************************* +* HC Inline functions and Macros +***************************************/ +#include "mem.h" // MEM_read + +#if MINMATCH == 3 + #define MEM_read24(ptr) (uint32_t)(MEM_read32(ptr)<<8) +#else + #define MEM_read24(ptr) (uint32_t)(MEM_read32(ptr)) +#endif + +static const U32 prime3bytes = 506832829U; +static U32 LZ5HC_hash3(U32 u, U32 h) { return (u * prime3bytes) << (32-24) >> (32-h) ; } +static size_t LZ5HC_hash3Ptr(const void* ptr, U32 h) { return LZ5HC_hash3(MEM_read32(ptr), h); } + +static const U32 prime4bytes = 2654435761U; +static U32 LZ5HC_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t LZ5HC_hash4Ptr(const void* ptr, U32 h) { return LZ5HC_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t LZ5HC_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; } +static size_t LZ5HC_hash5Ptr(const void* p, U32 h) { return LZ5HC_hash5(MEM_read64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t LZ5HC_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } +static size_t LZ5HC_hash6Ptr(const void* p, U32 h) { return LZ5HC_hash6(MEM_read64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t LZ5HC_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } +static size_t LZ5HC_hash7Ptr(const void* p, U32 h) { return LZ5HC_hash7(MEM_read64(p), h); } + +static size_t LZ5HC_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return LZ5HC_hash4Ptr(p, hBits); + case 5: return LZ5HC_hash5Ptr(p, hBits); + case 6: return LZ5HC_hash6Ptr(p, hBits); + case 7: return LZ5HC_hash7Ptr(p, hBits); + } +} + + +/************************************** +* HC Local Macros +**************************************/ +#define LZ5HC_DEBUG(fmt, args...) ; //printf(fmt, ##args) +#define MAX(a,b) ((a)>(b))?(a):(b) + +#define LZ5_SHORT_LITERALS ((1< LZ5_MID_OFFSET_DISTANCE) || (offset 0) + sum = MAX(common + literals, best_common); + else + sum = MAX(common, best_common - literals); + +// return LZ5_CODEWORD_COST(sum - common, (off == last_off) ? 
0 : (off), common - MINMATCH) <= LZ5_CODEWORD_COST(sum - best_common, (best_off == last_off) ? 0 : (best_off), best_common - MINMATCH); + return LZ5_NORMAL_MATCH_COST(common - MINMATCH, (off == last_off) ? 0 : off) + LZ5_NORMAL_LIT_COST(sum - common) <= LZ5_NORMAL_MATCH_COST(best_common - MINMATCH, (best_off == last_off) ? 0 : (best_off)) + LZ5_NORMAL_LIT_COST(sum - best_common); +} + + + +/* ************************************* +* HC Types +***************************************/ +/** from faster to stronger */ +typedef enum { LZ5HC_fast, LZ5HC_price_fast, LZ5HC_lowest_price } LZ5HC_strategy; + +typedef struct +{ + U32 windowLog; /* largest match distance : impact decompression buffer size */ + U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */ + U32 hashLog; /* dispatch table : larger == more memory, faster*/ + U32 hashLog3; /* dispatch table : larger == more memory, faster*/ + U32 searchNum; /* nb of searches : larger == more compression, slower*/ + U32 searchLength; /* size of matches : larger == faster decompression */ + LZ5HC_strategy strategy; +} LZ5HC_parameters; + + +struct LZ5HC_Data_s +{ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + const BYTE* end; /* next block here to continue on current prefix */ + const BYTE* base; /* All index relative to this position */ + const BYTE* dictBase; /* alternate base for extDict */ + BYTE* inputBuffer; /* deprecated */ + BYTE* outputBuffer; /* deprecated */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more dict */ + U32 nextToUpdate; /* index from which to continue dictionary update */ + U32 compressionLevel; + U32 last_off; + LZ5HC_parameters params; +}; + + +/* ************************************* +* HC Pre-defined compression levels +***************************************/ +#define LZ5HC_MAX_CLEVEL 13 + +static const int g_maxCompressionLevel = LZ5HC_MAX_CLEVEL; +static const int LZ5HC_compressionLevel_default = 6; + +static const LZ5HC_parameters LZ5HC_defaultParameters[LZ5HC_MAX_CLEVEL+1] = +{ + /* W, C, H, H3, S, L, strat */ + { 0, 0, 0, 0, 0, 0, LZ5HC_fast }, // level 0 - never used + { 22, 22, 13, 0, 4, 6, LZ5HC_fast }, // level 1 + // { 22, 22, 14, 0, 4, 6, LZ5HC_fast }, // level 2 + { 22, 22, 13, 0, 2, 6, LZ5HC_fast }, // level 3 + // { 22, 22, 14, 0, 2, 6, LZ5HC_fast }, // level 4 + // { 22, 22, 13, 0, 2, 5, LZ5HC_fast }, // level 5 + // { 22, 22, 14, 0, 2, 5, LZ5HC_fast }, // level 6 + { 22, 22, 13, 0, 1, 5, LZ5HC_fast }, // level 7 + // { 22, 22, 14, 0, 1, 5, LZ5HC_fast }, // level 8 + // { 22, 22, 15, 0, 1, 5, LZ5HC_fast }, // level 9 + // { 22, 22, 17, 0, 1, 5, LZ5HC_fast }, // level 10 + // { 22, 22, 14, 13, 4, 6, LZ5HC_price_fast }, // level 12 + // { 22, 22, 14, 13, 2, 5, LZ5HC_price_fast }, // level 13 + { 22, 22, 14, 13, 1, 4, LZ5HC_price_fast }, // level 14 + { 22, 22, 17, 13, 1, 4, LZ5HC_price_fast }, // level 15 + { 22, 22, 15, 13, 1, 4, LZ5HC_lowest_price }, // level 16 + { 22, 22, 17, 13, 1, 4, LZ5HC_lowest_price }, // level 17 + { 22, 22, 19, 16, 1, 4, LZ5HC_lowest_price }, // level 18 + { 22, 22, 23, 16, 3, 4, LZ5HC_lowest_price }, // level 19 + { 22, 22, 23, 16, 8, 4, LZ5HC_lowest_price }, // level 20 + { 22, 22, 23, 16, 32, 4, LZ5HC_lowest_price }, // level 21 + { 22, 22, 23, 16, 128, 4, LZ5HC_lowest_price }, // level 22 + { 22, 22, 23, 16, 1024, 4, LZ5HC_lowest_price }, // level 23 +}; + + +#if defined (__cplusplus) +} +#endif + +#endif /* LZ5COMMON_H */ diff --git a/lib/lz5frame.c 
b/lib/lz5frame.c index 945e4ff..b81d0a6 100644 --- a/lib/lz5frame.c +++ b/lib/lz5frame.c @@ -53,6 +53,7 @@ You can contact the author at : #define ALLOCATOR(s) calloc(1,s) #define FREEMEM free #include /* memset, memcpy, memmove */ +#include #define MEM_INIT memset @@ -98,14 +99,14 @@ typedef unsigned long long U64; #define _8BITS 0xFF #define LZ5F_MAGIC_SKIPPABLE_START 0x184D2A50U -#define LZ5F_MAGICNUMBER 0x184D2204U +#define LZ5F_MAGICNUMBER 0x184D2205U #define LZ5F_BLOCKUNCOMPRESSED_FLAG 0x80000000U #define LZ5F_BLOCKSIZEID_DEFAULT LZ5F_max64KB static const size_t minFHSize = 7; static const size_t maxFHSize = 15; static const size_t BHSize = 4; -static const int minHClevel = 3; +static const int minHClevel = 1; /************************************** @@ -334,7 +335,7 @@ size_t LZ5F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf dstPtr += errorCode; if (prefs.compressionLevel >= (int)minHClevel) /* no allocation necessary with lz5 fast */ - FREEMEM(cctxI.lz5CtxPtr); + LZ5_freeStreamHC(cctxI.lz5CtxPtr); return (dstPtr - dstStart); } @@ -374,7 +375,10 @@ LZ5F_errorCode_t LZ5F_freeCompressionContext(LZ5F_compressionContext_t LZ5F_comp if (cctxPtr != NULL) /* null pointers can be safely provided to this function, like free() */ { - FREEMEM(cctxPtr->lz5CtxPtr); + if (cctxPtr->prefs.compressionLevel < minHClevel) + FREEMEM(cctxPtr->lz5CtxPtr); + else + LZ5_freeStreamHC(cctxPtr->lz5CtxPtr); FREEMEM(cctxPtr->tmpBuff); FREEMEM(LZ5F_compressionContext); } @@ -409,11 +413,14 @@ size_t LZ5F_compressBegin(LZ5F_compressionContext_t compressionContext, void* ds U32 tableID = (cctxPtr->prefs.compressionLevel < minHClevel) ? 1 : 2; /* 0:nothing ; 1:LZ5 table ; 2:HC tables */ if (cctxPtr->lz5CtxLevel < tableID) { - FREEMEM(cctxPtr->lz5CtxPtr); + if (cctxPtr->prefs.compressionLevel < minHClevel) + FREEMEM(cctxPtr->lz5CtxPtr); + else + LZ5_freeStreamHC(cctxPtr->lz5CtxPtr); if (cctxPtr->prefs.compressionLevel < minHClevel) cctxPtr->lz5CtxPtr = (void*)LZ5_createStream(); else - cctxPtr->lz5CtxPtr = (void*)LZ5_createStreamHC(); + cctxPtr->lz5CtxPtr = (void*)LZ5_createStreamHC(cctxPtr->prefs.compressionLevel); cctxPtr->lz5CtxLevel = tableID; } } @@ -439,7 +446,7 @@ size_t LZ5F_compressBegin(LZ5F_compressionContext_t compressionContext, void* ds if (cctxPtr->prefs.compressionLevel < minHClevel) LZ5_resetStream((LZ5_stream_t*)(cctxPtr->lz5CtxPtr)); else - LZ5_resetStreamHC((LZ5_streamHC_t*)(cctxPtr->lz5CtxPtr), cctxPtr->prefs.compressionLevel); + LZ5_resetStreamHC((LZ5_streamHC_t*)(cctxPtr->lz5CtxPtr)); /* Magic Number */ LZ5F_writeLE32(dstPtr, LZ5F_MAGICNUMBER); @@ -493,14 +500,14 @@ size_t LZ5F_compressBound(size_t srcSize, const LZ5F_preferences_t* preferencesP } -typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level); +typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize); -static size_t LZ5F_compressBlock(void* dst, const void* src, size_t srcSize, compressFunc_t compress, void* lz5ctx, int level) +static size_t LZ5F_compressBlock(void* dst, const void* src, size_t srcSize, compressFunc_t compress, void* lz5ctx) { /* compress one block */ BYTE* cSizePtr = (BYTE*)dst; U32 cSize; - cSize = (U32)compress(lz5ctx, (const char*)src, (char*)(cSizePtr+4), (int)(srcSize), (int)(srcSize-1), level); + cSize = (U32)compress(lz5ctx, (const char*)src, (char*)(cSizePtr+4), (int)(srcSize), (int)(srcSize-1)); LZ5F_writeLE32(cSizePtr, cSize); if (cSize == 0) /* compression failed */ { @@ -512,21 +519,18 @@ 
static size_t LZ5F_compressBlock(void* dst, const void* src, size_t srcSize, com } -static int LZ5F_localLZ5_compress_limitedOutput_withState(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level) +static int LZ5F_localLZ5_compress_limitedOutput_withState(void* ctx, const char* src, char* dst, int srcSize, int dstSize) { - (void) level; return LZ5_compress_limitedOutput_withState(ctx, src, dst, srcSize, dstSize); } -static int LZ5F_localLZ5_compress_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level) +static int LZ5F_localLZ5_compress_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize) { - (void) level; return LZ5_compress_limitedOutput_continue((LZ5_stream_t*)ctx, src, dst, srcSize, dstSize); } -static int LZ5F_localLZ5_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level) +static int LZ5F_localLZ5_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize) { - (void) level; return LZ5_compress_HC_continue((LZ5_streamHC_t*)ctx, src, dst, srcSize, dstSize); } @@ -599,7 +603,7 @@ size_t LZ5F_compressUpdate(LZ5F_compressionContext_t compressionContext, void* d memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy); srcPtr += sizeToCopy; - dstPtr += LZ5F_compressBlock(dstPtr, cctxPtr->tmpIn, blockSize, compress, cctxPtr->lz5CtxPtr, cctxPtr->prefs.compressionLevel); + dstPtr += LZ5F_compressBlock(dstPtr, cctxPtr->tmpIn, blockSize, compress, cctxPtr->lz5CtxPtr); if (cctxPtr->prefs.frameInfo.blockMode==LZ5F_blockLinked) cctxPtr->tmpIn += blockSize; cctxPtr->tmpInSize = 0; @@ -610,7 +614,7 @@ size_t LZ5F_compressUpdate(LZ5F_compressionContext_t compressionContext, void* d { /* compress full block */ lastBlockCompressed = fromSrcBuffer; - dstPtr += LZ5F_compressBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz5CtxPtr, cctxPtr->prefs.compressionLevel); + dstPtr += LZ5F_compressBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz5CtxPtr); srcPtr += blockSize; } @@ -618,7 +622,7 @@ size_t LZ5F_compressUpdate(LZ5F_compressionContext_t compressionContext, void* d { /* compress remaining input < blockSize */ lastBlockCompressed = fromSrcBuffer; - dstPtr += LZ5F_compressBlock(dstPtr, srcPtr, srcEnd - srcPtr, compress, cctxPtr->lz5CtxPtr, cctxPtr->prefs.compressionLevel); + dstPtr += LZ5F_compressBlock(dstPtr, srcPtr, srcEnd - srcPtr, compress, cctxPtr->lz5CtxPtr); srcPtr = srcEnd; } @@ -687,7 +691,7 @@ size_t LZ5F_flush(LZ5F_compressionContext_t compressionContext, void* dstBuffer, compress = LZ5F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel); /* compress tmp buffer */ - dstPtr += LZ5F_compressBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz5CtxPtr, cctxPtr->prefs.compressionLevel); + dstPtr += LZ5F_compressBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz5CtxPtr); if (cctxPtr->prefs.frameInfo.blockMode==LZ5F_blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize; cctxPtr->tmpInSize = 0; @@ -1152,7 +1156,7 @@ size_t LZ5F_decompress(LZ5F_decompressionContext_t decompressionContext, dctxPtr->dStage = dstage_getSuffix; break; } - if (nextCBlockSize > dctxPtr->maxBlockSize) return (size_t)-LZ5F_ERROR_GENERIC; /* invalid cBlockSize */ + if (nextCBlockSize > dctxPtr->maxBlockSize) return (size_t)-LZ5F_ERROR_GENERIC; /* invalid cBlockSize */ dctxPtr->tmpInTarget = nextCBlockSize; if (LZ5F_readLE32(selectedIn) & 
LZ5F_BLOCKUNCOMPRESSED_FLAG) { @@ -1246,7 +1250,7 @@ size_t LZ5F_decompress(LZ5F_decompressionContext_t decompressionContext, decoder = LZ5F_decompress_safe; decodedSize = decoder((const char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize); - if (decodedSize < 0) return (size_t)-LZ5F_ERROR_GENERIC; /* decompression failed */ + if (decodedSize < 0) return (size_t)-LZ5F_ERROR_GENERIC; /* decompression failed */ if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize); if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize; diff --git a/lib/lz5hc.c b/lib/lz5hc.c index ae1a7c8..de18f9b 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -1,6 +1,7 @@ /* LZ5 HC - High Compression Mode of LZ5 Copyright (C) 2011-2015, Yann Collet. + Copyright (C) 2015, Przemyslaw Skibinski BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -34,151 +35,135 @@ -/* ************************************* -* Tuning Parameter -***************************************/ -static const int LZ5HC_compressionLevel_default = 9; - -/*! - * HEAPMODE : - * Select how default compression function will allocate workplace memory, - * in stack (0:fastest), or in heap (1:requires malloc()). - * Since workplace is rather large, heap mode is recommended. - */ -#define LZ5HC_HEAPMODE 0 - /* ************************************* * Includes ***************************************/ +#include "lz5common.h" +#include "lz5.h" #include "lz5hc.h" +#include -/* ************************************* -* Local Compiler Options -***************************************/ -#if defined(__GNUC__) -# pragma GCC diagnostic ignored "-Wunused-function" -#endif - -#if defined (__clang__) -# pragma clang diagnostic ignored "-Wunused-function" -#endif - +/************************************** +* HC Compression +**************************************/ -/* ************************************* -* Common LZ5 definition -***************************************/ -#define LZ5_COMMONDEFS_ONLY -#include "lz5.c" +int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* ctx, int compressionLevel) +{ + ctx->compressionLevel = compressionLevel; + if (compressionLevel > g_maxCompressionLevel) ctx->compressionLevel = g_maxCompressionLevel; + if (compressionLevel < 1) ctx->compressionLevel = LZ5HC_compressionLevel_default; -/* ************************************* -* Local Constants -***************************************/ -#define DICTIONARY_LOGSIZE 22 -#define MAXD (1<params = LZ5HC_defaultParameters[ctx->compressionLevel]; -#define HASH_LOG (DICTIONARY_LOGSIZE-1) -#define HASHTABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASHTABLESIZE - 1) + ctx->hashTable = (U32*) ALLOCATOR(1, sizeof(U32)*((1 << ctx->params.hashLog3)+(1 << ctx->params.hashLog))); + if (!ctx->hashTable) + return 0; -#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) + ctx->hashTable3 = ctx->hashTable + (1 << ctx->params.hashLog); -static const int g_maxCompressionLevel = 16; + ctx->chainTable = (U32*) ALLOCATOR(1, sizeof(U32)*(1 << ctx->params.contentLog)); + if (!ctx->chainTable) + { + FREEMEM(ctx->hashTable); + ctx->hashTable = NULL; + return 0; + } + return 1; +} -/************************************** -* Local Types -**************************************/ -struct LZ5HC_Data_s +void LZ5_free_mem_HC(LZ5HC_Data_Structure* ctx) { - U32* hashTable; - U32* chainTable; - const BYTE* end; /* next block here to continue on current prefix */ - const BYTE* base; 
/* All index relative to this position */ - const BYTE* dictBase; /* alternate base for extDict */ - BYTE* inputBuffer; /* deprecated */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more dict */ - U32 nextToUpdate; /* index from which to continue dictionary update */ - U32 compressionLevel; -}; - - -/************************************** -* Local Macros -**************************************/ -#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) -//#define DELTANEXTU16(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */ -#define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */ -#define DELTANEXTU32(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */ - -static U32 LZ5HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ5_read32(ptr)); } - -#define LZ5HC_LIMIT (1<chainTable) FREEMEM(ctx->chainTable); + if (ctx->hashTable) FREEMEM(ctx->hashTable); +} -/************************************** -* HC Compression -**************************************/ -static void LZ5HC_init (LZ5HC_Data_Structure* hc4, const BYTE* start) +static void LZ5HC_init (LZ5HC_Data_Structure* ctx, const BYTE* start) { - MEM_INIT((void*)hc4->hashTable, 0, sizeof(U32)*HASHTABLESIZE); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(U32)*MAXD); - hc4->nextToUpdate = LZ5HC_LIMIT; - hc4->base = start - LZ5HC_LIMIT; - hc4->end = start; - hc4->dictBase = start - LZ5HC_LIMIT; - hc4->dictLimit = LZ5HC_LIMIT; - hc4->lowLimit = LZ5HC_LIMIT; + MEM_INIT((void*)ctx->hashTable, 0, sizeof(U32)*((1 << ctx->params.hashLog) + (1 << ctx->params.hashLog3))); + MEM_INIT(ctx->chainTable, 0xFF, sizeof(U32)*(1 << ctx->params.contentLog)); + + ctx->nextToUpdate = (1 << ctx->params.windowLog); + ctx->base = start - (1 << ctx->params.windowLog); + ctx->end = start; + ctx->dictBase = start - (1 << ctx->params.windowLog); + ctx->dictLimit = (1 << ctx->params.windowLog); + ctx->lowLimit = (1 << ctx->params.windowLog); + ctx->last_off = 1; } /* Update chains up to ip (excluded) */ -FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* hc4, const BYTE* ip) +FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* ctx, const BYTE* ip) { - U32* chainTable = hc4->chainTable; - U32* HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; + U32* chainTable = ctx->chainTable; + U32* HashTable = ctx->hashTable; +#if MINMATCH == 3 + U32* HashTable3 = ctx->hashTable3; +#endif + const BYTE* const base = ctx->base; const U32 target = (U32)(ip - base); - U32 idx = hc4->nextToUpdate; + const U32 contentMask = (1 << ctx->params.contentLog) - 1; + U32 idx = ctx->nextToUpdate; while(idx < target) { - U32 h = LZ5HC_hashPtr(base+idx); - size_t delta = idx - HashTable[h]; - if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; -// DELTANEXTU16(idx) = (U16)delta; - DELTANEXTU32(idx) = (U32)delta; + U32 h = LZ5HC_hashPtr(base+idx, ctx->params.hashLog, ctx->params.searchLength); + chainTable[idx & contentMask] = (U32)(idx - HashTable[h]); HashTable[h] = idx; - idx++; +#if MINMATCH == 3 + HashTable3[LZ5HC_hash3Ptr(base+idx, ctx->params.hashLog3)] = idx; +#endif + idx++; } - hc4->nextToUpdate = target; + ctx->nextToUpdate = target; } - -FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* Index table will be updated */ + +FORCE_INLINE int LZ5HC_FindBestMatch (LZ5HC_Data_Structure* ctx, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, - const BYTE** matchpos, - const int maxNbAttempts) + const BYTE** matchpos) { - U32* const chainTable = 
hc4->chainTable; - U32* const HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; - const BYTE* const dictBase = hc4->dictBase; - const U32 dictLimit = hc4->dictLimit; - const U32 lowLimit = (hc4->lowLimit + LZ5HC_LIMIT > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (LZ5HC_LIMIT - 1); + U32* const chainTable = ctx->chainTable; + U32* const HashTable = ctx->hashTable; + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const U32 dictLimit = ctx->dictLimit; + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const U32 contentMask = (1 << ctx->params.contentLog) - 1; U32 matchIndex; const BYTE* match; - int nbAttempts=maxNbAttempts; - size_t ml=0; + int nbAttempts=ctx->params.searchNum; + size_t ml=0, mlt; + + matchIndex = HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; - /* HC4 match finder */ - LZ5HC_Insert(hc4, ip); - matchIndex = HashTable[LZ5HC_hashPtr(ip)]; + match = ip - ctx->last_off; + if (MEM_read24(match) == MEM_read24(ip)) + { + ml = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + return (int)ml; + } + +#if MINMATCH == 3 + size_t offset = ip - base - ctx->hashTable3[LZ5HC_hash3Ptr(ip, ctx->params.hashLog3)]; + if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) + { + match = ip - offset; + if (match > base && MEM_read24(ip) == MEM_read24(match)) + { + ml = 3;//MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + } + } +#endif while ((matchIndex>=lowLimit) && (nbAttempts)) { @@ -186,121 +171,329 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I if (matchIndex >= dictLimit) { match = base + matchIndex; - if (*(match+ml) == *(ip+ml) - && (LZ5_read32(match) == LZ5_read32(ip))) + if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) + { + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (!ml || (mlt > ml && LZ5HC_better_price(ip - *matchpos, ml, ip - match, mlt, ctx->last_off))) +// if (mlt > ml && (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) + { ml = mlt; *matchpos = match; } + } + } + else + { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) + { + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iLimit) vLimit = iLimit; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + if ((ip+mlt == vLimit) && (vLimit < iLimit)) + mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); + if (!ml || (mlt > ml && LZ5HC_better_price(ip - *matchpos, ml, ip - match, mlt, ctx->last_off))) + // if (mlt > ml && (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 
0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) + { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + } + } + matchIndex -= chainTable[matchIndex & contentMask]; + } + + return (int)ml; +} + + +FORCE_INLINE int LZ5HC_FindBestMatchFast (LZ5HC_Data_Structure* ctx, U32 matchIndex, U32 matchIndex3, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + const BYTE** matchpos) +{ + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const U32 dictLimit = ctx->dictLimit; + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const BYTE* match; + size_t ml=0, mlt; + + match = ip - ctx->last_off; + if (MEM_read24(match) == MEM_read24(ip)) + { + ml = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + return (int)ml; + } + +#if MINMATCH == 3 + size_t offset = ip - base - matchIndex3; + if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) + { + match = ip - offset; + if (match > base && MEM_read24(ip) == MEM_read24(match)) + { + ml = 3;//MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + } + } +#endif + + if (matchIndex>=lowLimit) + { + if (matchIndex >= dictLimit) + { + match = base + matchIndex; + if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) + { + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (!ml || (mlt > ml && LZ5HC_better_price(ip - *matchpos, ml, ip - match, mlt, ctx->last_off))) + // if (ml==0 || ((mlt > ml) && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) + { ml = mlt; *matchpos = match; } + } + } + else + { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) + { + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iLimit) vLimit = iLimit; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + if ((ip+mlt == vLimit) && (vLimit < iLimit)) + mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); + if (!ml || (mlt > ml && LZ5HC_better_price(ip - *matchpos, ml, ip - match, mlt, ctx->last_off))) +// if (ml==0 || ((mlt > ml) && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) + { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + } + } + } + + return (int)ml; +} + + +FORCE_INLINE int LZ5HC_FindBestMatchFaster (LZ5HC_Data_Structure* ctx, U32 matchIndex, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + const BYTE** matchpos) +{ + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const U32 dictLimit = ctx->dictLimit; + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? 
ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const BYTE* match; + size_t ml=0, mlt; + + match = ip - ctx->last_off; + if (MEM_read24(match) == MEM_read24(ip)) + { + ml = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + return (int)ml; + } + + if (matchIndex>=lowLimit) + { + if (matchIndex >= dictLimit) + { + match = base + matchIndex; + if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) { - size_t mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; if (mlt > ml) { ml = mlt; *matchpos = match; } } } else { match = dictBase + matchIndex; - if (LZ5_read32(match) == LZ5_read32(ip)) + if (MEM_read32(match) == MEM_read32(ip)) { - size_t mlt; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iLimit) vLimit = iLimit; - mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iLimit)) - mlt += LZ5_count(ip+mlt, base+dictLimit, iLimit); + mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ } } -// matchIndex -= DELTANEXTU16(matchIndex); - matchIndex -= DELTANEXTU32(matchIndex); } + + return (int)ml; +} + +FORCE_INLINE int LZ5HC_FindBestMatchFastest (LZ5HC_Data_Structure* ctx, U32 matchIndex, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + const BYTE** matchpos) +{ + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const U32 dictLimit = ctx->dictLimit; + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const BYTE* match; + size_t ml=0, mlt; + + if (matchIndex>=lowLimit) + { + if (matchIndex >= dictLimit) + { + match = base + matchIndex; + if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) + { + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (mlt > ml) { ml = mlt; *matchpos = match; } + } + } + else + { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) + { + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iLimit) vLimit = iLimit; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + if ((ip+mlt == vLimit) && (vLimit < iLimit)) + mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); + if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + } + } + } + return (int)ml; } -FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( - LZ5HC_Data_Structure* hc4, +FORCE_INLINE int LZ5HC_GetWiderMatch ( + LZ5HC_Data_Structure* ctx, const BYTE* const ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, int longest, const BYTE** matchpos, - const BYTE** startpos, - const int maxNbAttempts) + const BYTE** startpos) { - U32* const chainTable = hc4->chainTable; - U32* const HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; - const U32 dictLimit = hc4->dictLimit; + U32* const chainTable = ctx->chainTable; + U32* const HashTable = ctx->hashTable; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; - const U32 lowLimit = (hc4->lowLimit + LZ5HC_LIMIT > (U32)(ip-base)) ? 
hc4->lowLimit : (U32)(ip - base) - (LZ5HC_LIMIT - 1); - const BYTE* const dictBase = hc4->dictBase; + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const U32 contentMask = (1 << ctx->params.contentLog) - 1; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* match; U32 matchIndex; - int nbAttempts = maxNbAttempts; - int delta = (int)(ip-iLowLimit); + int nbAttempts = ctx->params.searchNum; /* First Match */ - LZ5HC_Insert(hc4, ip); - matchIndex = HashTable[LZ5HC_hashPtr(ip)]; + matchIndex = HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; + + match = ip - ctx->last_off; + if (MEM_read24(match) == MEM_read24(ip)) + { + int mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; + + int back = 0; + while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; + mlt -= back; + + if (mlt > longest) + { + *matchpos = match+back; + *startpos = ip+back; + longest = (int)mlt; + } + } + + +#if MINMATCH == 3 + size_t offset = ip - base - ctx->hashTable3[LZ5HC_hash3Ptr(ip, ctx->params.hashLog3)]; + if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) + { + match = ip - offset; + if (match > base && MEM_read24(ip) == MEM_read24(match)) + { + int mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; + + int back = 0; + while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; + mlt -= back; + + if (!longest || (mlt > longest && LZ5HC_better_price(ip+back - *matchpos, longest, ip - match, mlt, ctx->last_off))) +// if (!longest || (mlt > longest && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip+back - *matchpos == ctx->last_off) ? 
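/*
 * Illustrative sketch (not part of this patch): the "back" loops in
 * LZ5HC_GetWiderMatch extend a freshly found match backwards over bytes that
 * would otherwise be emitted as literals, stopping at the current anchor and
 * at the start of the valid prefix.  Standalone version of that idiom:
 */
static int extend_backwards(const unsigned char* ip,    const unsigned char* iLowLimit,
                            const unsigned char* match, const unsigned char* lowPrefix)
{
    int back = 0;
    while (ip + back > iLowLimit && match + back > lowPrefix
           && ip[back - 1] == match[back - 1])
        back--;
    return back;    /* non-positive; caller applies ip += back, match += back, len -= back */
}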
0 : (ip+back - *matchpos)) + LZ5_NORMAL_LIT_COST(mlt - longest))) + { + *matchpos = match+back; + *startpos = ip+back; + longest = (int)mlt; + } + } + } +#endif while ((matchIndex>=lowLimit) && (nbAttempts)) { nbAttempts--; if (matchIndex >= dictLimit) { - const BYTE* matchPtr = base + matchIndex; - if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) - if (LZ5_read32(matchPtr) == LZ5_read32(ip)) + match = base + matchIndex; + + // if (*(ip + longest) == *(matchPtr + longest)) + if (match < ip && MEM_read32(match) == MEM_read32(ip)) + { + int mlt = MINMATCH + MEM_count(ip+MINMATCH, match+MINMATCH, iHighLimit); + int back = 0; + + while ((ip+back>iLowLimit) + && (match+back > lowPrefixPtr) + && (ip[back-1] == match[back-1])) + back--; + + mlt -= back; + + if (!longest || (mlt > longest && LZ5HC_better_price(ip+back - *matchpos, longest, ip - match, mlt, ctx->last_off))) { - int mlt = MINMATCH + LZ5_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - int back = 0; - - while ((ip+back>iLowLimit) - && (matchPtr+back > lowPrefixPtr) - && (ip[back-1] == matchPtr[back-1])) - back--; - - mlt -= back; - - if (mlt > longest) - { - longest = (int)mlt; - *matchpos = matchPtr+back; - *startpos = ip+back; - } + longest = (int)mlt; + *matchpos = match+back; + *startpos = ip+back; } + } } else { - const BYTE* matchPtr = dictBase + matchIndex; - if (LZ5_read32(matchPtr) == LZ5_read32(ip)) + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) { size_t mlt; int back=0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = LZ5_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) - mlt += LZ5_count(ip+mlt, base+dictLimit, iHighLimit); - while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--; + mlt += MEM_count(ip+mlt, base+dictLimit, iHighLimit); + while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--; mlt -= back; if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } } } -// matchIndex -= DELTANEXTU16(matchIndex); - matchIndex -= DELTANEXTU32(matchIndex); + matchIndex -= chainTable[matchIndex & contentMask]; } + return longest; } + typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; -#define LZ5HC_DEBUG 0 -#if LZ5HC_DEBUG -static unsigned debug = 0; -#endif +/* +LZ5 uses 3 types of codewords from 2 to 4 bytes long: +- 1_OO_LL_MMM OOOOOOOO - 10-bit offset, 3-bit match length, 2-bit literal length +- 00_LLL_MMM OOOOOOOO OOOOOOOO - 16-bit offset, 3-bit match length, 3-bit literal length +- 010_LL_MMM OOOOOOOO OOOOOOOO OOOOOOOO - 24-bit offset, 3-bit match length, 2-bit literal length +- 011_LL_MMM - last offset, 3-bit match length, 2-bit literal length +*/ FORCE_INLINE int LZ5HC_encodeSequence ( + LZ5HC_Data_Structure* ctx, const BYTE** ip, BYTE** op, const BYTE** anchor, @@ -312,47 +505,50 @@ FORCE_INLINE int LZ5HC_encodeSequence ( int length; BYTE* token; -#if LZ5HC_DEBUG - if (debug) printf("literal : %u -- match : %u -- offset : %u\n", (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match)); -#endif - /* Encode Literal length */ length = (int)(*ip - *anchor); token = (*op)++; if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ - if (*ip-match < (1<<10)) + if 
(*ip-match >= LZ5_SHORT_OFFSET_DISTANCE && *ip-match < LZ5_MID_OFFSET_DISTANCE && (U32)(*ip-match) != ctx->last_off) { - if (length>=(int)RUN_MASK2) { int len; *token=(RUN_MASK2< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } + if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } else *token = (BYTE)(length<=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } + if (length>=(int)RUN_MASK2) { int len; *token=(RUN_MASK2< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } else *token = (BYTE)(length<last_off) + { + *token+=(3<>8))<last_off = *ip-match; /* Encode MatchLength */ length = (int)(matchLength-MINMATCH); @@ -360,6 +556,8 @@ FORCE_INLINE int LZ5HC_encodeSequence ( if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } else *token += (BYTE)(length); + LZ5HC_DEBUG("%u: ENCODE literals=%u off=%u mlen=%u out=%u\n", (U32)(*ip - ctx->inputBuffer), (U32)(*ip - *anchor), (U32)(*ip-match), (U32)matchLength, 2+(U32)(*op - ctx->outputBuffer)); + /* Prepare next loop */ *ip += matchLength; *anchor = *ip; @@ -368,17 +566,17 @@ FORCE_INLINE int LZ5HC_encodeSequence ( } -static int LZ5HC_compress_generic ( - void* ctxvoid, +static int LZ5HC_compress_lowest_price ( + LZ5HC_Data_Structure* ctx, const char* source, char* dest, int inputSize, int maxOutputSize, - int compressionLevel, limitedOutput_directive limit ) { - LZ5HC_Data_Structure* ctx = (LZ5HC_Data_Structure*) ctxvoid; + ctx->inputBuffer = (BYTE*) source; + ctx->outputBuffer = (BYTE*) dest; const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; const BYTE* const iend = ip + inputSize; @@ -388,21 +586,15 @@ static int LZ5HC_compress_generic ( BYTE* op = (BYTE*) dest; BYTE* const oend = op + maxOutputSize; - unsigned maxNbAttempts; - int ml, ml2, ml3, ml0; + int ml, ml2, ml0; const BYTE* ref=NULL; const BYTE* start2=NULL; const BYTE* ref2=NULL; - const BYTE* start3=NULL; - const BYTE* ref3=NULL; const BYTE* start0; const BYTE* ref0; - + const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; /* init */ - if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel; - if (compressionLevel < 1) compressionLevel = LZ5HC_compressionLevel_default; - maxNbAttempts = 1 << (compressionLevel-1); ctx->end += inputSize; ip++; @@ -410,151 +602,228 @@ static int LZ5HC_compress_generic ( /* Main Loop */ while (ip < mflimit) { - ml = LZ5HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); + LZ5HC_Insert(ctx, ip); + ml = LZ5HC_FindBestMatch (ctx, ip, matchlimit, (&ref)); if (!ml) { ip++; continue; } + int back = 0; + while ((ip+back>anchor) && (ref+back > lowPrefixPtr) && (ip[back-1] == ref[back-1])) back--; + ml -= back; + ip += back; + ref += back; + /* saved, in case we would skip too much */ start0 = ip; ref0 = ref; ml0 = ml; -_Search2: - if (ip+ml < mflimit) - ml2 = LZ5HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts); - else ml2 = ml; +_Search: + if (ip+ml >= mflimit) goto _Encode; + + LZ5HC_Insert(ctx, ip); + ml2 = LZ5HC_GetWiderMatch(ctx, ip + ml - 2, anchor, matchlimit, 0, &ref2, &start2); + if (ml2 == 0) goto _Encode; - if (ml2 == ml) /* No better match */ { - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; - continue; - } + int price, best_price; + 
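/*
 * Illustrative sketch (not part of this patch): header size, in bytes, of the
 * four codeword layouts listed in the comment before LZ5HC_encodeSequence.
 * The distance thresholds (1<<10 and 1<<16) are assumptions consistent with
 * the 10-bit / 16-bit / 24-bit offset fields; extra bytes emitted for long
 * literal runs or long matches are not counted.  The ordering mirrors the
 * branches above, where a repeated offset is excluded from the 16-bit case.
 */
static int codeword_bytes(unsigned offset, unsigned last_off)
{
    if (offset == last_off)  return 1;   /* 011_LL_MMM                     - repeated offset */
    if (offset < (1u << 10)) return 2;   /* 1_OO_LL_MMM OOOOOOOO           - 10-bit offset   */
    if (offset < (1u << 16)) return 3;   /* 00_LLL_MMM  OOOOOOOO OOOOOOOO  - 16-bit offset   */
    return 4;                            /* 010_LL_MMM  + 3 offset bytes   - 24-bit offset   */
}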
U32 off0=0, off1=0; + uint8_t *pos, *best_pos; - if (start0 < ip) + // find the lowest price for encoding ml bytes + best_pos = (uint8_t*)ip; + best_price = 1<<30; + off0 = (uint8_t*)ip - ref; + off1 = start2 - ref2; + + for (pos = (uint8_t*)ip + ml; pos >= start2; pos--) { - if (start2 < ip + ml0) /* empirical */ + int common0 = pos - ip; + if (common0 >= MINMATCH) { - ip = start0; - ref = ref0; - ml = ml0; + price = LZ5_CODEWORD_COST(ip - anchor, (off0 == ctx->last_off) ? 0 : off0, common0 - MINMATCH); + + int common1 = start2 + ml2 - pos; + if (common1 >= MINMATCH) + price += LZ5_CODEWORD_COST(0, (off1 == off0) ? 0 : (off1), common1 - MINMATCH); + else + price += LZ5_LIT_ONLY_COST(common1); + + if (price < best_price) + { + best_price = price; + best_pos = pos; + } + } + else + { + price = LZ5_CODEWORD_COST(start2 - anchor, (off1 == ctx->last_off) ? 0 : off1, ml2 - MINMATCH); + + if (price < best_price) + { + best_price = price; + best_pos = pos; + } + + break; } } + // LZ5HC_DEBUG("%u: TRY last_off=%d literals=%u off=%u mlen=%u literals2=%u off2=%u mlen2=%u best=%d\n", (U32)(ip - ctx->inputBuffer), ctx->last_off, (U32)(ip - anchor), off0, (U32)ml, (U32)(start2 - anchor), off1, ml2, (U32)(best_pos - ip)); + ml = best_pos - ip; + } + - /* Here, start0==ip */ - if ((start2 - ip) < 3) /* First Match too small : removed */ + if (ml < MINMATCH) { - ml = ml2; ip = start2; - ref =ref2; - goto _Search2; + ref = ref2; + ml = ml2; + goto _Search; } + +_Encode: -_Search3: - /* - * Currently we have : - * ml2 > ml1, and - * ip1+3 <= ip2 (usually < ip1+ml1) - */ - if ((start2 - ip) < OPTIMAL_ML) + if (start0 < ip) { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) + if (LZ5HC_more_profitable(ip - ref, ml, start0 - ref0, ml0, ref0 - ref, ctx->last_off)) { - start2 += correction; - ref2 += correction; - ml2 -= correction; + ip = start0; + ref = ref0; + ml = ml0; } } - /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ - if (start2 + ml2 < mflimit) - ml3 = LZ5HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts); - else ml3 = ml2; + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; + } + + /* Encode Last Literals */ + { + int lastRun = (int)(iend - anchor); + if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<inputBuffer = (BYTE*) source; + ctx->outputBuffer = (BYTE*) dest; + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int ml, ml2=0; + const BYTE* ref=NULL; + const BYTE* start2=NULL; + const BYTE* ref2=NULL; + const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; + U32* HashTable = ctx->hashTable; +#if MINMATCH == 3 + U32* HashTable3 = ctx->hashTable3; +#endif + const BYTE* const base = ctx->base; + U32* HashPos, *HashPos3; + + /* init */ + ctx->end += inputSize; + + ip++; - if (ml3 == ml2) /* No better match : 2 sequences to encode */ + /* Main Loop 
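/*
 * Illustrative sketch (not part of this patch): the loop above in
 * LZ5HC_compress_lowest_price walks every legal split point between two
 * overlapping matches and keeps the cheapest one.  The cost model below is a
 * deliberately crude stand-in for LZ5_CODEWORD_COST / LZ5_LIT_ONLY_COST
 * (one byte per literal, a flat 3-byte codeword per match).
 */
#include <limits.h>

#define SKETCH_MINMATCH 3

static int sketch_cost(int literals, int match_len)
{
    return literals + (match_len > 0 ? 3 : 0);
}

/* match1 covers [ip, ip+ml), match2 covers [start2, start2+ml2), start2 > ip.
   Positions are plain integers here.  Returns how many bytes of match1 to emit
   before handing over to match2 (0 means: drop match1, keep only match2). */
static int best_split(int anchor, int ip, int ml, int start2, int ml2)
{
    int pos, best_pos = ip, best_price = INT_MAX;

    for (pos = ip + ml; pos >= start2; pos--)
    {
        int keep1 = pos - ip;               /* part of match1 kept              */
        int rest2 = start2 + ml2 - pos;     /* part of match2 left after split  */
        int price;

        if (keep1 >= SKETCH_MINMATCH)
            price = sketch_cost(ip - anchor, keep1)
                  + (rest2 >= SKETCH_MINMATCH ? sketch_cost(0, rest2)
                                              : sketch_cost(rest2, 0));
        else                                /* match1 too short: use match2 only */
            price = sketch_cost(start2 - anchor, ml2);

        if (price < best_price) { best_price = price; best_pos = pos; }
        if (keep1 < SKETCH_MINMATCH) break; /* shorter splits all degenerate     */
    }
    return best_pos - ip;
}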
*/ + while (ip < mflimit) + { + HashPos = &HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; + HashPos3 = &HashTable3[LZ5HC_hash3Ptr(ip, ctx->params.hashLog3)]; + ml = LZ5HC_FindBestMatchFast (ctx, *HashPos, *HashPos3, ip, matchlimit, (&ref)); + *HashPos = (U32)(ip - base); +#if MINMATCH == 3 + *HashPos3 = (U32)(ip - base); +#endif + if (!ml) { ip++; continue; } + + if ((U32)(ip - ref) == ctx->last_off) { ml2=0; goto _Encode; } + { - /* ip & ref are known; Now for ml */ - if (start2 < ip+ml) ml = (int)(start2 - ip); - /* Now, encode 2 sequences */ - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; - ip = start2; - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; - continue; + int back = 0; + while ((ip+back>anchor) && (ref+back > lowPrefixPtr) && (ip[back-1] == ref[back-1])) back--; + ml -= back; + ip += back; + ref += back; } + +_Search: + if (ip+ml >= mflimit) goto _Encode; + + start2 = ip + ml - 2; + HashPos = &HashTable[LZ5HC_hashPtr(start2, ctx->params.hashLog, ctx->params.searchLength)]; + ml2 = LZ5HC_FindBestMatchFaster(ctx, *HashPos, start2, matchlimit, (&ref2)); + *HashPos = (U32)(start2 - base); + if (!ml2) goto _Encode; - if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */ { - if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ - { - if (start2 < ip+ml) - { - int correction = (int)(ip+ml - start2); - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) - { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } + int back = 0; + while ((start2+back>ip) && (ref2+back > lowPrefixPtr) && (start2[back-1] == ref2[back-1])) back--; + ml2 -= back; + start2 += back; + ref2 += back; + } - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; - ip = start3; - ref = ref3; - ml = ml3; + // LZ5HC_DEBUG("%u: TRY last_off=%d literals=%u off=%u mlen=%u literals2=%u off2=%u mlen2=%u best=%d\n", (U32)(ip - ctx->inputBuffer), ctx->last_off, (U32)(ip - anchor), off0, (U32)ml, (U32)(start2 - anchor), off1, ml2, (U32)(best_pos - ip)); - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } + if (ml2 <= ml) { ml2 = 0; goto _Encode; } - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; + if (start2 <= ip) + { + ip = start2; ref = ref2; ml = ml2; + ml2 = 0; + goto _Encode; } - /* - * OK, now we have 3 ascending matches; let's write at least the first one - * ip & ref are known; Now for ml - */ - if (start2 < ip+ml) - { - if ((start2 - ip) < (int)ML_MASK) - { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - else - { - ml = (int)(start2 - ip); - } + if (start2 - ip < 3) + { + ip = start2; ref = ref2; ml = ml2; + ml2 = 0; + goto _Search; } - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; - ip = start2; - ref = ref2; - ml = ml2; - start2 = start3; - ref2 = ref3; - ml2 = ml3; + if (start2 < ip + ml) + { + int correction = ml - (int)(start2 - ip); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < 3) { ml2 = 0; } + } + +_Encode: + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; - goto _Search3; + if (ml2) + { + ip = start2; ref = ref2; ml = ml2; + ml2 = 0; + goto 
_Search; + } } /* Encode Last Literals */ @@ -572,41 +841,106 @@ static int LZ5HC_compress_generic ( } -int LZ5_sizeofStateHC(void) { return sizeof(LZ5HC_Data_Structure); } -int LZ5_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel) +static int LZ5HC_compress_fast ( + LZ5HC_Data_Structure* ctx, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + limitedOutput_directive limit + ) { - if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ - LZ5HC_init ((LZ5HC_Data_Structure*)state, (const BYTE*)src); - if (maxDstSize < LZ5_compressBound(srcSize)) - return LZ5HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput); - else - return LZ5HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, noLimit); + ctx->inputBuffer = (BYTE*) source; + ctx->outputBuffer = (BYTE*) dest; + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int ml; + const BYTE* ref=NULL; + const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; + const BYTE* const base = ctx->base; + U32* HashPos; + U32* HashTable = ctx->hashTable; + const int accel = (ctx->params.searchNum>0)?ctx->params.searchNum:1; + + /* init */ + ctx->end += inputSize; + + ip++; + + /* Main Loop */ + while (ip < mflimit) + { + HashPos = &HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; + ml = LZ5HC_FindBestMatchFastest (ctx, *HashPos, ip, matchlimit, (&ref)); + *HashPos = (U32)(ip - base); + if (!ml) { ip+=accel; continue; } + + int back = 0; + while ((ip+back>anchor) && (ref+back > lowPrefixPtr) && (ip[back-1] == ref[back-1])) back--; + ml -= back; + ip += back; + ref += back; + + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; + + } + + /* Encode Last Literals */ + { + int lastRun = (int)(iend - anchor); + if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<hashTable = ALLOCATOR(1, sizeof(U32)*HASHTABLESIZE); - if (!statePtr->hashTable) - return 0; + LZ5HC_Data_Structure* ctx = (LZ5HC_Data_Structure*) ctxvoid; - statePtr->chainTable = ALLOCATOR(1, sizeof(U32)*MAXD); - if (!statePtr->chainTable) + switch(ctx->params.strategy) { - FREEMEM(statePtr->hashTable); - statePtr->hashTable = NULL; - return 0; + default: + case LZ5HC_fast: + return LZ5HC_compress_fast(ctx, source, dest, inputSize, maxOutputSize, limit); + case LZ5HC_price_fast: + return LZ5HC_compress_price_fast(ctx, source, dest, inputSize, maxOutputSize, limit); + case LZ5HC_lowest_price: + return LZ5HC_compress_lowest_price(ctx, source, dest, inputSize, maxOutputSize, limit); } - - return 1; + + return 0; } -void LZ5_free_mem_HC(LZ5HC_Data_Structure* statePtr) + +int LZ5_sizeofStateHC(void) { return sizeof(LZ5HC_Data_Structure); } + +int LZ5_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { - if (statePtr->chainTable) FREEMEM(statePtr->chainTable); - if (statePtr->hashTable) FREEMEM(statePtr->hashTable); + if 
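/*
 * Illustrative sketch (not part of this patch): skeleton of the LZ5HC_fast
 * strategy above — one hash-table probe per position, no chain walking, and
 * on a miss the input pointer advances by an acceleration step (accel >= 1,
 * taken from the level parameters in the real code).  Hashing, the table
 * size and emit() are simplified stand-ins.
 */
#include <stdint.h>
#include <string.h>

#define SKETCH_HASH_LOG 12

static uint32_t hash4(const uint8_t* p)
{
    uint32_t v; memcpy(&v, p, 4);
    return (v * 2654435761u) >> (32 - SKETCH_HASH_LOG);
}

static size_t common_length(const uint8_t* a, const uint8_t* b, const uint8_t* end)
{
    const uint8_t* s = a;
    while (a < end && *a == *b) { a++; b++; }
    return (size_t)(a - s);
}

/* Walks the input, recording positions in a single-slot hash table and
   emitting (literal run, offset, match length) triples through a callback. */
static void greedy_fast(const uint8_t* src, size_t srcSize, int accel,
                        void (*emit)(size_t lits, size_t off, size_t len))
{
    uint32_t table[1 << SKETCH_HASH_LOG] = { 0 };
    const uint8_t* ip = src + 1;
    const uint8_t* anchor = src;
    const uint8_t* const iend = src + srcSize;

    while (ip + 12 < iend)                      /* end-of-input margin, like MFLIMIT */
    {
        uint32_t* slot = &table[hash4(ip)];
        const uint8_t* match = src + *slot;
        *slot = (uint32_t)(ip - src);           /* update the table even on a miss   */

        if (match < ip && memcmp(match, ip, 4) == 0)
        {
            size_t len = 4 + common_length(ip + 4, match + 4, iend);
            emit((size_t)(ip - anchor), (size_t)(ip - match), len);
            ip += len;
            anchor = ip;
        }
        else
            ip += accel;                        /* miss: skip ahead, like ip += accel */
    }
    emit((size_t)(iend - anchor), 0, 0);        /* trailing literals */
}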
(((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ + LZ5HC_init ((LZ5HC_Data_Structure*)state, (const BYTE*)src); + if (maxDstSize < LZ5_compressBound(srcSize)) + return LZ5HC_compress_generic (state, src, dst, srcSize, maxDstSize, limitedOutput); + else + return LZ5HC_compress_generic (state, src, dst, srcSize, maxDstSize, noLimit); } + int LZ5_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel) { #if LZ5HC_HEAPMODE==1 @@ -618,10 +952,10 @@ int LZ5_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int int cSize = 0; - if (!LZ5_alloc_mem_HC(statePtr)) + if (!LZ5_alloc_mem_HC(statePtr, compressionLevel)) return 0; - cSize = LZ5_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel); + cSize = LZ5_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize); LZ5_free_mem_HC(statePtr); @@ -637,36 +971,37 @@ int LZ5_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int * Streaming Functions **************************************/ /* allocation */ -LZ5_streamHC_t* LZ5_createStreamHC(void) +LZ5_streamHC_t* LZ5_createStreamHC(int compressionLevel) { LZ5HC_Data_Structure* statePtr = (LZ5HC_Data_Structure*)malloc(sizeof(LZ5_streamHC_t)); if (!statePtr) return NULL; - if (!LZ5_alloc_mem_HC(statePtr)) + if (!LZ5_alloc_mem_HC(statePtr, compressionLevel)) { FREEMEM(statePtr); return NULL; } - return (LZ5_streamHC_t*) statePtr; } int LZ5_freeStreamHC (LZ5_streamHC_t* LZ5_streamHCPtr) { LZ5HC_Data_Structure* statePtr = (LZ5HC_Data_Structure*)LZ5_streamHCPtr; - LZ5_free_mem_HC(statePtr); - free(LZ5_streamHCPtr); + if (statePtr) + { + LZ5_free_mem_HC(statePtr); + free(LZ5_streamHCPtr); + } return 0; } /* initialization */ -void LZ5_resetStreamHC (LZ5_streamHC_t* LZ5_streamHCPtr, int compressionLevel) +void LZ5_resetStreamHC (LZ5_streamHC_t* LZ5_streamHCPtr) { LZ5_STATIC_ASSERT(sizeof(LZ5HC_Data_Structure) <= sizeof(LZ5_streamHC_t)); /* if compilation fails here, LZ5_STREAMHCSIZE must be increased */ ((LZ5HC_Data_Structure*)LZ5_streamHCPtr)->base = NULL; - ((LZ5HC_Data_Structure*)LZ5_streamHCPtr)->compressionLevel = (unsigned)compressionLevel; } int LZ5_loadDictHC (LZ5_streamHC_t* LZ5_streamHCPtr, const char* dictionary, int dictSize) @@ -733,7 +1068,7 @@ static int LZ5_compressHC_continue_generic (LZ5HC_Data_Structure* ctxPtr, } } - return LZ5HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit); + return LZ5HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, limit); } int LZ5_compress_HC_continue (LZ5_streamHC_t* LZ5_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize) @@ -770,10 +1105,10 @@ int LZ5_saveDictHC (LZ5_streamHC_t* LZ5_streamHCPtr, char* safeBuffer, int dictS * Deprecated Functions ***********************************/ /* Deprecated compression functions */ -/* These functions are planned to start generate warnings by r131 approximately */ +/* These functions are planned to start generate warnings by r132 approximately */ int LZ5_compressHC(const char* src, char* dst, int srcSize) { return LZ5_compress_HC (src, dst, srcSize, LZ5_compressBound(srcSize), 0); } int LZ5_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC(src, dst, srcSize, maxDstSize, 0); } int LZ5_compressHC_continue (LZ5_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ5_compress_HC_continue (ctx, 
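/*
 * Illustrative usage sketch (not part of this patch): one-shot HC compression
 * through LZ5_compress_HC(), which in r132 derives the parser and table sizes
 * from the compression level internally.  LZ5_compressBound() supplies the
 * worst-case destination size; a 0 return is treated as failure, matching the
 * error convention used above.
 */
#include <stdlib.h>
#include "lz5.h"     /* LZ5_compressBound */
#include "lz5hc.h"   /* LZ5_compress_HC   */

static char* compress_buffer_hc(const char* src, int srcSize, int level, int* cSize)
{
    int bound = LZ5_compressBound(srcSize);
    char* dst = (char*)malloc((size_t)bound);
    if (dst == NULL) return NULL;

    *cSize = LZ5_compress_HC(src, dst, srcSize, bound, level);
    if (*cSize == 0) { free(dst); return NULL; }   /* compression failed */
    return dst;                                    /* caller owns and frees */
}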
src, dst, srcSize, LZ5_compressBound(srcSize)); } int LZ5_compressHC_limitedOutput_continue (LZ5_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } -int LZ5_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ5_compress_HC_extStateHC (state, src, dst, srcSize, LZ5_compressBound(srcSize), 0); } -int LZ5_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } +int LZ5_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ5_compress_HC_extStateHC (state, src, dst, srcSize, LZ5_compressBound(srcSize)); } +int LZ5_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize); } diff --git a/lib/lz5hc.h b/lib/lz5hc.h index 23d37fc..167c3e2 100644 --- a/lib/lz5hc.h +++ b/lib/lz5hc.h @@ -68,11 +68,11 @@ LZ5_compress_HC : typedef struct LZ5HC_Data_s LZ5HC_Data_Structure; -int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* statePtr); +int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* statePtr, int compressionLevel); void LZ5_free_mem_HC(LZ5HC_Data_Structure* statePtr); int LZ5_sizeofStateHC(void); -int LZ5_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); +int LZ5_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize); /* LZ5_compress_HC_extStateHC() : Use this function if you prefer to manually allocate memory for compression tables. @@ -103,7 +103,7 @@ typedef struct { size_t table[LZ5_STREAMHCSIZE_SIZET]; } LZ5_streamHC_t; */ -LZ5_streamHC_t* LZ5_createStreamHC(void); +LZ5_streamHC_t* LZ5_createStreamHC(int compressionLevel); int LZ5_freeStreamHC (LZ5_streamHC_t* streamHCPtr); /* These functions create and release memory for LZ5 HC streaming state. @@ -113,7 +113,7 @@ int LZ5_freeStreamHC (LZ5_streamHC_t* streamHCPtr); to avoid size mismatch between different versions. */ -void LZ5_resetStreamHC (LZ5_streamHC_t* streamHCPtr, int compressionLevel); +void LZ5_resetStreamHC (LZ5_streamHC_t* streamHCPtr); int LZ5_loadDictHC (LZ5_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); int LZ5_compress_HC_continue (LZ5_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize); @@ -152,7 +152,6 @@ int LZ5_saveDictHC (LZ5_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSi You can also define LZ5_DEPRECATE_WARNING_DEFBLOCK. 
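/*
 * Illustrative usage sketch (not part of this patch): with the prototypes
 * above, the compression level is now fixed when the stream is created, and
 * LZ5_resetStreamHC() no longer takes a level.  Chunk sizes and the level (4)
 * are arbitrary; error handling is reduced to checking for 0 returns.
 */
#include "lz5hc.h"

static int stream_two_chunks(const char* chunkA, int sizeA,
                             const char* chunkB, int sizeB,
                             char* dst, int dstCapacity)
{
    int written, total;
    LZ5_streamHC_t* s = LZ5_createStreamHC(4);        /* level bound at creation */
    if (s == NULL) return -1;

    written = LZ5_compress_HC_continue(s, chunkA, dst, sizeA, dstCapacity);
    if (written == 0) { LZ5_freeStreamHC(s); return -1; }
    total = written;

    written = LZ5_compress_HC_continue(s, chunkB, dst + total, sizeB,
                                       dstCapacity - total);
    if (written == 0) { LZ5_freeStreamHC(s); return -1; }
    total += written;

    LZ5_freeStreamHC(s);
    return total;
}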
*/ #ifndef LZ5_DEPRECATE_WARNING_DEFBLOCK # define LZ5_DEPRECATE_WARNING_DEFBLOCK -# define LZ5_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # if (LZ5_GCC_VERSION >= 405) || defined(__clang__) # define LZ5_DEPRECATED(message) __attribute__((deprecated(message))) # elif (LZ5_GCC_VERSION >= 301) @@ -166,7 +165,7 @@ int LZ5_saveDictHC (LZ5_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSi #endif // LZ5_DEPRECATE_WARNING_DEFBLOCK /* compression functions */ -/* these functions are planned to trigger warning messages by r131 approximately */ +/* these functions are planned to trigger warning messages by r132 approximately */ int LZ5_compressHC (const char* source, char* dest, int inputSize); int LZ5_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); int LZ5_compressHC_continue (LZ5_streamHC_t* LZ5_streamHCPtr, const char* source, char* dest, int inputSize); diff --git a/lib/mem.h b/lib/mem.h new file mode 100644 index 0000000..01e839f --- /dev/null +++ b/lib/mem.h @@ -0,0 +1,445 @@ +/* ****************************************************************** + mem.h + low-level memory access routines + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/****************************************** +* Includes +******************************************/ +#include /* size_t, ptrdiff_t */ +#include /* memcpy */ + + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#elif defined(__GNUC__) +# define MEM_STATIC static __attribute__((unused)) +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + + +/**************************************************************** +* Memory I/O +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard on structure alignment. 
+Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } + +#else + +/* default method, safe and standard. + can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif // MEM_FORCE_MEMORY_ACCESS + + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) + { + MEM_write16(memPtr, val); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + if (MEM_isLittleEndian()) + { + U32 val32 = 0; + memcpy(&val32, memPtr, 3); + return val32; + } + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)(p[0] + (p[1]<<8) + (p[2]<<16)); + } +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 value) +{ + if (MEM_isLittleEndian()) + { + memcpy(memPtr, &value, 3); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + p[2] = (BYTE)(value>>16); + } +} + + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + { + MEM_write32(memPtr, val32); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val32; + p[1] = (BYTE)(val32>>8); 
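/*
 * Usage note (sketch, not part of this patch): MEM_FORCE_MEMORY_ACCESS above
 * is meant to be set from the build line when the default memcpy() path is
 * too slow on a given target; the invocation below is a hypothetical example.
 *
 *     cc -O3 -DMEM_FORCE_MEMORY_ACCESS=1 -c lz5hc.c
 *
 * Method 1 relies on compiler-specific packed unions (provided here for
 * gcc/icc), method 2 on the target tolerating unaligned loads.
 */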
+ p[2] = (BYTE)(val32>>16); + p[3] = (BYTE)(val32>>24); + } +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + { + MEM_write64(memPtr, val64); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val64; + p[1] = (BYTE)(val64>>8); + p[2] = (BYTE)(val64>>16); + p[3] = (BYTE)(val64>>24); + p[4] = (BYTE)(val64>>32); + p[5] = (BYTE)(val64>>40); + p[6] = (BYTE)(val64>>48); + p[7] = (BYTE)(val64>>56); + } +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + + + +#define MEM_read24(ptr) (uint32_t)(MEM_read32(ptr)<<8) + +/* ************************************** +* Function body to include for inlining +****************************************/ +static size_t MEM_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; } + +#define MIN(a,b) ((a)<(b) ? (a) : (b)) + +static unsigned MEM_highbit(U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse(&r, val); + return (unsigned)r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return 31 - __builtin_clz(val); +# else /* Software version */ + static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + +MEM_STATIC unsigned MEM_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) + { + if (MEM_64bits()) + { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } + else /* 32 bits */ + { +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } + else /* Big Endian CPU */ + { + if (MEM_32bits()) + { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining 
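/*
 * Note with a small sketch (not part of this patch): MEM_read24() above is a
 * 32-bit load shifted left by 8, so on a little-endian target the comparison
 * MEM_read24(a) == MEM_read24(b) checks exactly the first three bytes while
 * still touching four bytes of memory.  A byte-wise equivalent for reference:
 */
#include <stdint.h>

static uint32_t read24_shifted(const uint8_t* p)
{
    /* same value MEM_read24() produces on little-endian: bytes 0..2 in bits 8..31 */
    return ((uint32_t)p[0] << 8) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 24);
}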
in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } + else /* 32 bits */ + { +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } + } +} + + +MEM_STATIC size_t MEM_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + while ((pIn $(VOID) @rm -f core *.o *.test tmp* \ - lz5$(EXT) lz5c$(EXT) lz5c32$(EXT) \ + lz5$(EXT) lz5_32$(EXT) \ fullbench$(EXT) fullbench32$(EXT) \ frametest$(EXT) frametest32$(EXT) \ datagen$(EXT) @@ -107,16 +106,14 @@ clean: #make install is validated only for Linux, OSX, kFreeBSD and Hurd targets ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU)) -install: lz5 lz5c +install: lz5 @echo Installing binaries @install -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ @install -m 755 lz5 $(DESTDIR)$(BINDIR)/lz5 @ln -sf lz5 $(DESTDIR)$(BINDIR)/lz5cat @ln -sf lz5 $(DESTDIR)$(BINDIR)/unlz5 - @install -m 755 lz5c $(DESTDIR)$(BINDIR)/lz5c @echo Installing man pages @install -m 644 lz5.1 $(DESTDIR)$(MANDIR)/lz5.1 - @ln -sf lz5.1 $(DESTDIR)$(MANDIR)/lz5c.1 @ln -sf lz5.1 $(DESTDIR)$(MANDIR)/lz5cat.1 @ln -sf lz5.1 $(DESTDIR)$(MANDIR)/unlz5.1 @echo lz5 installation completed @@ -125,16 +122,14 @@ uninstall: rm -f $(DESTDIR)$(BINDIR)/lz5cat rm -f $(DESTDIR)$(BINDIR)/unlz5 [ -x $(DESTDIR)$(BINDIR)/lz5 ] && rm -f $(DESTDIR)$(BINDIR)/lz5 - [ -x $(DESTDIR)$(BINDIR)/lz5c ] && rm -f $(DESTDIR)$(BINDIR)/lz5c [ -f $(DESTDIR)$(MANDIR)/lz5.1 ] && rm -f $(DESTDIR)$(MANDIR)/lz5.1 - rm -f $(DESTDIR)$(MANDIR)/lz5c.1 rm -f $(DESTDIR)$(MANDIR)/lz5cat.1 rm -f $(DESTDIR)$(MANDIR)/unlz5.1 @echo lz5 programs successfully uninstalled -test: test-lz5 test-lz5c test-frametest test-fullbench test-fuzzer test-mem +test: test-lz5 test-frametest test-fullbench test-fuzzer test-mem -test32: test-lz5c32 test-frametest32 test-fullbench32 test-fuzzer32 test-mem32 +test32: test-lz5_32 test-frametest32 test-fullbench32 test-fuzzer32 test-mem32 test-all: test test32 @@ -231,28 +226,24 @@ test-lz5: lz5 datagen test-lz5-basic test-lz5-multiple test-lz5-sparse test-lz5- @echo "\n ---- test pass-through ----" ./datagen | ./lz5 -tf -test-lz5c: lz5c datagen - @echo "\n ---- test lz5c version ----" - ./datagen -g256MB | ./lz5c -l -v | ./lz5c -t - -test-interop-32-64: lz5 lz5c32 datagen +test-interop-32-64: lz5 lz5_32 datagen @echo "\n ---- test interoperability 32-bits -vs- 64 bits ----" - ./datagen -g16KB | ./lz5c32 -9 | ./lz5 -t - ./datagen -P10 | ./lz5 -9B4 | ./lz5c32 -t - ./datagen | ./lz5c32 | ./lz5 -t - ./datagen -g1M | ./lz5 -3B5 | ./lz5c32 -t - ./datagen -g256MB | ./lz5c32 -vqB4D | ./lz5 -qt - ./datagen -g1G -P90 | ./lz5 | ./lz5c32 -t - ./datagen -g6GB | ./lz5c32 -vq9BD | ./lz5 -qt - -test-lz5c32-basic: lz5c32 datagen - @echo "\n ---- test lz5c32 32-bits version ----" - ./datagen -g16KB | ./lz5c32 -9 | ./lz5c32 -t - ./datagen | ./lz5c32 | ./lz5c32 -t - ./datagen -g256MB | ./lz5c32 -vqB4D | ./lz5c32 -qt - ./datagen -g6GB | ./lz5c32 -vqB5D | ./lz5c32 -qt - -test-lz5c32: test-lz5c32-basic test-interop-32-64 + ./datagen -g16KB | ./lz5_32 -9 | ./lz5 -t + ./datagen -P10 | ./lz5 -9B4 | ./lz5_32 -t + ./datagen | ./lz5_32 | ./lz5 -t + ./datagen -g1M | ./lz5 -3B5 | ./lz5_32 -t + 
./datagen -g256MB | ./lz5_32 -vqB4D | ./lz5 -qt + ./datagen -g1G -P90 | ./lz5 | ./lz5_32 -t + ./datagen -g6GB | ./lz5_32 -vq9BD | ./lz5 -qt + +test-lz5_32-basic: lz5_32 datagen + @echo "\n ---- test lz5_32 32-bits version ----" + ./datagen -g16KB | ./lz5_32 -9 | ./lz5_32 -t + ./datagen | ./lz5_32 | ./lz5_32 -t + ./datagen -g256MB | ./lz5_32 -vqB4D | ./lz5_32 -qt + ./datagen -g6GB | ./lz5_32 -vqB5D | ./lz5_32 -qt + +test-lz5_32: test-lz5_32-basic test-interop-32-64 test-fullbench: fullbench ./fullbench --no-prompt $(NB_LOOPS) $(TEST_FILES) @@ -291,7 +282,7 @@ test-mem: lz5 datagen fuzzer frametest fullbench valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i64 -t1 valgrind --leak-check=yes --error-exitcode=1 ./frametest -i256 -test-mem32: lz5c32 datagen +test-mem32: lz5_32 datagen # unfortunately, valgrind doesn't seem to work with non-native binary... endif diff --git a/programs/bench.c b/programs/bench.c index 09aa07a..c235e9c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -430,7 +430,7 @@ int BMK_benchFiles(const char** fileNamesTable, int nbFiles, int cLevel) } if (nbFiles > 1) - DISPLAY("%-16.16s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (long long unsigned int)totalz, (double)totalz/(double)totals*100., (double)totals/totalc/1000., (double)totals/totald/1000.); + DISPLAY("%-16.16s :%10" PRIu64 " ->%10" PRIu64 " (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (long long unsigned int)totalz, (double)totalz/(double)totals*100.0, (double)totals/totalc/1000.0, (double)totals/totald/1000.0); if (BMK_pause) { DISPLAY("\npress enter...\n"); (void)getchar(); } diff --git a/programs/fullbench.c b/programs/fullbench.c index 8b71335..233a038 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -471,7 +471,7 @@ static int local_LZ5_compress_forceDict(const char* in, char* out, int inSize) LZ5_streamHC_t LZ5_streamHC; static void local_LZ5_resetStreamHC(void) { - LZ5_resetStreamHC(&LZ5_streamHC, 0); + LZ5_resetStreamHC(&LZ5_streamHC); } static int local_LZ5_saveDictHC(const char* in, char* out, int inSize) @@ -483,7 +483,7 @@ static int local_LZ5_saveDictHC(const char* in, char* out, int inSize) static int local_LZ5_compressHC_withStateHC(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_withStateHC(&LZ5_streamHC, in, out, inSize); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -494,7 +494,7 @@ static int local_LZ5_compressHC_withStateHC(const char* in, char* out, int inSiz static int local_LZ5_compressHC_limitedOutput_withStateHC(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_limitedOutput_withStateHC(&LZ5_streamHC, in, out, inSize, LZ5_compressBound(inSize)-1); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -505,7 +505,7 @@ static int local_LZ5_compressHC_limitedOutput_withStateHC(const char* in, char* static int local_LZ5_compressHC_limitedOutput(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_limitedOutput(in, out, inSize, LZ5_compressBound(inSize)-1); 
LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -516,7 +516,7 @@ static int local_LZ5_compressHC_limitedOutput(const char* in, char* out, int inS static int local_LZ5_compressHC_continue(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_continue(&LZ5_streamHC, in, out, inSize); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -527,7 +527,7 @@ static int local_LZ5_compressHC_continue(const char* in, char* out, int inSize) static int local_LZ5_compressHC_limitedOutput_continue(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_limitedOutput_continue(&LZ5_streamHC, in, out, inSize, LZ5_compressBound(inSize)-1); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -734,7 +734,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) LZ5_loadDict(&LZ5_stream, chunkP[0].origBuffer, chunkP[0].origSize); break; case 41: compressionFunction = local_LZ5_saveDictHC; compressorName = "LZ5_saveDictHC"; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { LZ5_loadDictHC(&LZ5_streamHC, chunkP[0].origBuffer, chunkP[0].origSize); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); diff --git a/programs/lz5cli.c b/programs/lz5cli.c index be6ee98..5cb90eb 100644 --- a/programs/lz5cli.c +++ b/programs/lz5cli.c @@ -30,14 +30,6 @@ The license of this compression CLI program is GPLv2. */ -/************************************** -* Tuning parameters -***************************************/ -/* ENABLE_LZ5C_LEGACY_OPTIONS : - Control the availability of -c0, -c1 and -hc legacy arguments - Default : Legacy options are disabled */ -/* #define ENABLE_LZ5C_LEGACY_OPTIONS */ - /************************************** * Compiler Options @@ -60,6 +52,7 @@ #include /* strcmp, strlen */ #include "bench.h" /* BMK_benchFile, BMK_SetNbIterations, BMK_SetBlocksize, BMK_SetPause */ #include "lz5io.h" /* LZ5IO_compressFilename, LZ5IO_decompressFilename, LZ5IO_compressMultipleFilenames */ +#include "lz5common.h" /**************************** @@ -86,16 +79,12 @@ * Constants ******************************/ #define COMPRESSOR_NAME "LZ5 command line interface" -#define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ5_VERSION, AUTHOR, __DATE__ +#define AUTHOR "Y.Collet & P.Skibinski" +#define WELCOME_MESSAGE "%s %i-bit %s by %s (%s)\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ5_VERSION, AUTHOR, __DATE__ #define LZ5_EXTENSION ".lz5" #define LZ5CAT "lz5cat" #define UNLZ5 "unlz5" -#define KB *(1U<<10) -#define MB *(1U<<20) -#define GB *(1U<<30) - #define LZ5_BLOCKSIZEID_DEFAULT 7 @@ -150,12 +139,12 @@ static int usage(void) DISPLAY( "input : a filename\n"); DISPLAY( " with no FILE, or when FILE is - or %s, read standard input\n", stdinmark); DISPLAY( "Arguments :\n"); - DISPLAY( " -1 : Fast compression (default) \n"); - DISPLAY( " -9 : High compression \n"); - DISPLAY( " -d : decompression (default for %s extension)\n", LZ5_EXTENSION); - DISPLAY( " -z : force compression\n"); - DISPLAY( " -f : overwrite output without prompting \n"); - DISPLAY( " -h/-H : display help/long help and exit\n"); + DISPLAY( " -0 : Fast compression (default) \n"); + 
DISPLAY( " -1...-%d : High compression; higher number == more compression but slower\n", LZ5HC_MAX_CLEVEL); + DISPLAY( " -d : decompression (default for %s extension)\n", LZ5_EXTENSION); + DISPLAY( " -z : force compression\n"); + DISPLAY( " -f : overwrite output without prompting \n"); + DISPLAY( " -h/-H : display help/long help and exit\n"); return 0; } @@ -181,13 +170,6 @@ static int usage_advanced(void) DISPLAY( "Benchmark arguments :\n"); DISPLAY( " -b : benchmark file(s)\n"); DISPLAY( " -i# : iteration loops [1-9](default : 3), benchmark mode only\n"); -#if defined(ENABLE_LZ5C_LEGACY_OPTIONS) - DISPLAY( "Legacy arguments :\n"); - DISPLAY( " -c0 : fast compression\n"); - DISPLAY( " -c1 : high compression\n"); - DISPLAY( " -hc : high compression\n"); - DISPLAY( " -y : overwrite output without prompting \n"); -#endif /* ENABLE_LZ5C_LEGACY_OPTIONS */ EXTENDED_HELP; return 0; } @@ -214,8 +196,8 @@ static int usage_longhelp(void) DISPLAY( "\n"); DISPLAY( "Compression levels : \n"); DISPLAY( "---------------------\n"); - DISPLAY( "-0 ... -2 => Fast compression, all identicals\n"); - DISPLAY( "-3 ... -16 => High compression; higher number == more compression but slower\n"); + DISPLAY( "-0 => Fast compression\n"); + DISPLAY( "-1 ... -%d => High compression; higher number == more compression but slower\n", LZ5HC_MAX_CLEVEL); DISPLAY( "\n"); DISPLAY( "stdin, stdout and the console : \n"); DISPLAY( "--------------------------------\n"); @@ -239,17 +221,6 @@ static int usage_longhelp(void) DISPLAY( "-------------------------------------\n"); DISPLAY( "3 : compress data stream from 'generator', send result to 'consumer'\n"); DISPLAY( " generator | %s | consumer \n", programName); -#if defined(ENABLE_LZ5C_LEGACY_OPTIONS) - DISPLAY( "\n"); - DISPLAY( "***** Warning *****\n"); - DISPLAY( "Legacy arguments take precedence. 
Therefore : \n"); - DISPLAY( "---------------------------------\n"); - DISPLAY( " %s -hc filename\n", programName); - DISPLAY( "means 'compress filename in high compression mode'\n"); - DISPLAY( "It is not equivalent to :\n"); - DISPLAY( " %s -h -c filename\n", programName); - DISPLAY( "which would display help text and exit\n"); -#endif /* ENABLE_LZ5C_LEGACY_OPTIONS */ return 0; } @@ -341,14 +312,6 @@ int main(int argc, char** argv) { argument ++; -#if defined(ENABLE_LZ5C_LEGACY_OPTIONS) - /* Legacy arguments (-c0, -c1, -hc, -y, -s) */ - if ((argument[0]=='c') && (argument[1]=='0')) { cLevel=0; argument++; continue; } /* -c0 (fast compression) */ - if ((argument[0]=='c') && (argument[1]=='1')) { cLevel=9; argument++; continue; } /* -c1 (high compression) */ - if ((argument[0]=='h') && (argument[1]=='c')) { cLevel=9; argument++; continue; } /* -hc (high compression) */ - if (*argument=='y') { LZ5IO_setOverwrite(1); continue; } /* -y (answer 'yes' to overwrite permission) */ -#endif /* ENABLE_LZ5C_LEGACY_OPTIONS */ - if ((*argument>='0') && (*argument<='9')) { cLevel = 0; diff --git a/programs/lz5io.c b/programs/lz5io.c index 6f15840..5335aa7 100644 --- a/programs/lz5io.c +++ b/programs/lz5io.c @@ -39,6 +39,10 @@ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif +/* Add support for %lld in printf */ +#define __STDC_FORMAT_MACROS // now PRIu64 will work +#include + #define _LARGE_FILES /* Large file support on 32-bits AIX */ #define _FILE_OFFSET_BITS 64 /* Large file support on 32-bits unix */ @@ -99,10 +103,10 @@ #define _8BITS 0xFF #define MAGICNUMBER_SIZE 4 -#define LZ5IO_MAGICNUMBER 0x184D2205 -#define LZ5IO_SKIPPABLE0 0x184D2A50 -#define LZ5IO_SKIPPABLEMASK 0xFFFFFFF0 -#define LEGACY_MAGICNUMBER 0x184C2102 +#define LZ5IO_MAGICNUMBER 0x184D2205U +#define LZ5IO_SKIPPABLE0 0x184D2A50U +#define LZ5IO_SKIPPABLEMASK 0xFFFFFFF0U +#define LEGACY_MAGICNUMBER 0x184C2102U #define CACHELINE 64 #define LEGACY_BLOCKSIZE (8 MB) @@ -396,7 +400,7 @@ int LZ5IO_compressFilename_Legacy(const char* input_filename, const char* output end = clock(); DISPLAYLEVEL(2, "\r%79s\r", ""); filesize += !filesize; /* avoid divide by zero */ - DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + DISPLAYLEVEL(2,"Compressed %" PRIu64 " bytes into %" PRIu64 " bytes ==> %.2f%%\n", (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); { double seconds = (double)(end - start)/CLOCKS_PER_SEC; @@ -558,7 +562,7 @@ static int LZ5IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, /* Final Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + DISPLAYLEVEL(2, "Compressed %" PRIu64 " bytes into %" PRIu64 " bytes ==> %.2f%%\n", filesize, compressedfilesize, (double)compressedfilesize/(filesize + !filesize)*100); /* avoid division by zero */ return 0; @@ -980,7 +984,7 @@ static int LZ5IO_decompressFile_extRess(dRess_t ress, const char* input_filename /* Final Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "Successfully decoded %llu bytes \n", filesize); + DISPLAYLEVEL(2, "Successfully decoded %" PRIu64 " bytes \n", filesize); /* Close */ fclose(finput);
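/*
 * Illustrative sketch (not part of this patch): the PRIu64 conversions above
 * print the 64-bit byte counters portably.  The patch defines
 * __STDC_FORMAT_MACROS before <inttypes.h> so the PRIu64 macros are exposed
 * even on toolchains that hide them by default; the "+ !inBytes" term mirrors
 * the divide-by-zero guard used in LZ5IO_compressFilename_extRess.
 */
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <stdio.h>

static void print_ratio(uint64_t inBytes, uint64_t outBytes)
{
    printf("Compressed %" PRIu64 " bytes into %" PRIu64 " bytes ==> %.2f%%\n",
           inBytes, outBytes,
           (double)outBytes / (double)(inBytes + !inBytes) * 100.0);
}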